From 7e1b14a41e7f79953e647846c505360a18b6fe8d Mon Sep 17 00:00:00 2001
From: pseusys
Date: Sat, 18 Feb 2023 17:45:23 +0100
Subject: [PATCH] keeping up-to-date

---
NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy; hunk line counts were checked against the
`@@` headers and the diffstat, but exact leading whitespace and the position
of blank context lines are inferred - confirm against the repository before
applying. In the second sources/loc.py hunk one blank context line could not
be located; it is assumed to follow the `return` (TODO confirm).
NOTE(review): `_find_pagination_and_data_list` no longer raises on a
non-paginated response (e.g. `repository: null`): it returns an empty page so
that the `is None` checks in loc.py / main.py stay reachable. The `index`
blob hashes below still describe the original commit.

 sources/download_manager.py | 158 ++++++++++++++++++++++++++----------
 sources/loc.py              |  41 +++++-----
 sources/main.py             |  10 +--
 3 files changed, 143 insertions(+), 66 deletions(-)

diff --git a/sources/download_manager.py b/sources/download_manager.py
index b884e5c..6df7cc7 100644
--- a/sources/download_manager.py
+++ b/sources/download_manager.py
@@ -1,7 +1,7 @@
 from hashlib import md5
 from json import dumps
 from string import Template
-from typing import Awaitable, Dict, Callable, Optional
+from typing import Awaitable, Dict, Callable, Optional, List, Tuple
 
 from httpx import AsyncClient
 from yaml import safe_load
@@ -12,7 +12,7 @@ GITHUB_API_QUERIES = {
     "repositories_contributed_to": """
 {
     user(login: "$username") {
-        repositoriesContributedTo(last: 100, includeUserRepositories: true) {
+        repositoriesContributedTo(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, includeUserRepositories: true) {
             nodes {
                 isFork
                 name
@@ -20,6 +20,10 @@ GITHUB_API_QUERIES = {
                     login
                 }
             }
+            pageInfo {
+                endCursor
+                hasNextPage
+            }
         }
     }
 }""",
@@ -29,11 +33,13 @@ GITHUB_API_QUERIES = {
         defaultBranchRef {
             target {
                 ... on Commit {
-                    history(first: 100, author: { id: "$id" }) {
-                        edges {
-                            node {
-                                committedDate
-                            }
+                    history($pagination, author: { id: "$id" }) {
+                        nodes {
+                            committedDate
+                        }
+                        pageInfo {
+                            endCursor
+                            hasNextPage
                         }
                     }
                 }
@@ -44,44 +50,57 @@ GITHUB_API_QUERIES = {
     "user_repository_list": """
 {
     user(login: "$username") {
-        repositories(orderBy: {field: CREATED_AT, direction: ASC}, last: 100, affiliations: [OWNER, COLLABORATOR], isFork: false) {
-            edges {
-                node {
-                    primaryLanguage {
-                        name
-                    }
+        repositories(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, affiliations: [OWNER, COLLABORATOR], isFork: false) {
+            nodes {
+                primaryLanguage {
                     name
-                    owner {
-                        login
-                    }
                 }
+                name
+                owner {
+                    login
+                }
+            }
+            pageInfo {
+                endCursor
+                hasNextPage
             }
         }
     }
 }
 """,
-    "repository_commit_list": """
+    "repository_branches_list": """
 {
     repository(owner: "$owner", name: "$name") {
-        refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, first: 10) {
-            edges {
-                node {
-                    ... on Ref {
-                        target {
+        refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, $pagination) {
+            nodes {
+                name
+            }
+            pageInfo {
+                endCursor
+                hasNextPage
+            }
+        }
+    }
+}
+""",
+    "repository_branch_commit_list": """
+{
+    repository(owner: "$owner", name: "$name") {
+        ref(qualifiedName: "refs/heads/$branch") {
+            target {
+                ... on Commit {
+                    history(author: { id: "$id" }, $pagination) {
+                        nodes {
                             ... on Commit {
-                                history(first: 100, author: { id: "$id" }) {
-                                    edges {
-                                        node {
-                                            ... on Commit {
-                                                additions
-                                                deletions
-                                                committedDate
-                                            }
-                                        }
-                                    }
-                                }
+                                additions
+                                deletions
+                                committedDate
                             }
                         }
+                        pageInfo {
+                            endCursor
+                            hasNextPage
+                        }
                     }
                 }
             }
@@ -176,13 +195,72 @@ class DownloadManager:
         """
         return await DownloadManager._get_remote_resource(resource, safe_load)
 
+    @staticmethod
+    async def _fetch_graphql_query(query: str, **kwargs) -> Dict:
+        """
+        Execute GitHub GraphQL API simple query.
+        :param query: Dynamic query identifier.
+        :param kwargs: Parameters for substitution of variables in dynamic query.
+        :return: Response JSON dictionary.
+        """
+        res = await DownloadManager._client.post("https://api.github.com/graphql", json={
+            "query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)
+        })
+        if res.status_code == 200:
+            return res.json()
+        else:
+            raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}")
+
+    @staticmethod
+    def _find_pagination_and_data_list(response: Dict) -> Tuple[List, Dict]:
+        """
+        Parses response as a paginated response.
+        NB! All paginated responses are expected to have the following structure:
+        {
+            ...: {
+                "nodes": [],
+                "pageInfo" : {}
+            }
+        }
+        Where `...` stands for any number of dictionaries containing _one single key_ only.
+        If the structure isn't met (e.g. a `null` repository), an empty list and `{"hasNextPage": False}` are returned.
+        :param response: Response JSON dictionary.
+        :returns: Tuple of the acquired pagination data list ("nodes" key) and pagination info dict ("pageInfo" key).
+        """
+        if "nodes" in response.keys() and "pageInfo" in response.keys():
+            return response["nodes"], response["pageInfo"]
+        elif len(response) == 1 and isinstance(next(iter(response.values())), dict):
+            return DownloadManager._find_pagination_and_data_list(next(iter(response.values())))
+        else:
+            return [], {"hasNextPage": False}  # not paginated (null node / error payload): callers check for None themselves
+
+    @staticmethod
+    async def _fetch_graphql_paginated(query: str, **kwargs) -> Dict:
+        """
+        Execute GitHub GraphQL API paginated query.
+        Queries 100 new results each time until no more results are left.
+        Merges result list into single query, clears pagination-related info.
+        :param query: Dynamic query identifier.
+        :param kwargs: Parameters for substitution of variables in dynamic query.
+        :return: Response JSON dictionary.
+        """
+        initial_query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination="first: 100")
+        page_list, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response)
+        while page_info["hasNextPage"]:
+            query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination=f'first: 100, after: "{page_info["endCursor"]}"')
+            new_page_list, page_info = DownloadManager._find_pagination_and_data_list(query_response)
+            page_list += new_page_list
+        _, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response)
+        page_info.clear()
+        return initial_query_response
+
     @staticmethod
     async def get_remote_graphql(query: str, **kwargs) -> Dict:
         """
         Execute GitHub GraphQL API query.
         The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs.
         If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash.
-        NB! Caching is done before response parsing - to throw exception on accessing cached erroneous response.
+        Merges paginated sub-queries if pagination is required for the query.
         Parse and return response as JSON.
         :param query: Dynamic query identifier.
         :param kwargs: Parameters for substitution of variables in dynamic query.
@@ -190,13 +268,11 @@ class DownloadManager:
         """
         key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}"
         if key not in DownloadManager._REMOTE_RESOURCES_CACHE:
-            res = await DownloadManager._client.post("https://api.github.com/graphql", json={
-                "query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)
-            })
+            if "$pagination" in GITHUB_API_QUERIES[query]:
+                res = await DownloadManager._fetch_graphql_paginated(query, **kwargs)
+            else:
+                res = await DownloadManager._fetch_graphql_query(query, **kwargs)
             DownloadManager._REMOTE_RESOURCES_CACHE[key] = res
         else:
             res = DownloadManager._REMOTE_RESOURCES_CACHE[key]
-        if res.status_code == 200:
-            return res.json()
-        else:
-            raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}")
+        return res
diff --git a/sources/loc.py b/sources/loc.py
index 7b5cfb1..eb5ff23 100644
--- a/sources/loc.py
+++ b/sources/loc.py
@@ -19,11 +19,11 @@ class LinesOfCode:
     async def calculateLoc(self):
         result = self.repositoryData
         yearly_data = {}
-        total = len(result['data']['user']['repositories']['edges'])
-        for ind, repo in enumerate(result['data']['user']['repositories']['edges']):
-            if repo['node']['name'] not in self.ignored_repos:
-                print(f"{ind}/{total}", "Retrieving repo:", repo['node']["owner"]["login"], repo['node']['name'])
-                await self.getCommitStat(repo['node'], yearly_data)
+        total = len(result['data']['user']['repositories']['nodes'])
+        for ind, repo in enumerate(result['data']['user']['repositories']['nodes']):
+            if repo['name'] not in self.ignored_repos:
+                print(f"{ind}/{total}", "Retrieving repo:", repo["owner"]["login"], repo['name'])
+                await self.getCommitStat(repo, yearly_data)
                 await sleep(0.7)
         return yearly_data
 
@@ -43,26 +43,27 @@ class LinesOfCode:
             return 4
 
     async def getCommitStat(self, repoDetails, yearly_data):
-        commit_data = await DownloadManager.get_remote_graphql("repository_commit_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'], id=self.user.node_id)
-
-        if commit_data["data"]["repository"] is None:
+        branch_data = await DownloadManager.get_remote_graphql("repository_branches_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'])
+        if branch_data["data"]["repository"] is None:
             print("\tSkipping:", repoDetails['name'])
             return
 
 
-        for commit in [commit["node"] for branch in commit_data["data"]["repository"]["refs"]["edges"] for commit in branch["node"]["target"]["history"]["edges"]]:
-            date = re.search(r'\d+-\d+-\d+', commit["committedDate"]).group(0)
-            curr_year = datetime.datetime.fromisoformat(date).year
-            quarter = self.getQuarter(date)
+        for branch in branch_data["data"]["repository"]["refs"]["nodes"]:
+            commit_data = await DownloadManager.get_remote_graphql("repository_branch_commit_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'], branch=branch["name"], id=self.user.node_id)
 
-            if repoDetails['primaryLanguage'] is not None:
-                if curr_year not in yearly_data:
-                    yearly_data[curr_year] = {}
-                if quarter not in yearly_data[curr_year]:
-                    yearly_data[curr_year][quarter] = {}
-                if repoDetails['primaryLanguage']['name'] not in yearly_data[curr_year][quarter]:
-                    yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] = 0
-                yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] += (commit["additions"] - commit["deletions"])
+            for commit in commit_data["data"]["repository"]["ref"]["target"]["history"]["nodes"]:
+                date = re.search(r'\d+-\d+-\d+', commit["committedDate"]).group(0)
+                curr_year = datetime.datetime.fromisoformat(date).year
+                quarter = self.getQuarter(date)
+                if repoDetails['primaryLanguage'] is not None:
+                    if curr_year not in yearly_data:
+                        yearly_data[curr_year] = {}
+                    if quarter not in yearly_data[curr_year]:
+                        yearly_data[curr_year][quarter] = {}
+                    if repoDetails['primaryLanguage']['name'] not in yearly_data[curr_year][quarter]:
+                        yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] = 0
+                    yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] += (commit["additions"] - commit["deletions"])
 
     def pushChart(self):
         repo = self.g.get_repo(f"{self.user.login}/{self.user.login}")
diff --git a/sources/main.py b/sources/main.py
index 30b5b9d..fdde122 100644
--- a/sources/main.py
+++ b/sources/main.py
@@ -139,9 +139,9 @@ async def generate_commit_list(tz):
         if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None:
             continue
 
-        committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["edges"]
+        committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]
         for committedDate in committed_dates:
-            date = datetime.datetime.strptime(committedDate["node"]["committedDate"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc).astimezone(timezone(tz))
+            date = datetime.datetime.strptime(committedDate["committedDate"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc).astimezone(timezone(tz))
             hour = date.hour
             weekday = date.strftime('%A')
             if 6 <= hour < 12:
@@ -266,10 +266,10 @@ async def get_waka_time_stats():
 def generate_language_per_repo(result):
     language_count = {}
     total = 0
-    for repo in result['data']['user']['repositories']['edges']:
-        if repo['node']['primaryLanguage'] is None:
+    for repo in result['data']['user']['repositories']['nodes']:
+        if repo['primaryLanguage'] is None:
             continue
-        language = repo['node']['primaryLanguage']['name']
+        language = repo['primaryLanguage']['name']
         total += 1
         if language not in language_count.keys():
             language_count[language] = {}