diff --git a/sources/graphics_list_formatter.py b/sources/graphics_list_formatter.py index de6c25c..10f4862 100644 --- a/sources/graphics_list_formatter.py +++ b/sources/graphics_list_formatter.py @@ -97,9 +97,9 @@ async def make_commit_day_time_list(time_zone: str) -> str: if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None: continue - committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["edges"] + committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"] for committed_date in committed_dates: - local_date = datetime.strptime(committed_date["node"]["committedDate"], "%Y-%m-%dT%H:%M:%SZ") + local_date = datetime.strptime(committed_date["committedDate"], "%Y-%m-%dT%H:%M:%SZ") date = local_date.replace(tzinfo=utc).astimezone(timezone(time_zone)) day_times[date.hour // 6] += 1 @@ -133,9 +133,9 @@ def make_language_per_repo_list(repositories: Dict) -> str: :returns: string representation of statistics. 
""" language_count = dict() - repos_with_language = [repo for repo in repositories["data"]["user"]["repositories"]["edges"] if repo["node"]["primaryLanguage"] is not None] + repos_with_language = [repo for repo in repositories["data"]["user"]["repositories"]["nodes"] if repo["primaryLanguage"] is not None] for repo in repos_with_language: - language = repo["node"]["primaryLanguage"]["name"] + language = repo["primaryLanguage"]["name"] language_count[language] = language_count.get(language, {"count": 0}) language_count[language]["count"] += 1 diff --git a/sources/manager_download.py b/sources/manager_download.py index 8ae1a0a..37e2592 100644 --- a/sources/manager_download.py +++ b/sources/manager_download.py @@ -2,7 +2,7 @@ from asyncio import Task from hashlib import md5 from json import dumps from string import Template -from typing import Awaitable, Dict, Callable, Optional +from typing import Awaitable, Dict, Callable, Optional, List, Tuple from httpx import AsyncClient from yaml import safe_load @@ -17,7 +17,7 @@ GITHUB_API_QUERIES = { "repos_contributed_to": """ { user(login: "$username") { - repositoriesContributedTo(last: 100, includeUserRepositories: true) { + repositoriesContributedTo(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, includeUserRepositories: true) { nodes { isFork name @@ -25,6 +25,10 @@ GITHUB_API_QUERIES = { login } } + pageInfo { + endCursor + hasNextPage + } } } }""", @@ -36,11 +40,13 @@ GITHUB_API_QUERIES = { defaultBranchRef { target { ... 
on Commit { - history(first: 100, author: { id: "$id" }) { - edges { - node { - committedDate - } + history($pagination, author: { id: "$id" }) { + nodes { + committedDate + } + pageInfo { + endCursor + hasNextPage } } } @@ -53,46 +59,59 @@ GITHUB_API_QUERIES = { "user_repository_list": """ { user(login: "$username") { - repositories(orderBy: {field: CREATED_AT, direction: ASC}, last: 100, affiliations: [OWNER, COLLABORATOR], isFork: false) { - edges { - node { - primaryLanguage { - name - } + repositories(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, affiliations: [OWNER, COLLABORATOR], isFork: false) { + nodes { + primaryLanguage { name - owner { - login - } } + name + owner { + login + } + } + pageInfo { + endCursor + hasNextPage + } + } + } +} +""", + # Query to collect info about branches in the given repository, including: names. + "repo_branch_list": """ +{ + repository(owner: "$owner", name: "$name") { + refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, $pagination) { + nodes { + name + } + pageInfo { + endCursor + hasNextPage } } } } """, # Query to collect info about user commits to given repository, including: commit date, additions and deletions numbers. - # TODO: increase branch number with pagination "repo_commit_list": """ { repository(owner: "$owner", name: "$name") { - refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, first: 10) { - edges { - node { - ... on Ref { - target { + ref(qualifiedName: "refs/heads/$branch") { + target { + ... on Commit { + history(author: { id: "$id" }, $pagination) { + nodes { ... on Commit { - history(first: 100, author: { id: "$id" }) { - edges { - node { - ... 
on Commit { - additions - deletions - committedDate - } - } - } - } + additions + deletions + committedDate } } + pageInfo { + endCursor + hasNextPage + } } } } } @@ -198,13 +217,72 @@ class DownloadManager: """ return await DownloadManager._get_remote_resource(resource, safe_load) + @staticmethod + async def _fetch_graphql_query(query: str, **kwargs) -> Dict: + """ + Execute GitHub GraphQL API simple query. + :param query: Dynamic query identifier. + :param kwargs: Parameters for substitution of variables in dynamic query. + :returns: Response JSON dictionary. + """ + res = await DownloadManager._client.post("https://api.github.com/graphql", json={ + "query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs) + }) + if res.status_code == 200: + return res.json() + else: + raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}") + + @staticmethod + def _find_pagination_and_data_list(response: Dict) -> Tuple[List, Dict]: + """ + Parses response as a paginated response. + NB! All paginated responses are expected to have the following structure: + { + ...: { + "nodes": [], + "pageInfo" : {} + } + } + Where `...` stands for any number of dictionaries containing _one single key_ only. + If the structure of the response isn't met, an exception is thrown! + :param response: Response JSON dictionary. + :returns: Tuple of the acquired pagination data list ("nodes" key) and pagination info dict ("pageInfo" key). + """ + if "nodes" in response.keys() and "pageInfo" in response.keys(): + return response["nodes"], response["pageInfo"] + elif len(response) == 1: + return DownloadManager._find_pagination_and_data_list(response[list(response.keys())[0]]) + else: + raise RuntimeError(f"Received structure '{response}' isn't a paginated response!") + + @staticmethod + async def _fetch_graphql_paginated(query: str, **kwargs) -> Dict: + """ + Execute GitHub GraphQL API paginated query. 
+ Queries 100 new results each time until no more results are left. + Merges the paginated result lists into a single response, clears pagination-related info. + :param query: Dynamic query identifier. + :param kwargs: Parameters for substitution of variables in dynamic query. + :returns: Response JSON dictionary. + """ + initial_query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination="first: 100") + page_list, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response) + while page_info["hasNextPage"]: + query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination=f'first: 100, after: "{page_info["endCursor"]}"') + new_page_list, page_info = DownloadManager._find_pagination_and_data_list(query_response) + page_list += new_page_list + _, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response) + page_info.clear() + return initial_query_response + @staticmethod async def get_remote_graphql(query: str, **kwargs) -> Dict: """ Execute GitHub GraphQL API query. The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs. If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash. - NB! Caching is done before response parsing - to throw exception on accessing cached erroneous response. + Merges paginated sub-queries if pagination is required for the query. Parse and return response as JSON. :param query: Dynamic query identifier. :param kwargs: Parameters for substitution of variables in dynamic query. 
@@ -212,11 +290,11 @@ class DownloadManager: """ key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}" if key not in DownloadManager._REMOTE_RESOURCES_CACHE: - res = await DownloadManager._client.post("https://api.github.com/graphql", json={"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)}) + if "$pagination" in GITHUB_API_QUERIES[query]: + res = await DownloadManager._fetch_graphql_paginated(query, **kwargs) + else: + res = await DownloadManager._fetch_graphql_query(query, **kwargs) DownloadManager._REMOTE_RESOURCES_CACHE[key] = res else: res = DownloadManager._REMOTE_RESOURCES_CACHE[key] - if res.status_code == 200: - return res.json() - else: - raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}") + return res diff --git a/sources/yearly_commit_calculator.py b/sources/yearly_commit_calculator.py index 891025a..8170c7d 100644 --- a/sources/yearly_commit_calculator.py +++ b/sources/yearly_commit_calculator.py @@ -16,11 +16,11 @@ async def calculate_yearly_commit_data(repositories: Dict) -> Dict: :returns: Commit quarter yearly data dictionary. 
""" yearly_data = dict() - total = len(repositories["data"]["user"]["repositories"]["edges"]) - for ind, repo in enumerate(repositories["data"]["user"]["repositories"]["edges"]): - if repo["node"]["name"] not in EM.IGNORED_REPOS: - print(f"{ind + 1}/{total}", "Retrieving repo:", repo["node"]["owner"]["login"], repo["node"]["name"]) - await update_yearly_data_with_commit_stats(repo["node"], yearly_data) + total = len(repositories['data']['user']['repositories']['nodes']) + for ind, repo in enumerate(repositories['data']['user']['repositories']['nodes']): + if repo['name'] not in EM.IGNORED_REPOS: + print(f"{ind + 1}/{total}", "Retrieving repo:", repo["owner"]["login"], repo['name']) + await update_yearly_data_with_commit_stats(repo, yearly_data) return yearly_data @@ -32,22 +32,24 @@ async def update_yearly_data_with_commit_stats(repo_details: Dict, yearly_data: :param repo_details: Dictionary with information about the given repository. :param yearly_data: Yearly data dictionary to update. 
""" - commit_data = await DM.get_remote_graphql("repo_commit_list", owner=repo_details["owner"]["login"], name=repo_details["name"], id=GHM.USER.node_id) - - if commit_data["data"]["repository"] is None: + owner = repo_details["owner"]["login"] + branch_data = await DM.get_remote_graphql("repo_branch_list", owner=owner, name=repo_details['name']) + if branch_data["data"]["repository"] is None: print(f"\tSkipping repo: {repo_details['name']}") return dict() - for commit in [commit["node"] for branch in commit_data["data"]["repository"]["refs"]["edges"] for commit in branch["node"]["target"]["history"]["edges"]]: - date = search(r"\d+-\d+-\d+", commit["committedDate"]).group() - curr_year = datetime.fromisoformat(date).year - quarter = (datetime.fromisoformat(date).month - 1) // 3 + 1 + for branch in branch_data["data"]["repository"]["refs"]["nodes"]: + commit_data = await DM.get_remote_graphql("repo_commit_list", owner=owner, name=repo_details['name'], branch=branch["name"], id=GHM.USER.node_id) + for commit in commit_data["data"]["repository"]["ref"]["target"]["history"]["nodes"]: + date = search(r"\d+-\d+-\d+", commit["committedDate"]).group() + curr_year = datetime.fromisoformat(date).year + quarter = (datetime.fromisoformat(date).month - 1) // 3 + 1 - if repo_details["primaryLanguage"] is not None: - if curr_year not in yearly_data: - yearly_data[curr_year] = dict() - if quarter not in yearly_data[curr_year]: - yearly_data[curr_year][quarter] = dict() - if repo_details["primaryLanguage"]["name"] not in yearly_data[curr_year][quarter]: - yearly_data[curr_year][quarter][repo_details["primaryLanguage"]["name"]] = 0 - yearly_data[curr_year][quarter][repo_details["primaryLanguage"]["name"]] += commit["additions"] - commit["deletions"] + if repo_details["primaryLanguage"] is not None: + if curr_year not in yearly_data: + yearly_data[curr_year] = dict() + if quarter not in yearly_data[curr_year]: + yearly_data[curr_year][quarter] = dict() + if 
repo_details["primaryLanguage"]["name"] not in yearly_data[curr_year][quarter]: + yearly_data[curr_year][quarter][repo_details["primaryLanguage"]["name"]] = 0 + yearly_data[curr_year][quarter][repo_details["primaryLanguage"]["name"]] += commit["additions"] - commit["deletions"]