keeping up to date

This commit is contained in:
pseusys
2023-02-21 20:57:37 +01:00
3 changed files with 143 additions and 63 deletions

View File

@@ -97,9 +97,9 @@ async def make_commit_day_time_list(time_zone: str) -> str:
if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None: if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None:
continue continue
committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["edges"] committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]
for committed_date in committed_dates: for committed_date in committed_dates:
local_date = datetime.strptime(committed_date["node"]["committedDate"], "%Y-%m-%dT%H:%M:%SZ") local_date = datetime.strptime(committed_date["committedDate"], "%Y-%m-%dT%H:%M:%SZ")
date = local_date.replace(tzinfo=utc).astimezone(timezone(time_zone)) date = local_date.replace(tzinfo=utc).astimezone(timezone(time_zone))
day_times[date.hour // 6] += 1 day_times[date.hour // 6] += 1
@@ -133,9 +133,9 @@ def make_language_per_repo_list(repositories: Dict) -> str:
:returns: string representation of statistics. :returns: string representation of statistics.
""" """
language_count = dict() language_count = dict()
repos_with_language = [repo for repo in repositories["data"]["user"]["repositories"]["edges"] if repo["node"]["primaryLanguage"] is not None] repos_with_language = [repo for repo in repositories["data"]["user"]["repositories"]["nodes"] if repo["primaryLanguage"] is not None]
for repo in repos_with_language: for repo in repos_with_language:
language = repo["node"]["primaryLanguage"]["name"] language = repo["primaryLanguage"]["name"]
language_count[language] = language_count.get(language, {"count": 0}) language_count[language] = language_count.get(language, {"count": 0})
language_count[language]["count"] += 1 language_count[language]["count"] += 1

View File

@@ -2,7 +2,7 @@ from asyncio import Task
from hashlib import md5 from hashlib import md5
from json import dumps from json import dumps
from string import Template from string import Template
from typing import Awaitable, Dict, Callable, Optional from typing import Awaitable, Dict, Callable, Optional, List, Tuple
from httpx import AsyncClient from httpx import AsyncClient
from yaml import safe_load from yaml import safe_load
@@ -17,7 +17,7 @@ GITHUB_API_QUERIES = {
"repos_contributed_to": """ "repos_contributed_to": """
{ {
user(login: "$username") { user(login: "$username") {
repositoriesContributedTo(last: 100, includeUserRepositories: true) { repositoriesContributedTo(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, includeUserRepositories: true) {
nodes { nodes {
isFork isFork
name name
@@ -25,6 +25,10 @@ GITHUB_API_QUERIES = {
login login
} }
} }
pageInfo {
endCursor
hasNextPage
}
} }
} }
}""", }""",
@@ -36,11 +40,13 @@ GITHUB_API_QUERIES = {
defaultBranchRef { defaultBranchRef {
target { target {
... on Commit { ... on Commit {
history(first: 100, author: { id: "$id" }) { history($pagination, author: { id: "$id" }) {
edges { nodes {
node {
committedDate committedDate
} }
pageInfo {
endCursor
hasNextPage
} }
} }
} }
@@ -53,9 +59,8 @@ GITHUB_API_QUERIES = {
"user_repository_list": """ "user_repository_list": """
{ {
user(login: "$username") { user(login: "$username") {
repositories(orderBy: {field: CREATED_AT, direction: ASC}, last: 100, affiliations: [OWNER, COLLABORATOR], isFork: false) { repositories(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, affiliations: [OWNER, COLLABORATOR], isFork: false) {
edges { nodes {
node {
primaryLanguage { primaryLanguage {
name name
} }
@@ -64,34 +69,48 @@ GITHUB_API_QUERIES = {
login login
} }
} }
pageInfo {
endCursor
hasNextPage
}
}
}
}
""",
# Query to collect info about branches in the given repository, including: names.
"repo_branch_list": """
{
repository(owner: "$owner", name: "$name") {
refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, $pagination) {
nodes {
name
}
pageInfo {
endCursor
hasNextPage
} }
} }
} }
} }
""", """,
# Query to collect info about user commits to given repository, including: commit date, additions and deletions numbers. # Query to collect info about user commits to given repository, including: commit date, additions and deletions numbers.
# TODO: increase branch number with pagination
"repo_commit_list": """ "repo_commit_list": """
{ {
repository(owner: "$owner", name: "$name") { repository(owner: "$owner", name: "$name") {
refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, first: 10) { ref(qualifiedName: "refs/heads/$branch") {
edges {
node {
... on Ref {
target { target {
... on Commit { ... on Commit {
history(first: 100, author: { id: "$id" }) { history(author: { id: "$id" }, $pagination) {
edges { nodes {
node {
... on Commit { ... on Commit {
additions additions
deletions deletions
committedDate committedDate
} }
} }
} pageInfo {
} endCursor
} hasNextPage
} }
} }
} }
@@ -198,13 +217,72 @@ class DownloadManager:
""" """
return await DownloadManager._get_remote_resource(resource, safe_load) return await DownloadManager._get_remote_resource(resource, safe_load)
@staticmethod
async def _fetch_graphql_query(query: str, **kwargs) -> Dict:
    """
    Execute GitHub GraphQL API simple query.

    The query text is taken from `GITHUB_API_QUERIES` and its `$variables` are substituted from kwargs.

    :param query: Dynamic query identifier (a key of `GITHUB_API_QUERIES`).
    :param kwargs: Parameters for substitution of variables in dynamic query.
    :return: Response JSON dictionary.
    :raises KeyError: If the query identifier is unknown or a template variable is missing from kwargs.
    :raises Exception: If the API answers with any status code other than 200.
    """
    payload = {"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)}
    res = await DownloadManager._client.post("https://api.github.com/graphql", json=payload)
    if res.status_code == 200:
        return res.json()

    # An error body is not guaranteed to be JSON (e.g. an HTML gateway error page).
    # Fall back to the raw text so that the status code is reported instead of a decode error.
    try:
        details = res.json()
    except ValueError:
        details = res.text
    raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {details}")
@staticmethod
def _find_pagination_and_data_list(response: Dict) -> Tuple[List, Dict]:
    """
    Parses response as a paginated response.
    NB! All paginated responses are expected to have the following structure:
    {
        ...: {
            "nodes": [],
            "pageInfo" : {}
        }
    }
    Where `...` stands for any number of dictionaries containing _one single key_ only.
    If the chain of single-key dictionaries ends in a null value (e.g. `"repository": null`),
    the response is treated as an empty page: an empty list and `{"hasNextPage": False}` are returned.
    If the structure of the response isn't met in any other way, an exception is thrown!

    :param response: Response JSON dictionary.
    :returns: Tuple of the acquired pagination data list ("nodes" key) and pagination info dict ("pageInfo" key).
    :raises RuntimeError: If the response doesn't follow the paginated structure described above.
    """
    current = response
    while True:
        if current is None:
            # A null object has nothing to paginate; callers perform their own `is None` checks afterwards.
            return [], {"hasNextPage": False}
        if not isinstance(current, dict):
            raise RuntimeError(f"Received structure '{current}' isn't a paginated response!")
        if "nodes" in current and "pageInfo" in current:
            # The very same objects are returned (no copies), so callers may extend or clear them in place.
            return current["nodes"], current["pageInfo"]
        if len(current) != 1:
            raise RuntimeError(f"Received structure '{current}' isn't a paginated response!")
        # Descend into the only value of the single-key dictionary.
        current = next(iter(current.values()))
@staticmethod
async def _fetch_graphql_paginated(query: str, **kwargs) -> Dict:
    """
    Execute GitHub GraphQL API paginated query.

    Requests pages of 100 results, following the "endCursor" of each page while "hasNextPage" is set.
    Every following page is appended to the "nodes" list of the first response,
    and the "pageInfo" dict of the first response is emptied before it is returned.

    :param query: Dynamic query identifier.
    :param kwargs: Parameters for substitution of variables in dynamic query.
    :return: First response JSON dictionary, holding the merged results of all pages.
    """
    merged_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination="first: 100")
    merged_nodes, first_page_info = DownloadManager._find_pagination_and_data_list(merged_response)

    # `merged_nodes` and `first_page_info` are expected to be the objects stored inside `merged_response`,
    # so extending / clearing them in place updates the response that is returned below.
    cursor_info = first_page_info
    while cursor_info["hasNextPage"]:
        cursor = cursor_info["endCursor"]
        next_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination=f'first: 100, after: "{cursor}"')
        extra_nodes, cursor_info = DownloadManager._find_pagination_and_data_list(next_response)
        merged_nodes += extra_nodes

    # Pagination info is meaningless once all the pages are merged.
    first_page_info.clear()
    return merged_response
@staticmethod @staticmethod
async def get_remote_graphql(query: str, **kwargs) -> Dict: async def get_remote_graphql(query: str, **kwargs) -> Dict:
""" """
Execute GitHub GraphQL API query. Execute GitHub GraphQL API query.
The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs. The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs.
If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash. If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash.
NB! Caching is done before response parsing - to throw exception on accessing cached erroneous response. Merges paginated sub-queries if pagination is required for the query.
Parse and return response as JSON. Parse and return response as JSON.
:param query: Dynamic query identifier. :param query: Dynamic query identifier.
:param kwargs: Parameters for substitution of variables in dynamic query. :param kwargs: Parameters for substitution of variables in dynamic query.
@@ -212,11 +290,11 @@ class DownloadManager:
""" """
key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}" key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}"
if key not in DownloadManager._REMOTE_RESOURCES_CACHE: if key not in DownloadManager._REMOTE_RESOURCES_CACHE:
res = await DownloadManager._client.post("https://api.github.com/graphql", json={"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)}) if "$pagination" in GITHUB_API_QUERIES[query]:
res = await DownloadManager._fetch_graphql_paginated(query, **kwargs)
else:
res = await DownloadManager._fetch_graphql_query(query, **kwargs)
DownloadManager._REMOTE_RESOURCES_CACHE[key] = res DownloadManager._REMOTE_RESOURCES_CACHE[key] = res
else: else:
res = DownloadManager._REMOTE_RESOURCES_CACHE[key] res = DownloadManager._REMOTE_RESOURCES_CACHE[key]
if res.status_code == 200: return res
return res.json()
else:
raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}")

View File

@@ -16,11 +16,11 @@ async def calculate_yearly_commit_data(repositories: Dict) -> Dict:
:returns: Commit quarter yearly data dictionary. :returns: Commit quarter yearly data dictionary.
""" """
yearly_data = dict() yearly_data = dict()
total = len(repositories["data"]["user"]["repositories"]["edges"]) total = len(repositories['data']['user']['repositories']['nodes'])
for ind, repo in enumerate(repositories["data"]["user"]["repositories"]["edges"]): for ind, repo in enumerate(repositories['data']['user']['repositories']['nodes']):
if repo["node"]["name"] not in EM.IGNORED_REPOS: if repo['name'] not in EM.IGNORED_REPOS:
print(f"{ind + 1}/{total}", "Retrieving repo:", repo["node"]["owner"]["login"], repo["node"]["name"]) print(f"{ind + 1}/{total}", "Retrieving repo:", repo["owner"]["login"], repo['name'])
await update_yearly_data_with_commit_stats(repo["node"], yearly_data) await update_yearly_data_with_commit_stats(repo, yearly_data)
return yearly_data return yearly_data
@@ -32,13 +32,15 @@ async def update_yearly_data_with_commit_stats(repo_details: Dict, yearly_data:
:param repo_details: Dictionary with information about the given repository. :param repo_details: Dictionary with information about the given repository.
:param yearly_data: Yearly data dictionary to update. :param yearly_data: Yearly data dictionary to update.
""" """
commit_data = await DM.get_remote_graphql("repo_commit_list", owner=repo_details["owner"]["login"], name=repo_details["name"], id=GHM.USER.node_id) owner = repo_details["owner"]["login"]
branch_data = await DM.get_remote_graphql("repo_branch_list", owner=owner, name=repo_details['name'])
if commit_data["data"]["repository"] is None: if branch_data["data"]["repository"] is None:
print(f"\tSkipping repo: {repo_details['name']}") print(f"\tSkipping repo: {repo_details['name']}")
return dict() return dict()
for commit in [commit["node"] for branch in commit_data["data"]["repository"]["refs"]["edges"] for commit in branch["node"]["target"]["history"]["edges"]]: for branch in branch_data["data"]["repository"]["refs"]["nodes"]:
commit_data = await DM.get_remote_graphql("repo_commit_list", owner=owner, name=repo_details['name'], branch=branch["name"], id=GHM.USER.node_id)
for commit in commit_data["data"]["repository"]["ref"]["target"]["history"]["nodes"]:
date = search(r"\d+-\d+-\d+", commit["committedDate"]).group() date = search(r"\d+-\d+-\d+", commit["committedDate"]).group()
curr_year = datetime.fromisoformat(date).year curr_year = datetime.fromisoformat(date).year
quarter = (datetime.fromisoformat(date).month - 1) // 3 + 1 quarter = (datetime.fromisoformat(date).month - 1) // 3 + 1