Merge pull request #375 from anmol098/feat/pagination

Pagination support added
This commit is contained in:
Alexander Sergeev
2023-02-21 20:29:22 +01:00
committed by GitHub
3 changed files with 143 additions and 66 deletions

View File

@@ -1,7 +1,7 @@
from hashlib import md5 from hashlib import md5
from json import dumps from json import dumps
from string import Template from string import Template
from typing import Awaitable, Dict, Callable, Optional from typing import Awaitable, Dict, Callable, Optional, List, Tuple
from httpx import AsyncClient from httpx import AsyncClient
from yaml import safe_load from yaml import safe_load
@@ -12,7 +12,7 @@ GITHUB_API_QUERIES = {
"repositories_contributed_to": """ "repositories_contributed_to": """
{ {
user(login: "$username") { user(login: "$username") {
repositoriesContributedTo(last: 100, includeUserRepositories: true) { repositoriesContributedTo(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, includeUserRepositories: true) {
nodes { nodes {
isFork isFork
name name
@@ -20,6 +20,10 @@ GITHUB_API_QUERIES = {
login login
} }
} }
pageInfo {
endCursor
hasNextPage
}
} }
} }
}""", }""",
@@ -29,11 +33,13 @@ GITHUB_API_QUERIES = {
defaultBranchRef { defaultBranchRef {
target { target {
... on Commit { ... on Commit {
history(first: 100, author: { id: "$id" }) { history($pagination, author: { id: "$id" }) {
edges { nodes {
node {
committedDate committedDate
} }
pageInfo {
endCursor
hasNextPage
} }
} }
} }
@@ -44,9 +50,8 @@ GITHUB_API_QUERIES = {
"user_repository_list": """ "user_repository_list": """
{ {
user(login: "$username") { user(login: "$username") {
repositories(orderBy: {field: CREATED_AT, direction: ASC}, last: 100, affiliations: [OWNER, COLLABORATOR], isFork: false) { repositories(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, affiliations: [OWNER, COLLABORATOR], isFork: false) {
edges { nodes {
node {
primaryLanguage { primaryLanguage {
name name
} }
@@ -55,32 +60,46 @@ GITHUB_API_QUERIES = {
login login
} }
} }
pageInfo {
endCursor
hasNextPage
} }
} }
} }
} }
""", """,
"repository_commit_list": """ "repository_branches_list": """
{ {
repository(owner: "$owner", name: "$name") { repository(owner: "$owner", name: "$name") {
refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, first: 10) { refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, $pagination) {
edges { nodes {
node { name
... on Ref { }
pageInfo {
endCursor
hasNextPage
}
}
}
}
""",
"repository_branch_commit_list": """
{
repository(owner: "$owner", name: "$name") {
ref(qualifiedName: "refs/heads/$branch") {
target { target {
... on Commit { ... on Commit {
history(first: 100, author: { id: "$id" }) { history(author: { id: "$id" }, $pagination) {
edges { nodes {
node {
... on Commit { ... on Commit {
additions additions
deletions deletions
committedDate committedDate
} }
} }
} pageInfo {
} endCursor
} hasNextPage
} }
} }
} }
@@ -176,13 +195,72 @@ class DownloadManager:
""" """
return await DownloadManager._get_remote_resource(resource, safe_load) return await DownloadManager._get_remote_resource(resource, safe_load)
@staticmethod
async def _fetch_graphql_query(query: str, **kwargs) -> Dict:
"""
Execute GitHub GraphQL API simple query.
:param query: Dynamic query identifier.
:param kwargs: Parameters for substitution of variables in dynamic query.
:return: Response JSON dictionary.
"""
res = await DownloadManager._client.post("https://api.github.com/graphql", json={
"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)
})
if res.status_code == 200:
return res.json()
else:
raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}")
@staticmethod
def _find_pagination_and_data_list(response: Dict) -> Tuple[List, Dict]:
"""
Parses response as a paginated response.
NB! All paginated responses are expected to have the following structure:
{
...: {
"nodes": [],
"pageInfo" : {}
}
}
Where `...` stands for any number of dictionaries, each containing _one single key_ only.
If the structure of the response isn't met, an exception is thrown!
:param response: Response JSON dictionary.
:returns: Tuple of the acquired pagination data list ("nodes" key) and pagination info dict ("pageInfo" key).
"""
if "nodes" in response.keys() and "pageInfo" in response.keys():
return response["nodes"], response["pageInfo"]
elif len(response) == 1:
return DownloadManager._find_pagination_and_data_list(response[list(response.keys())[0]])
else:
raise RuntimeError(f"Received structure '{response}' isn't a paginated response!")
@staticmethod
async def _fetch_graphql_paginated(query: str, **kwargs) -> Dict:
"""
Execute GitHub GraphQL API paginated query.
Queries 100 new results each time until no more results are left.
Merges result list into single query, clears pagination-related info.
:param query: Dynamic query identifier.
:param kwargs: Parameters for substitution of variables in dynamic query.
:return: Response JSON dictionary.
"""
initial_query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination=f"first: 100")
page_list, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response)
while page_info["hasNextPage"]:
query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination=f'first: 100, after: "{page_info["endCursor"]}"')
new_page_list, page_info = DownloadManager._find_pagination_and_data_list(query_response)
page_list += new_page_list
_, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response)
page_info.clear()
return initial_query_response
@staticmethod @staticmethod
async def get_remote_graphql(query: str, **kwargs) -> Dict: async def get_remote_graphql(query: str, **kwargs) -> Dict:
""" """
Execute GitHub GraphQL API query. Execute GitHub GraphQL API query.
The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs. The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs.
If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash. If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash.
NB! Caching is done before response parsing - to throw exception on accessing cached erroneous response. Merges paginated sub-queries if pagination is required for the query.
Parse and return response as JSON. Parse and return response as JSON.
:param query: Dynamic query identifier. :param query: Dynamic query identifier.
:param kwargs: Parameters for substitution of variables in dynamic query. :param kwargs: Parameters for substitution of variables in dynamic query.
@@ -190,13 +268,11 @@ class DownloadManager:
""" """
key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}" key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}"
if key not in DownloadManager._REMOTE_RESOURCES_CACHE: if key not in DownloadManager._REMOTE_RESOURCES_CACHE:
res = await DownloadManager._client.post("https://api.github.com/graphql", json={ if "$pagination" in GITHUB_API_QUERIES[query]:
"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs) res = await DownloadManager._fetch_graphql_paginated(query, **kwargs)
}) else:
res = await DownloadManager._fetch_graphql_query(query, **kwargs)
DownloadManager._REMOTE_RESOURCES_CACHE[key] = res DownloadManager._REMOTE_RESOURCES_CACHE[key] = res
else: else:
res = DownloadManager._REMOTE_RESOURCES_CACHE[key] res = DownloadManager._REMOTE_RESOURCES_CACHE[key]
if res.status_code == 200: return res
return res.json()
else:
raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}")

View File

@@ -19,11 +19,11 @@ class LinesOfCode:
async def calculateLoc(self): async def calculateLoc(self):
result = self.repositoryData result = self.repositoryData
yearly_data = {} yearly_data = {}
total = len(result['data']['user']['repositories']['edges']) total = len(result['data']['user']['repositories']['nodes'])
for ind, repo in enumerate(result['data']['user']['repositories']['edges']): for ind, repo in enumerate(result['data']['user']['repositories']['nodes']):
if repo['node']['name'] not in self.ignored_repos: if repo['name'] not in self.ignored_repos:
print(f"{ind}/{total}", "Retrieving repo:", repo['node']["owner"]["login"], repo['node']['name']) print(f"{ind}/{total}", "Retrieving repo:", repo["owner"]["login"], repo['name'])
await self.getCommitStat(repo['node'], yearly_data) await self.getCommitStat(repo, yearly_data)
await sleep(0.7) await sleep(0.7)
return yearly_data return yearly_data
@@ -43,13 +43,15 @@ class LinesOfCode:
return 4 return 4
async def getCommitStat(self, repoDetails, yearly_data): async def getCommitStat(self, repoDetails, yearly_data):
commit_data = await DownloadManager.get_remote_graphql("repository_commit_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'], id=self.user.node_id) branch_data = await DownloadManager.get_remote_graphql("repository_branches_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'])
if branch_data["data"]["repository"] is None:
if commit_data["data"]["repository"] is None:
print("\tSkipping:", repoDetails['name']) print("\tSkipping:", repoDetails['name'])
return return
for commit in [commit["node"] for branch in commit_data["data"]["repository"]["refs"]["edges"] for commit in branch["node"]["target"]["history"]["edges"]]: for branch in branch_data["data"]["repository"]["refs"]["nodes"]:
commit_data = await DownloadManager.get_remote_graphql("repository_branch_commit_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'], branch=branch["name"], id=self.user.node_id)
for commit in commit_data["data"]["repository"]["ref"]["target"]["history"]["nodes"]:
date = re.search(r'\d+-\d+-\d+', commit["committedDate"]).group(0) date = re.search(r'\d+-\d+-\d+', commit["committedDate"]).group(0)
curr_year = datetime.datetime.fromisoformat(date).year curr_year = datetime.datetime.fromisoformat(date).year
quarter = self.getQuarter(date) quarter = self.getQuarter(date)
@@ -63,7 +65,6 @@ class LinesOfCode:
yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] = 0 yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] = 0
yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] += (commit["additions"] - commit["deletions"]) yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] += (commit["additions"] - commit["deletions"])
def pushChart(self): def pushChart(self):
repo = self.g.get_repo(f"{self.user.login}/{self.user.login}") repo = self.g.get_repo(f"{self.user.login}/{self.user.login}")
committer = InputGitAuthor('readme-bot', '41898282+github-actions[bot]@users.noreply.github.com') committer = InputGitAuthor('readme-bot', '41898282+github-actions[bot]@users.noreply.github.com')

View File

@@ -139,9 +139,9 @@ async def generate_commit_list(tz):
if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None: if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None:
continue continue
committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["edges"] committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]
for committedDate in committed_dates: for committedDate in committed_dates:
date = datetime.datetime.strptime(committedDate["node"]["committedDate"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc).astimezone(timezone(tz)) date = datetime.datetime.strptime(committedDate["committedDate"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc).astimezone(timezone(tz))
hour = date.hour hour = date.hour
weekday = date.strftime('%A') weekday = date.strftime('%A')
if 6 <= hour < 12: if 6 <= hour < 12:
@@ -266,10 +266,10 @@ async def get_waka_time_stats():
def generate_language_per_repo(result): def generate_language_per_repo(result):
language_count = {} language_count = {}
total = 0 total = 0
for repo in result['data']['user']['repositories']['edges']: for repo in result['data']['user']['repositories']['nodes']:
if repo['node']['primaryLanguage'] is None: if repo['primaryLanguage'] is None:
continue continue
language = repo['node']['primaryLanguage']['name'] language = repo['primaryLanguage']['name']
total += 1 total += 1
if language not in language_count.keys(): if language not in language_count.keys():
language_count[language] = {} language_count[language] = {}