Merge pull request #375 from anmol098/feat/pagination

Pagination support added
This commit is contained in:
Alexander Sergeev
2023-02-21 20:29:22 +01:00
committed by GitHub
3 changed files with 143 additions and 66 deletions

View File

@@ -1,7 +1,7 @@
from hashlib import md5
from json import dumps
from string import Template
from typing import Awaitable, Dict, Callable, Optional
from typing import Awaitable, Dict, Callable, Optional, List, Tuple
from httpx import AsyncClient
from yaml import safe_load
@@ -12,7 +12,7 @@ GITHUB_API_QUERIES = {
"repositories_contributed_to": """
{
user(login: "$username") {
repositoriesContributedTo(last: 100, includeUserRepositories: true) {
repositoriesContributedTo(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, includeUserRepositories: true) {
nodes {
isFork
name
@@ -20,6 +20,10 @@ GITHUB_API_QUERIES = {
login
}
}
pageInfo {
endCursor
hasNextPage
}
}
}
}""",
@@ -29,11 +33,13 @@ GITHUB_API_QUERIES = {
defaultBranchRef {
target {
... on Commit {
history(first: 100, author: { id: "$id" }) {
edges {
node {
history($pagination, author: { id: "$id" }) {
nodes {
committedDate
}
pageInfo {
endCursor
hasNextPage
}
}
}
@@ -44,9 +50,8 @@ GITHUB_API_QUERIES = {
"user_repository_list": """
{
user(login: "$username") {
repositories(orderBy: {field: CREATED_AT, direction: ASC}, last: 100, affiliations: [OWNER, COLLABORATOR], isFork: false) {
edges {
node {
repositories(orderBy: {field: CREATED_AT, direction: DESC}, $pagination, affiliations: [OWNER, COLLABORATOR], isFork: false) {
nodes {
primaryLanguage {
name
}
@@ -55,32 +60,46 @@ GITHUB_API_QUERIES = {
login
}
}
pageInfo {
endCursor
hasNextPage
}
}
}
}
""",
"repository_commit_list": """
"repository_branches_list": """
{
repository(owner: "$owner", name: "$name") {
refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, first: 10) {
edges {
node {
... on Ref {
refs(refPrefix: "refs/heads/", orderBy: {direction: DESC, field: TAG_COMMIT_DATE}, $pagination) {
nodes {
name
}
pageInfo {
endCursor
hasNextPage
}
}
}
}
""",
"repository_branch_commit_list": """
{
repository(owner: "$owner", name: "$name") {
ref(qualifiedName: "refs/heads/$branch") {
target {
... on Commit {
history(first: 100, author: { id: "$id" }) {
edges {
node {
history(author: { id: "$id" }, $pagination) {
nodes {
... on Commit {
additions
deletions
committedDate
}
}
}
}
}
pageInfo {
endCursor
hasNextPage
}
}
}
@@ -176,13 +195,72 @@ class DownloadManager:
"""
return await DownloadManager._get_remote_resource(resource, safe_load)
@staticmethod
async def _fetch_graphql_query(query: str, **kwargs) -> Dict:
    """
    Execute a single (non-paginated) GitHub GraphQL API query.
    The query text is looked up in `GITHUB_API_QUERIES` and has its
    `$variables` substituted from the keyword arguments.
    :param query: Dynamic query identifier.
    :param kwargs: Parameters for substitution of variables in dynamic query.
    :return: Response JSON dictionary.
    :raises Exception: If the API answers with a non-200 status code.
    """
    payload = {"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)}
    response = await DownloadManager._client.post("https://api.github.com/graphql", json=payload)
    if response.status_code != 200:
        raise Exception(f"Query '{query}' failed to run by returning code of {response.status_code}: {response.json()}")
    return response.json()
@staticmethod
def _find_pagination_and_data_list(response: Dict) -> Tuple[List, Dict]:
"""
Parses response as a paginated response.
NB! All paginated responses are expected to have the following structure:
{
...: {
"nodes": [],
"pageInfo" : {}
}
}
Where `...` states for any number of dictionaries containing _one single key_ only.
If the structure of the response isn't met, an exception is thrown!
:param response: Response JSON dictionary.
:returns: Tuple of the acquired pagination data list ("nodes" key) and pagination info dict ("pageInfo" key).
"""
if "nodes" in response.keys() and "pageInfo" in response.keys():
return response["nodes"], response["pageInfo"]
elif len(response) == 1:
return DownloadManager._find_pagination_and_data_list(response[list(response.keys())[0]])
else:
raise RuntimeError(f"Received structure '{response}' isn't a paginated response!")
@staticmethod
async def _fetch_graphql_paginated(query: str, **kwargs) -> Dict:
    """
    Execute GitHub GraphQL API paginated query.
    Queries 100 new results each time until no more results are left.
    Merges result list into single query, clears pagination-related info.
    :param query: Dynamic query identifier.
    :param kwargs: Parameters for substitution of variables in dynamic query.
    :return: Response JSON dictionary with all pages' nodes merged in.
    """
    # Fix: plain string literal — the original used an f-string with no
    # placeholders here (`f"first: 100"`, ruff F541).
    initial_query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination="first: 100")
    # NOTE: `page_list` aliases the "nodes" list *inside*
    # `initial_query_response`, so `page_list += ...` below extends the
    # initial response dict in place — that aliasing is load-bearing.
    page_list, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response)
    while page_info["hasNextPage"]:
        # Request the next page starting after the previous page's cursor.
        query_response = await DownloadManager._fetch_graphql_query(query, **kwargs, pagination=f'first: 100, after: "{page_info["endCursor"]}"')
        new_page_list, page_info = DownloadManager._find_pagination_and_data_list(query_response)
        page_list += new_page_list
    # Re-locate the pageInfo dict of the (now fully merged) initial response
    # and empty it, so cached results carry no stale pagination cursors.
    _, page_info = DownloadManager._find_pagination_and_data_list(initial_query_response)
    page_info.clear()
    return initial_query_response
@staticmethod
async def get_remote_graphql(query: str, **kwargs) -> Dict:
"""
Execute GitHub GraphQL API query.
The queries are defined in `GITHUB_API_QUERIES`, all parameters should be passed as kwargs.
If the query wasn't cached previously, cache it. Cache query by its identifier + parameters hash.
NB! Caching is done before response parsing - to throw exception on accessing cached erroneous response.
Merges paginated sub-queries if pagination is required for the query.
Parse and return response as JSON.
:param query: Dynamic query identifier.
:param kwargs: Parameters for substitution of variables in dynamic query.
@@ -190,13 +268,11 @@ class DownloadManager:
"""
key = f"{query}_{md5(dumps(kwargs, sort_keys=True).encode('utf-8')).digest()}"
if key not in DownloadManager._REMOTE_RESOURCES_CACHE:
res = await DownloadManager._client.post("https://api.github.com/graphql", json={
"query": Template(GITHUB_API_QUERIES[query]).substitute(kwargs)
})
if "$pagination" in GITHUB_API_QUERIES[query]:
res = await DownloadManager._fetch_graphql_paginated(query, **kwargs)
else:
res = await DownloadManager._fetch_graphql_query(query, **kwargs)
DownloadManager._REMOTE_RESOURCES_CACHE[key] = res
else:
res = DownloadManager._REMOTE_RESOURCES_CACHE[key]
if res.status_code == 200:
return res.json()
else:
raise Exception(f"Query '{query}' failed to run by returning code of {res.status_code}: {res.json()}")
return res

View File

@@ -19,11 +19,11 @@ class LinesOfCode:
async def calculateLoc(self):
result = self.repositoryData
yearly_data = {}
total = len(result['data']['user']['repositories']['edges'])
for ind, repo in enumerate(result['data']['user']['repositories']['edges']):
if repo['node']['name'] not in self.ignored_repos:
print(f"{ind}/{total}", "Retrieving repo:", repo['node']["owner"]["login"], repo['node']['name'])
await self.getCommitStat(repo['node'], yearly_data)
total = len(result['data']['user']['repositories']['nodes'])
for ind, repo in enumerate(result['data']['user']['repositories']['nodes']):
if repo['name'] not in self.ignored_repos:
print(f"{ind}/{total}", "Retrieving repo:", repo["owner"]["login"], repo['name'])
await self.getCommitStat(repo, yearly_data)
await sleep(0.7)
return yearly_data
@@ -43,13 +43,15 @@ class LinesOfCode:
return 4
async def getCommitStat(self, repoDetails, yearly_data):
commit_data = await DownloadManager.get_remote_graphql("repository_commit_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'], id=self.user.node_id)
if commit_data["data"]["repository"] is None:
branch_data = await DownloadManager.get_remote_graphql("repository_branches_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'])
if branch_data["data"]["repository"] is None:
print("\tSkipping:", repoDetails['name'])
return
for commit in [commit["node"] for branch in commit_data["data"]["repository"]["refs"]["edges"] for commit in branch["node"]["target"]["history"]["edges"]]:
for branch in branch_data["data"]["repository"]["refs"]["nodes"]:
commit_data = await DownloadManager.get_remote_graphql("repository_branch_commit_list", owner=repoDetails["owner"]["login"], name=repoDetails['name'], branch=branch["name"], id=self.user.node_id)
for commit in commit_data["data"]["repository"]["ref"]["target"]["history"]["nodes"]:
date = re.search(r'\d+-\d+-\d+', commit["committedDate"]).group(0)
curr_year = datetime.datetime.fromisoformat(date).year
quarter = self.getQuarter(date)
@@ -63,7 +65,6 @@ class LinesOfCode:
yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] = 0
yearly_data[curr_year][quarter][repoDetails['primaryLanguage']['name']] += (commit["additions"] - commit["deletions"])
def pushChart(self):
repo = self.g.get_repo(f"{self.user.login}/{self.user.login}")
committer = InputGitAuthor('readme-bot', '41898282+github-actions[bot]@users.noreply.github.com')

View File

@@ -139,9 +139,9 @@ async def generate_commit_list(tz):
if result["data"]["repository"] is None or result["data"]["repository"]["defaultBranchRef"] is None:
continue
committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["edges"]
committed_dates = result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]
for committedDate in committed_dates:
date = datetime.datetime.strptime(committedDate["node"]["committedDate"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc).astimezone(timezone(tz))
date = datetime.datetime.strptime(committedDate["committedDate"], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=pytz.utc).astimezone(timezone(tz))
hour = date.hour
weekday = date.strftime('%A')
if 6 <= hour < 12:
@@ -266,10 +266,10 @@ async def get_waka_time_stats():
def generate_language_per_repo(result):
language_count = {}
total = 0
for repo in result['data']['user']['repositories']['edges']:
if repo['node']['primaryLanguage'] is None:
for repo in result['data']['user']['repositories']['nodes']:
if repo['primaryLanguage'] is None:
continue
language = repo['node']['primaryLanguage']['name']
language = repo['primaryLanguage']['name']
total += 1
if language not in language_count.keys():
language_count[language] = {}