Skip to content

Commit e1d5a3e

Browse files
Add publish stub (#8)
1 parent 3fa73a9 commit e1d5a3e

3 files changed

Lines changed: 218 additions & 196 deletions

File tree

devstats/__main__.py

Lines changed: 22 additions & 195 deletions
Original file line number | Diff line number | Diff line change
@@ -1,209 +1,33 @@
1+
import json
12
import os
2-
import requests
3+
import re
34
import sys
4-
import json
5-
import click
65
from glob import glob
7-
import re
8-
9-
try:
10-
token = os.environ["GRAPH_API_KEY"]
11-
except KeyError:
12-
print("You need to set GRAPH_API_KEY")
13-
print("But you shouldn't use this yet.")
14-
sys.exit()
15-
16-
endpoint = r"https://api.github.com/graphql"
17-
headers = {"Authorization": f"bearer {token}"}
18-
19-
20-
def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
21-
"""
22-
Load an 'issue' query from file and set the target repository, where
23-
the target repository has the format:
24-
25-
https://github.com/<repo_owner>/<repo_name>
26-
27-
Parameters
28-
----------
29-
fname : str
30-
Path to a text file containing a valid issue query according to the
31-
GitHub GraphQL schema.
32-
repo_owner : str
33-
Owner of target repository on GitHub. Default is 'numpy'.
34-
repo_name : str
35-
Name of target repository on GitHub. Default is 'numpy'.
36-
37-
Returns
38-
-------
39-
query : str
40-
Query loaded from file in text form suitable for ``send_query``.
41-
42-
Notes
43-
-----
44-
This function expects the query to have a specific form and will not work
45-
for general GitHub GraphQL queries. See ``examples/`` for some valid
46-
templated issue queries.
47-
"""
48-
with open(fname) as fh:
49-
query = fh.read()
50-
# Set target repo from template
51-
query = query.replace("_REPO_OWNER_", repo_owner)
52-
query = query.replace("_REPO_NAME_", repo_name)
53-
return query
54-
55-
56-
def send_query(query, query_type, cursor=None):
57-
"""
58-
Send a GraphQL query via requests.post
59-
60-
No validation is done on the query before sending. GitHub GraphQL is
61-
supported with the `cursor` argument.
62-
63-
Parameters
64-
----------
65-
query : str
66-
The GraphQL query to be sent
67-
query_type : {"issues", "pullRequests"}
68-
The object being queried according to the GitHub GraphQL schema.
69-
Currently only issues and pullRequests are supported
70-
cursor : str, optional
71-
If given, then the cursor is injected into the query to support
72-
GitHub's GraphQL pagination.
73-
74-
Returns
75-
-------
76-
dict
77-
The result of the query (json) parsed by `json.loads`
78-
79-
Notes
80-
-----
81-
This is intended mostly for internal use within `get_all_responses`.
82-
"""
83-
# TODO: Expand this, either by parsing the query type from the query
84-
# directly or manually adding more query_types to the set
85-
if query_type not in {"issues", "pullRequests"}:
86-
raise ValueError(
87-
"Only 'issues' and 'pullRequests' queries are currently supported"
88-
)
89-
# TODO: Generalize this
90-
# WARNING: The cursor injection depends on the specific structure of the
91-
# query, this is the main reason why query types are limited to issues/PRs
92-
if cursor is not None:
93-
cursor_insertion_key = query_type + "("
94-
cursor_ind = query.find(cursor_insertion_key) + len(cursor_insertion_key)
95-
query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
96-
# Build request payload
97-
payload = {"query": "".join(query.split("\n"))}
98-
response = requests.post(endpoint, json=payload, headers=headers)
99-
return json.loads(response.content)
100-
101-
102-
def get_all_responses(query, query_type):
103-
"""
104-
Helper function to bypass GitHub GraphQL API node limit.
105-
"""
106-
# Get data from a single response
107-
print(f"Retrieving first page...", end="", flush=True)
108-
initial_data = send_query(query, query_type)
109-
data, last_cursor, total_count = parse_single_query(initial_data, query_type)
110-
111-
# Continue requesting data (with pagination) until all are acquired
112-
while len(data) < total_count:
113-
rdata = send_query(query, query_type, cursor=last_cursor)
114-
pdata, last_cursor, _ = parse_single_query(rdata, query_type)
115-
data.extend(pdata)
116-
print(
117-
f"OK\nRetrieving {len(data)} out of {total_count} values...",
118-
end="",
119-
flush=True,
120-
)
121-
print("OK")
122-
return data
123-
124-
125-
def parse_single_query(data, query_type):
126-
"""
127-
Parse the data returned by `send_query`
128-
129-
.. warning::
130-
131-
Like `send_query`, the logic here depends on the specific structure
132-
of the query (e.g. it must be an issue or PR query, and must have a
133-
total count).
134-
"""
135-
try:
136-
total_count = data["data"]["repository"][query_type]["totalCount"]
137-
data = data["data"]["repository"][query_type]["edges"]
138-
last_cursor = data[-1]["cursor"]
139-
except KeyError as e:
140-
print(data)
141-
raise e
142-
return data, last_cursor, total_count
1436

7+
import click
8+
import requests
1449

145-
class GithubGrabber:
146-
"""
147-
Pull down data via the GitHub APIv.4 given a valid GraphQL query.
148-
"""
149-
150-
def __init__(self, query_fname, query_type, repo_owner="numpy", repo_name="numpy"):
151-
"""
152-
Create an object to send/recv queries related to the issue tracker
153-
for the given repository via the GitHub API v.4.
154-
155-
The repository to query against is given by:
156-
https://github.com/<repo_owner>/<repo_name>
157-
158-
Parameters
159-
----------
160-
query_fname : str
161-
Path to a valid GraphQL query conforming to the GitHub GraphQL
162-
schema
163-
query_type : {"issues", "pullRequests"}
164-
Type of object that is being queried according to the GitHub GraphQL
165-
schema. Currently only "issues" and "pullRequests" are supported.
166-
repo_owner : str
167-
Repository owner. Default is "numpy"
168-
repo_name : str
169-
Repository name. Default is "numpy"
170-
"""
171-
self.query_fname = query_fname
172-
self.query_type = query_type # TODO: Parse this directly from query
173-
self.repo_owner = repo_owner
174-
self.repo_name = repo_name
175-
self.raw_data = None
176-
self.load_query()
177-
178-
def load_query(self):
179-
self.query = load_query_from_file(
180-
self.query_fname, self.repo_owner, self.repo_name
181-
)
182-
183-
def get(self):
184-
"""
185-
Get JSON-formatted raw data from the query.
186-
"""
187-
self.raw_data = get_all_responses(self.query, self.query_type)
10+
from .query import GithubGrabber
18811

189-
def dump(self, outfile):
190-
"""
191-
Dump raw json to `outfile`.
192-
"""
193-
if not self.raw_data:
194-
raise ValueError("raw_data is currently empty, nothing to dump")
19512

196-
with open(outfile, "w") as outf:
197-
print(f"Writing [{outfile}]")
198-
json.dump(self.raw_data, outf)
13+
@click.group()
14+
def cli():
15+
pass
19916

20017

201-
@click.command()
18+
@cli.command("query")
20219
@click.argument("repo_owner")
20320
@click.argument("repo_name")
204-
def main(repo_owner, repo_name):
21+
def query(repo_owner, repo_name):
20522
"""Download and save issue and pr data for `repo_owner`/`repo_name`."""
20623

24+
try:
25+
token = os.environ["GRAPH_API_KEY"]
26+
except KeyError:
27+
print("You need to set GRAPH_API_KEY")
28+
sys.exit()
29+
30+
headers = {"Authorization": f"bearer {token}"}
20731
query_files = glob(os.path.join(os.path.dirname(__file__), "queries/*.gql"))
20832

20933
for n, query in enumerate(query_files):
@@ -228,6 +52,7 @@ def main(repo_owner, repo_name):
22852
data = GithubGrabber(
22953
query,
23054
qtype,
55+
headers,
23156
repo_owner=repo_owner,
23257
repo_name=repo_name,
23358
)
@@ -236,5 +61,7 @@ def main(repo_owner, repo_name):
23661
data.dump(f"{repo_name}_{ftype.get(qtype, qtype)}.json")
23762

23863

239-
if __name__ == "__main__":
240-
main()
64+
@cli.command("publish")
65+
def publish():
66+
"""Generate myst report for `repo_owner`/`repo_name`."""
67+
click.echo("publish called")

0 commit comments

Comments
 (0)