Skip to content

Commit 3fa73a9

Browse files
Allow calling via python -m (#6)
1 parent a1d3b8d commit 3fa73a9

3 files changed

Lines changed: 241 additions & 241 deletions

File tree

devstats/__init__.py

Lines changed: 0 additions & 240 deletions
Original file line number | Diff line number | Diff line change
@@ -1,240 +0,0 @@
1-
import os
2-
import requests
3-
import sys
4-
import json
5-
import click
6-
from glob import glob
7-
import re
8-
9-
# The API key is read from the environment once, at import time. Without it
# no request can be authorized, so the process stops right here.
if "GRAPH_API_KEY" not in os.environ:
    print("You need to set GRAPH_API_KEY")
    print("But you shouldn't use this yet.")
    sys.exit()
token = os.environ["GRAPH_API_KEY"]

# Target URL and authorization header used for the GraphQL POST requests.
endpoint = r"https://api.github.com/graphql"
headers = {"Authorization": f"bearer {token}"}
18-
19-
20-
def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
    """
    Load an 'issue' query from file and set the target repository, where
    the target repository has the format:

    https://github.com/<repo_owner>/<repo_name>

    Parameters
    ----------
    fname : str
        Path to a text file containing a valid issue query according to the
        GitHub GraphQL schema. The file is read as UTF-8.
    repo_owner : str
        Owner of target repository on GitHub. Default is 'numpy'.
    repo_name : str
        Name of target repository on GitHub. Default is 'numpy'.

    Returns
    -------
    query : str
        Query loaded from file in text form suitable for ``send_query``.

    Notes
    -----
    This function expects the query to have a specific form and will not work
    for general GitHub GraphQL queries. See ``examples/`` for some valid
    templated issue queries.

    The template placeholders are the literal strings ``_REPO_OWNER_`` and
    ``_REPO_NAME_``; every occurrence of each is replaced.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(fname, encoding="utf-8") as fh:
        query = fh.read()
    # Set target repo from template
    query = query.replace("_REPO_OWNER_", repo_owner)
    query = query.replace("_REPO_NAME_", repo_name)
    return query
54-
55-
56-
def send_query(query, query_type, cursor=None):
    """
    Send a GraphQL query via requests.post

    No validation is done on the query before sending. GitHub GraphQL
    pagination is supported with the `cursor` argument.

    Parameters
    ----------
    query : str
        The GraphQL query to be sent
    query_type : {"issues", "pullRequests"}
        The object being queried according to the GitHub GraphQL schema.
        Currently only issues and pullRequests are supported
    cursor : str, optional
        If given, then the cursor is injected into the query to support
        GitHub's GraphQL pagination.

    Returns
    -------
    dict
        The result of the query (json) parsed by `json.loads`

    Raises
    ------
    ValueError
        If `query_type` is not one of the supported types, or if `cursor`
        is given but the text ``<query_type>(`` does not occur in `query`,
        so there is no place to inject the cursor.

    Notes
    -----
    This is intended mostly for internal use within `get_all_responses`.
    """
    # TODO: Expand this, either by parsing the query type from the query
    # directly or manually adding more query_types to the set
    if query_type not in {"issues", "pullRequests"}:
        raise ValueError(
            "Only 'issues' and 'pullRequests' queries are currently supported"
        )
    # TODO: Generalize this
    # WARNING: The cursor injection depends on the specific structure of the
    # query, this is the main reason why query types are limited to issues/PRs
    if cursor is not None:
        cursor_insertion_key = query_type + "("
        key_start = query.find(cursor_insertion_key)
        # str.find returns -1 when the key is missing; without this check the
        # cursor would be spliced in at an unrelated offset and a corrupted
        # query would be sent.
        if key_start == -1:
            raise ValueError(
                f"Cannot inject cursor: '{cursor_insertion_key}' not found in query"
            )
        cursor_ind = key_start + len(cursor_insertion_key)
        # The cursor becomes the first argument of the first
        # `<query_type>(...)` call in the query.
        query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
    # Build request payload; newlines are removed (not replaced by spaces), so
    # the query is sent as a single line.
    payload = {"query": "".join(query.split("\n"))}
    response = requests.post(endpoint, json=payload, headers=headers)
    return json.loads(response.content)
100-
101-
102-
def get_all_responses(query, query_type):
    """
    Helper function to bypass GitHub GraphQL API node limit.

    The query is sent once without a cursor, then re-sent with the cursor of
    the last received item until the number of collected items reaches the
    total count reported by the first response. Progress is printed to
    stdout along the way.

    Parameters
    ----------
    query : str
        Query text, passed unchanged to `send_query`.
    query_type : str
        Passed unchanged to `send_query` and `parse_single_query`.

    Returns
    -------
    list
        The items of all pages, in the order they were received.
    """
    print("Retrieving first page...", end="", flush=True)
    first_page = send_query(query, query_type)
    collected, cursor, expected = parse_single_query(first_page, query_type)

    # Follow the cursor page by page; the total from the first page is the
    # stopping criterion, later totals are ignored.
    while len(collected) < expected:
        next_page = send_query(query, query_type, cursor=cursor)
        page_items, cursor, _ = parse_single_query(next_page, query_type)
        collected += page_items
        progress = f"OK\nRetrieving {len(collected)} out of {expected} values..."
        print(progress, end="", flush=True)

    print("OK")
    return collected
123-
124-
125-
def parse_single_query(data, query_type):
    """
    Parse the data returned by `send_query`

    .. warning::

        Like `send_query`, the logic here depends on the specific structure
        of the query (e.g. it must be an issue or PR query, and must have a
        total count).

    Parameters
    ----------
    data : dict
        Parsed response; the values are read from
        ``data["data"]["repository"][query_type]``.
    query_type : str
        Key of the queried object inside the ``repository`` entry.

    Returns
    -------
    edges : list
        The ``edges`` list of the response.
    last_cursor : str or None
        The ``cursor`` of the last edge, or None when the response reports a
        total count of 0 and therefore contains no edges.
    total_count : int
        The ``totalCount`` reported by the response.

    Raises
    ------
    KeyError, TypeError
        If the response does not have the expected structure. The full
        response is printed before the exception is re-raised.
    IndexError
        If the response contains no edges although its total count is
        non-zero.
    """
    try:
        connection = data["data"]["repository"][query_type]
        total_count = connection["totalCount"]
        edges = connection["edges"]
        last_cursor = edges[-1]["cursor"] if len(edges) > 0 else None
    except (KeyError, TypeError):
        # A null "data" entry raises TypeError rather than KeyError; print
        # the whole response in both cases so the failure can be diagnosed.
        print(data)
        raise
    # An empty page is only legitimate when there is nothing to fetch at all;
    # otherwise keep failing so callers never paginate without a cursor.
    if last_cursor is None and total_count != 0:
        raise IndexError(
            f"Response has no edges although totalCount is {total_count}"
        )
    return edges, last_cursor, total_count
143-
144-
145-
class GithubGrabber:
    """
    Pull down data via the GitHub APIv.4 given a valid GraphQL query.

    The query template is loaded when the object is created; `get` fetches
    the data and `dump` writes it to disk as JSON.
    """

    def __init__(self, query_fname, query_type, repo_owner="numpy", repo_name="numpy"):
        """
        Create an object to send/recv queries related to the issue tracker
        for the given repository via the GitHub API v.4.

        The repository to query against is given by:
        https://github.com/<repo_owner>/<repo_name>

        Parameters
        ----------
        query_fname : str
            Path to a valid GraphQL query conforming to the GitHub GraphQL
            schema
        query_type : {"issues", "pullRequests"}
            Type of object that is being queried according to the GitHub GraphQL
            schema. Currently only "issues" and "pullRequests" are supported.
        repo_owner : str
            Repository owner. Default is "numpy"
        repo_name : str
            Repository name. Default is "numpy"
        """
        # Nothing has been fetched yet; `get` fills this in.
        self.raw_data = None
        self.repo_name = repo_name
        self.repo_owner = repo_owner
        self.query_type = query_type  # TODO: Parse this directly from query
        self.query_fname = query_fname
        self.load_query()

    def load_query(self):
        """
        (Re)load the query text from `query_fname` for the configured
        repository and store it in `self.query`.
        """
        self.query = load_query_from_file(
            self.query_fname,
            repo_owner=self.repo_owner,
            repo_name=self.repo_name,
        )

    def get(self):
        """
        Get JSON-formatted raw data from the query and store it in
        `self.raw_data`.
        """
        fetched = get_all_responses(self.query, self.query_type)
        self.raw_data = fetched

    def dump(self, outfile):
        """
        Dump raw json to `outfile`.

        Raises
        ------
        ValueError
            If `raw_data` is None or empty; `outfile` is not touched then.
        """
        nothing_to_write = not self.raw_data
        if nothing_to_write:
            raise ValueError("raw_data is currently empty, nothing to dump")

        with open(outfile, "w") as fh:
            print(f"Writing [{outfile}]")
            json.dump(self.raw_data, fh)
199-
200-
201-
@click.command()
@click.argument("repo_owner")
@click.argument("repo_name")
def main(repo_owner, repo_name):
    """Download and save issue and pr data for `repo_owner`/`repo_name`."""
    # Every *.gql file in the `queries` directory next to this module is run
    # once; the result is written to `<repo_name>_<suffix>.json` in the
    # current working directory.

    # Output-file suffix per query type; built once instead of per iteration.
    ftype = {"issues": "issues", "pullRequests": "PRs"}

    # glob returns files in arbitrary order; sort for a reproducible run.
    query_files = sorted(
        glob(os.path.join(os.path.dirname(__file__), "queries/*.gql"))
    )

    for n, query in enumerate(query_files):
        # Blank line between the output of consecutive queries
        if n != 0:
            print()

        print(f"Query: [{os.path.basename(query)}] on [{repo_owner}/{repo_name}]")
        # Parse query type from gql
        with open(query) as fh:
            gql = fh.read()
        # re.match anchors at the start of the text, so the file has to begin
        # with `query { repository(...) { <type>`.
        qtype_match = re.match(
            r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues)",
            gql,
            flags=re.MULTILINE,
        )
        if qtype_match is None:
            print(f"Could not determine gql query type for {query}")
            sys.exit(-1)
        qtype = qtype_match.group(1)

        data = GithubGrabber(
            query,
            qtype,
            repo_owner=repo_owner,
            repo_name=repo_name,
        )
        data.get()
        data.dump(f"{repo_name}_{ftype.get(qtype, qtype)}.json")
237-
238-
239-
# Run the click command when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)