1+ import json
12import os
2- import requests
3+ import re
34import sys
4- import json
5- import click
65from glob import glob
7- import re
8-
# Read the GitHub API token from the environment at import time; every
# request made by this module needs it for the Authorization header.
try:
    token = os.environ["GRAPH_API_KEY"]
except KeyError:
    # Diagnostics belong on stderr, and a missing token is a failure, so exit
    # with a non-zero status (a bare ``sys.exit()`` would report success).
    print("You need to set GRAPH_API_KEY", file=sys.stderr)
    print("But you shouldn't use this yet.", file=sys.stderr)
    sys.exit(1)

# GitHub GraphQL endpoint and the headers sent with each request.
endpoint = r"https://api.github.com/graphql"
headers = {"Authorization": f"bearer {token}"}
18-
19-
def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
    """
    Load an 'issue' query from file and set the target repository, where
    the target repository has the format:

    https://github.com/<repo_owner>/<repo_name>

    Parameters
    ----------
    fname : str
        Path to a text file containing a valid issue query according to the
        GitHub GraphQL schema.
    repo_owner : str
        Owner of target repository on GitHub. Default is 'numpy'.
    repo_name : str
        Name of target repository on GitHub. Default is 'numpy'.

    Returns
    -------
    query : str
        Query loaded from file in text form suitable for ``send_query``.

    Raises
    ------
    OSError
        If `fname` cannot be opened for reading.

    Notes
    -----
    This function expects the query to have a specific form and will not work
    for general GitHub GraphQL queries. See ``examples/`` for some valid
    templated issue queries.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(fname, encoding="utf-8") as fh:
        template = fh.read()
    # Set target repo from template: every occurrence of the placeholders
    # ``_REPO_OWNER_`` and ``_REPO_NAME_`` is substituted.
    return template.replace("_REPO_OWNER_", repo_owner).replace(
        "_REPO_NAME_", repo_name
    )
54-
55-
def _inject_cursor(query, query_type, cursor):
    """Return `query` with ``after:"<cursor>", `` inserted right after the
    first ``<query_type>(`` occurrence; raise ValueError if it is absent."""
    cursor_insertion_key = query_type + "("
    key_ind = query.find(cursor_insertion_key)
    if key_ind == -1:
        # ``str.find`` signals "not found" with -1; splicing at that offset
        # would silently corrupt the query, so fail loudly instead.
        raise ValueError(
            f"Cannot inject cursor: '{cursor_insertion_key}' not found in query"
        )
    cursor_ind = key_ind + len(cursor_insertion_key)
    return query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]


def send_query(query, query_type, cursor=None):
    """
    Send a GraphQL query via requests.post

    No validation is done on the query before sending. GitHub GraphQL is
    supported with the `cursor` argument.

    Parameters
    ----------
    query : str
        The GraphQL query to be sent
    query_type : {"issues", "pullRequests"}
        The object being queried according to the GitHub GraphQL schema.
        Currently only issues and pullRequests are supported
    cursor : str, optional
        If given, then the cursor is injected into the query to support
        GitHub's GraphQL pagination.

    Returns
    -------
    dict
        The result of the query (json) parsed by `json.loads`

    Raises
    ------
    ValueError
        If `query_type` is not supported, or if a `cursor` is given but the
        query contains no ``<query_type>(`` call to inject it into.

    Notes
    -----
    This is intended mostly for internal use within `get_all_responses`.
    The request is posted to the module-level ``endpoint`` using the
    module-level ``headers``.
    """
    # TODO: Expand this, either by parsing the query type from the query
    # directly or manually adding more query_types to the set
    if query_type not in {"issues", "pullRequests"}:
        raise ValueError(
            "Only 'issues' and 'pullRequests' queries are currently supported"
        )
    # TODO: Generalize this
    # WARNING: The cursor injection depends on the specific structure of the
    # query, this is the main reason why query types are limited to issues/PRs
    if cursor is not None:
        query = _inject_cursor(query, query_type, cursor)
    # Build request payload: the query is sent with its newlines removed.
    payload = {"query": query.replace("\n", "")}
    response = requests.post(endpoint, json=payload, headers=headers)
    return json.loads(response.content)
100-
101-
def get_all_responses(query, query_type):
    """
    Collect every entry for `query` by following GitHub's cursor pagination.

    The first page is requested without a cursor; further pages are requested
    with the cursor of the last entry received, until the number of collected
    entries reaches the total count reported by the first response. Progress
    messages are printed to stdout along the way.

    Parameters
    ----------
    query : str
        The GraphQL query, as accepted by `send_query`.
    query_type : {"issues", "pullRequests"}
        The object being queried, passed through to `send_query` and
        `parse_single_query`.

    Returns
    -------
    list
        The accumulated entries from all pages.
    """
    print("Retrieving first page...", end="", flush=True)
    first_page = send_query(query, query_type)
    collected, cursor, expected = parse_single_query(first_page, query_type)

    # Keep paging until as many entries as the first response promised
    # have been gathered.
    while len(collected) < expected:
        page = send_query(query, query_type, cursor=cursor)
        entries, cursor, _ = parse_single_query(page, query_type)
        collected.extend(entries)
        progress = f"OK\nRetrieving {len(collected)} out of {expected} values..."
        print(progress, end="", flush=True)

    print("OK")
    return collected
123-
124-
def parse_single_query(data, query_type):
    """
    Parse the data returned by `send_query`

    .. warning::

        Like `send_query`, the logic here depends on the specific structure
        of the query (e.g. it must be an issue or PR query, and must have a
        total count).

    Parameters
    ----------
    data : dict
        Parsed response; expected to contain
        ``data["data"]["repository"][query_type]`` with ``"totalCount"`` and
        ``"edges"`` entries, where each edge has a ``"cursor"``.
    query_type : str
        Key of the queried object inside ``data["data"]["repository"]``.

    Returns
    -------
    edges : list
        The edges of this page.
    last_cursor : str or None
        Cursor of the last edge, or None when there are no edges and the
        total count is zero.
    total_count : int
        The ``totalCount`` value of the response.

    Raises
    ------
    KeyError, TypeError
        If the response does not have the expected structure (for instance
        when an intermediate value is ``None``). The full response is
        printed before re-raising.
    IndexError
        If the page has no edges although the total count is non-zero.
    """
    try:
        connection = data["data"]["repository"][query_type]
        total_count = connection["totalCount"]
        edges = connection["edges"]
        if edges:
            last_cursor = edges[-1]["cursor"]
        elif total_count == 0:
            # Nothing matched the query at all: a valid, empty result.
            last_cursor = None
        else:
            raise IndexError(
                f"Response contains no edges although totalCount is {total_count}"
            )
    except (KeyError, TypeError):
        # Show the whole response so the unexpected structure can be diagnosed.
        print(data)
        raise
    return edges, last_cursor, total_count
1436
7+ import click
8+ import requests
1449
145- class GithubGrabber :
146- """
147- Pull down data via the GitHub APIv.4 given a valid GraphQL query.
148- """
149-
150- def __init__ (self , query_fname , query_type , repo_owner = "numpy" , repo_name = "numpy" ):
151- """
152- Create an object to send/recv queries related to the issue tracker
153- for the given repository via the GitHub API v.4.
154-
155- The repository to query against is given by:
156- https://github.com/<repo_owner>/<repo_name>
157-
158- Parameters
159- ----------
160- query_fname : str
161- Path to a valid GraphQL query conforming to the GitHub GraphQL
162- schema
163- query_type : {"issues", "pullRequests"}
164- Type of object that is being queried according to the GitHub GraphQL
165- schema. Currently only "issues" and "pullRequests" are supported.
166- repo_owner : str
167- Repository owner. Default is "numpy"
168- repo_name : str
169- Repository name. Default is "numpy"
170- """
171- self .query_fname = query_fname
172- self .query_type = query_type # TODO: Parse this directly from query
173- self .repo_owner = repo_owner
174- self .repo_name = repo_name
175- self .raw_data = None
176- self .load_query ()
177-
178- def load_query (self ):
179- self .query = load_query_from_file (
180- self .query_fname , self .repo_owner , self .repo_name
181- )
182-
183- def get (self ):
184- """
185- Get JSON-formatted raw data from the query.
186- """
187- self .raw_data = get_all_responses (self .query , self .query_type )
10+ from .query import GithubGrabber
18811
189- def dump (self , outfile ):
190- """
191- Dump raw json to `outfile`.
192- """
193- if not self .raw_data :
194- raise ValueError ("raw_data is currently empty, nothing to dump" )
19512
196- with open ( outfile , "w" ) as outf :
197- print ( f"Writing [ { outfile } ]" )
198- json . dump ( self . raw_data , outf )
@click.group()
def cli():
    # Root command group: it does no work itself, subcommands are attached
    # with ``@cli.command``. Deliberately left without a docstring so the
    # generated help text stays unchanged.
    return None
19916
20017
201- @click .command ()
18+ @cli .command ("query" )
20219@click .argument ("repo_owner" )
20320@click .argument ("repo_name" )
204- def main (repo_owner , repo_name ):
21+ def query (repo_owner , repo_name ):
20522 """Download and save issue and pr data for `repo_owner`/`repo_name`."""
20623
24+ try :
25+ token = os .environ ["GRAPH_API_KEY" ]
26+ except KeyError :
27+ print ("You need to set GRAPH_API_KEY" )
28+ sys .exit ()
29+
30+ headers = {"Authorization" : f"bearer { token } " }
20731 query_files = glob (os .path .join (os .path .dirname (__file__ ), "queries/*.gql" ))
20832
20933 for n , query in enumerate (query_files ):
@@ -228,6 +52,7 @@ def main(repo_owner, repo_name):
22852 data = GithubGrabber (
22953 query ,
23054 qtype ,
55+ headers ,
23156 repo_owner = repo_owner ,
23257 repo_name = repo_name ,
23358 )
@@ -236,5 +61,7 @@ def main(repo_owner, repo_name):
23661 data .dump (f"{ repo_name } _{ ftype .get (qtype , qtype )} .json" )
23762
23863
239- if __name__ == "__main__" :
240- main ()
@cli.command("publish")
def publish():
    """Generate myst report for `repo_owner`/`repo_name`."""
    # Currently this command only echoes a fixed status message.
    status = "publish called"
    click.echo(status)
0 commit comments