@@ -24,7 +24,7 @@ A snapshot of the development on the {{ project }} project.
2424query_date = np.datetime64("2020-01-01 00:00:00")
2525
2626# Load data
27- with open("{{ project }}_issues.json", "r") as fh:
27+ with open("devstats-data/ {{ project }}_issues.json", "r") as fh:
2828 issues = [item["node"] for item in json.loads(fh.read())]
2929
3030glue("devstats-data/{{ project }}_query_date", str(query_date.astype("M8[D]")))
@@ -48,12 +48,36 @@ glue("devstats-data/{{ project }}_query_date", str(query_date.astype("M8[D]")))
4848---
4949tags: [hide-input]
5050---
51- with open("devstats-data/{{ project }}_prs .json", "r") as fh:
51+ with open("devstats-data/{{ project }}_PRs .json", "r") as fh:
5252 prs = [item["node"] for item in json.loads(fh.read())]
53- ```
54-
55- ``` {include} prs_filter.md
5653
54+ # Filters
55+
56+ # The filters below are applied to the PRs before the analysis that follows:
57+ #
58+ # - Only PRs to the default development branch (e.g. `main`)[^master_to_main]
59+ # are considered.
60+ # - Only PRs from users with _active_ GitHub accounts are considered. For example,
61+ # if a user opened a Pull Request in 2016, but then deleted their GitHub account
62+ # in 2017, then this PR is excluded from the analysis.
63+ # - PRs opened by bots (e.g. dependabot) are excluded.
64+
65+ # Only look at PRs to the main development branch - ignore backports,
66+ # gh-pages, etc.
67+ default_branches = {"main", "master"} # Account for default branch update
68+ prs = [pr for pr in prs if pr["baseRefName"] in default_branches]
69+
70+ # Drop data where PR author is unknown (e.g. github account no longer exists)
71+ prs = [pr for pr in prs if pr["author"]] # Failed author query results in None
72+
73+ # Filter out PRs by bots
74+ bot_filter = {
75+ "dependabot-preview",
76+ "github-actions",
77+ "meeseeksmachine",
78+ "pre-commit-ci[bot]"
79+ }
80+ prs = [pr for pr in prs if pr["author"]["login"] not in bot_filter]
5781```
5882
5983``` {include} prs_merged_over_time.md
0 commit comments