-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathlink_parser.py
More file actions
102 lines (78 loc) · 2.88 KB
/
link_parser.py
File metadata and controls
102 lines (78 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import argparse
import re
import sys
from dataclasses import dataclass
from datetime import datetime
# Link-check statuses that should be reported in the issue.
PARSING_STATUSES = ["broken"]


@dataclass
class BrokenLink:
    """One broken-link finding extracted from the Sphinx linkcheck log."""

    location: str   # source document the link appears in
    line_nr: str    # line number within that document (kept as text)
    reasoning: str  # checker's explanation, e.g. "404 Not Found"


def parse_broken_links(log: str) -> list[BrokenLink]:
    """Extract broken-link entries from a Sphinx linkcheck log.

    Expected entry shape: ``(<doc>: line <nr>) <status> <url> - <reason>``.
    Lines that do not match this shape, or whose status is not listed in
    ``PARSING_STATUSES``, are silently skipped.
    """
    findings: list[BrokenLink] = []
    for entry in log.strip().split("\n"):
        pieces = entry.split(") ")
        if len(pieces) < 2:
            # No "(...) " prefix — not a linkcheck result line.
            continue
        head = pieces[0].replace("(", "").strip()
        tail = pieces[1]
        if not any(marker in tail for marker in PARSING_STATUSES):
            # Status we don't report on (e.g. "ok", "redirect").
            continue
        segments = tail.split(" - ")
        if len(segments) < 2:
            # No " - <reason>" suffix to extract.
            continue
        findings.append(
            BrokenLink(
                location=head.split(":")[0].strip(),
                line_nr=head.split("line")[-1].strip(),
                reasoning=segments[1].strip(),
            )
        )
    return findings
def generate_markdown_table(broken_links: list[BrokenLink]) -> str:
    """Render the broken-link findings as a GitHub-flavored markdown table.

    Returns the header, separator, and one row per finding, each line
    terminated by a newline.
    """
    rows = [
        "| Location | Line Number | Reasoning |",
        "|----------|-------------|-----------|",
    ]
    rows.extend(
        f"| {item.location} | {item.line_nr} | {item.reasoning} |"
        for item in broken_links
    )
    return "\n".join(rows) + "\n"
def generate_issue_body(broken_links: list[BrokenLink]) -> str:
    """Build the full markdown body for the auto-filed broken-links issue.

    Embeds a timestamp, the findings table from
    :func:`generate_markdown_table`, and fixed instructions for maintainers.
    """
    timestamp = datetime.now().strftime("%d-%m-%Y %H:%M")
    table = generate_markdown_table(broken_links)
    return f"""
# Broken Links Report.
**Last updated: {timestamp}**
The following broken links were detected in the documentation:
{table}
Please investigate and fix these issues to ensure all links are functional.
Thank you!
> To test locally if all link issues are resolved use `bazel run //:docs_link_check`
---
This issue will be auto updated regularly if link issues are found.
You may close it if you wish.
Though a new one will be created if link issues are still present.
"""
# Compiled once at import time instead of on every call; matches ANSI SGR
# (color/style) sequences such as "\x1b[31m" or "\x1b[0m".
_ANSI_SGR_RE = re.compile(r"\x1b\[[0-9;]*m")


def strip_ansi_codes(text: str) -> str:
    """Remove ANSI SGR escape sequences (terminal colors) from *text*.

    NOTE(review): only ``...m``-terminated sequences are stripped, which is
    what Sphinx emits for coloring; other ANSI controls pass through.
    """
    return _ANSI_SGR_RE.sub("", text)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parse broken links from Sphinx log and generate issue body."
    )
    parser.add_argument("logfile", type=str, help="Path to the Sphinx log file.")
    args = parser.parse_args()

    # Read the raw log and strip terminal color codes before parsing.
    with open(args.logfile, encoding="utf-8") as f:
        log_content = strip_ansi_codes(f.read())

    broken_links = parse_broken_links(log_content)
    if not broken_links:
        # Nothing broken found, can exit early without writing a report.
        sys.exit(0)

    # Original re-checked `if broken_links:` here; that branch was dead
    # because the empty case already exited above — removed.
    with open("issue_body.md", "w", encoding="utf-8") as out:
        out.write(generate_issue_body(broken_links))