|
1 | | -name: LanguageTool (reviewdog) |
| 1 | +name: LanguageTool (PR review) |
2 | 2 |
|
3 | 3 | on: |
4 | 4 | pull_request_target: |
|
9 | 9 | permissions: |
10 | 10 | contents: read |
11 | 11 | pull-requests: write |
| 12 | + issues: write |
12 | 13 |
|
13 | 14 | concurrency: |
14 | | - group: languagetool-${{ github.event.pull_request.number || github.event.issue.number }} |
| 15 | + group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }} |
15 | 16 | cancel-in-progress: true |
16 | 17 |
|
| 18 | +env: |
| 19 | + LT_LANGUAGE: en-US |
| 20 | + RERUN_LABEL: languagetool:rerun |
| 21 | + LT_PORT: "8010" |
| 22 | + |
17 | 23 | jobs: |
18 | | - languagetool: |
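| 24 | + # 1) Comments do NOT run reviewdog; they only add the rerun label to the PR. NOTE(review): this step uses the default token, and events created with GITHUB_TOKEN normally do not start new workflow runs - confirm the label really triggers job 2 (a PAT or GitHub App token may be needed). |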
| 25 | + rerun_on_comment: |
| 26 | + if: | |
| 27 | + github.event_name == 'issue_comment' && |
| 28 | + github.event.issue.pull_request && |
| 29 | + startsWith(github.event.comment.body, '/languagetool') && |
| 30 | + (github.event.comment.author_association == 'MEMBER' || |
| 31 | + github.event.comment.author_association == 'OWNER' || |
| 32 | + github.event.comment.author_association == 'COLLABORATOR') |
19 | 33 | runs-on: ubuntu-latest |
20 | | - |
21 | 34 | steps: |
22 | | - - name: Decide whether to run + gather PR info |
23 | | - id: meta |
| 35 | + - name: Add rerun label to PR |
24 | 36 | uses: actions/github-script@v7 |
25 | 37 | with: |
26 | 38 | script: | |
27 | | - const eventName = context.eventName; |
| 39 | + const owner = context.repo.owner; |
| 40 | + const repo = context.repo.repo; |
| 41 | + const issue_number = context.issue.number; |
| 42 | + const label = process.env.RERUN_LABEL; |
28 | 43 |
|
29 | | - async function getPerm(username) { |
30 | | - const res = await github.rest.repos.getCollaboratorPermissionLevel({ |
31 | | - owner: context.repo.owner, |
32 | | - repo: context.repo.repo, |
33 | | - username, |
| 44 | + // Ensure the label exists: look it up first and create it only when the |
| | + // lookup answers 404 (label missing). Any other failure (auth, rate limit, |
| | + // network) is re-thrown so it is not masked by a misleading create error. |
| 45 | + try { |
| 46 | + await github.rest.issues.getLabel({ owner, repo, name: label }); |
| 47 | + } catch (e) { |
| | + if (e.status !== 404) throw e; |
| 48 | + await github.rest.issues.createLabel({ |
| 49 | + owner, repo, name: label, color: '0e8a16', |
| 50 | + description: 'Rerun LanguageTool on this PR' |
34 | 51 | }); |
35 | | - return res.data.permission; // admin|maintain|write|triage|read|none |
36 | | - } |
37 | | -
|
38 | | - let run = false; |
39 | | - let prNumber = null; |
40 | | - let pr = null; |
41 | | -
|
42 | | - if (eventName === "pull_request_target") { |
43 | | - pr = context.payload.pull_request; |
44 | | - prNumber = pr.number; |
45 | | -
|
46 | | - if (context.payload.action === "labeled") { |
47 | | - run = (context.payload.label?.name === "languagetool:rerun"); |
48 | | - } else { |
49 | | - // opened / reopened |
50 | | - run = true; |
51 | | - } |
52 | | - } else if (eventName === "issue_comment") { |
53 | | - // only run for PR comments |
54 | | - if (!context.payload.issue?.pull_request) { |
55 | | - run = false; |
56 | | - } else { |
57 | | - const body = (context.payload.comment?.body || "").trim(); |
58 | | - const wants = body.startsWith("/languagetool"); |
59 | | - if (!wants) { |
60 | | - run = false; |
61 | | - } else { |
62 | | - const perm = await getPerm(context.payload.comment.user.login); |
63 | | - run = ["admin", "maintain", "write"].includes(perm); |
64 | | - } |
65 | | -
|
66 | | - prNumber = context.payload.issue.number; |
67 | | - const prRes = await github.rest.pulls.get({ |
68 | | - owner: context.repo.owner, |
69 | | - repo: context.repo.repo, |
70 | | - pull_number: prNumber, |
71 | | - }); |
72 | | - pr = prRes.data; |
73 | | - } |
74 | 52 | } |
75 | 53 |
|
76 | | - core.setOutput("run", run ? "true" : "false"); |
77 | | - if (!pr) return; |
78 | | -
|
79 | | - core.setOutput("pr_number", String(prNumber)); |
80 | | - core.setOutput("head_sha", pr.head.sha); |
81 | | - core.setOutput("base_sha", pr.base.sha); |
82 | | - core.setOutput("head_repo", pr.head.repo.full_name); |
83 | | - core.setOutput("base_repo", pr.base.repo.full_name); |
| 54 | + await github.rest.issues.addLabels({ |
| 55 | + owner, repo, issue_number, labels: [label] |
| 56 | + }); |
84 | 57 |
|
85 | | - - name: Stop early if not requested |
86 | | - if: steps.meta.outputs.run != 'true' |
87 | | - run: echo "Not running LanguageTool." |
| 58 | + # 2) Actual PR run: opened/reopened OR labeled with rerun label |
| 59 | + languagetool: |
| 60 | + if: | |
| 61 | + github.event_name == 'pull_request_target' && |
| 62 | + ( |
| 63 | + github.event.action == 'opened' || |
| 64 | + github.event.action == 'reopened' || |
| 65 | + (github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun') |
| 66 | + ) |
| 67 | + runs-on: ubuntu-latest |
88 | 68 |
|
| 69 | + steps: |
89 | 70 | - name: Checkout PR head (safe) |
90 | | - if: steps.meta.outputs.run == 'true' |
91 | 71 | uses: actions/checkout@v4 |
92 | 72 | with: |
93 | | - repository: ${{ steps.meta.outputs.head_repo }} |
94 | | - ref: ${{ steps.meta.outputs.head_sha }} |
| 73 | + repository: ${{ github.event.pull_request.head.repo.full_name }} |
| 74 | + ref: ${{ github.event.pull_request.head.sha }} |
95 | 75 | fetch-depth: 0 |
96 | 76 | persist-credentials: false |
97 | 77 | submodules: false |
98 | 78 |
|
99 | 79 | - name: Fetch base SHA for diffing |
100 | | - if: steps.meta.outputs.run == 'true' |
| | + # Hand the event values to bash through env instead of expanding workflow |
| | + # expressions inside the script text, so they arrive as data and are never |
| | + # parsed as shell code (same pattern as BASE_SHA/HEAD_SHA in the review step). |
| | + env: |
| | + BASE_REPO: ${{ github.event.pull_request.base.repo.full_name }} |
| | + BASE_SHA: ${{ github.event.pull_request.base.sha }} |
101 | 80 | run: | |
102 | 81 | set -euo pipefail |
103 | | - git remote add upstream "https://github.com/${{ steps.meta.outputs.base_repo }}.git" || true |
104 | | - git fetch --no-tags --depth=1 upstream "${{ steps.meta.outputs.base_sha }}" |
| 82 | + git remote add upstream "https://github.com/${BASE_REPO}.git" || true |
| 83 | + git fetch --no-tags --depth=1 upstream "${BASE_SHA}" |
105 | 84 |
|
106 | 85 | - name: Setup Python |
107 | | - if: steps.meta.outputs.run == 'true' |
108 | 86 | uses: actions/setup-python@v5 |
109 | 87 | with: |
110 | 88 | python-version: "3.11" |
111 | 89 |
|
112 | | - - name: Install Python deps |
113 | | - if: steps.meta.outputs.run == 'true' |
| 90 | + - name: Install deps |
114 | 91 | run: | |
115 | 92 | python -m pip install --upgrade pip |
116 | 93 | python -m pip install requests |
117 | 94 |
|
118 | 95 | - name: Setup reviewdog |
119 | | - if: steps.meta.outputs.run == 'true' |
120 | 96 | uses: reviewdog/action-setup@v1 |
121 | 97 | with: |
122 | 98 | reviewdog_version: latest |
123 | 99 |
|
124 | 100 | - name: Start LanguageTool server |
125 | | - if: steps.meta.outputs.run == 'true' |
126 | 101 | run: | |
127 | 102 | set -euo pipefail |
128 | | - docker run -d --rm --name languagetool -p 8010:8010 erikvl87/languagetool:latest |
| 103 | + docker run -d --rm --name languagetool -p "${LT_PORT}:8010" erikvl87/languagetool:latest |
129 | 104 |
|
130 | | - # Wait until ready |
| 105 | + # Wait until ready (avoid connection reset during warmup) |
131 | 106 | for i in $(seq 1 60); do |
132 | | - if curl -fsS "http://localhost:8010/v2/languages" >/dev/null; then |
| 107 | + if curl -fsS "http://localhost:${LT_PORT}/v2/languages" >/dev/null; then |
133 | 108 | echo "LanguageTool is up." |
134 | 109 | exit 0 |
135 | 110 | fi |
136 | 111 | sleep 2 |
137 | 112 | done |
138 | 113 |
|
139 | | - echo "LanguageTool did not become ready in time" >&2 |
| 114 | + echo "LanguageTool did not become ready" >&2 |
140 | 115 | docker logs languagetool || true |
141 | 116 | exit 1 |
142 | 117 |
|
143 | | - - name: Run LanguageTool and comment on PR |
144 | | - if: steps.meta.outputs.run == 'true' |
| 118 | + - name: Run LanguageTool -> reviewdog PR review comments |
145 | 119 | env: |
146 | 120 | REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} |
| 121 | + BASE_SHA: ${{ github.event.pull_request.base.sha }} |
| 122 | + HEAD_SHA: ${{ github.event.pull_request.head.sha }} |
147 | 123 | run: | |
148 | 124 | set -euo pipefail |
149 | | - python .github/scripts/languagetool_reviewdog.py \ |
150 | | - --api-url "http://localhost:8010/v2/check" \ |
151 | | - --language "en-US" \ |
152 | | - --base-sha "${{ steps.meta.outputs.base_sha }}" \ |
153 | | - --head-sha "${{ steps.meta.outputs.head_sha }}" \ |
154 | | - --dictionary ".languagetool/words.txt" \ |
155 | | - | reviewdog -f=rdjson \ |
156 | | - -name="LanguageTool" \ |
157 | | - -reporter="github-pr-review" \ |
158 | | - -filter-mode="file" \ |
159 | | - -fail-level="none" \ |
160 | | - -level="warning" |
161 | | -
|
162 | | - - name: Remove rerun label (so it can be added again later) |
163 | | - if: steps.meta.outputs.run == 'true' && github.event_name == 'pull_request_target' && github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun' |
| 125 | +
|
| 126 | + # Inline python: produce rdjson for reviewdog (no repo script file needed) |
| 127 | + python - <<'PY' > /tmp/rd.json |
| 128 | + import json, os, re, subprocess |
| 129 | + import requests |
| 130 | +
|
| 131 | + API_URL = f"http://localhost:{os.environ['LT_PORT']}/v2/check" |
| 132 | + LANGUAGE = os.environ.get("LT_LANGUAGE", "en-US") |
| 133 | + BASE_SHA = os.environ["BASE_SHA"] |
| 134 | + HEAD_SHA = os.environ["HEAD_SHA"] |
| 135 | + DICT_PATH = ".languagetool/words.txt" |
| 136 | + MAX_SUG = 3 |
| 137 | + MAX_TEXT = 300_000 # avoid huge posts |
| 138 | +
|
| 139 | + def sh(*args): |
| 140 | + return subprocess.check_output(args, text=True).strip() |
| 141 | +
|
| 142 | + def normalize_word(s: str) -> str: |
| 143 | + s = re.sub(r"^[\W_]+|[\W_]+$", "", s, flags=re.UNICODE) |
| 144 | + return s.lower() |
| 145 | +
|
| 146 | + def load_dict(path): |
| 147 | + if not os.path.exists(path): |
| 148 | + return set() |
| 149 | + out = set() |
| 150 | + with open(path, "r", encoding="utf-8") as f: |
| 151 | + for line in f: |
| 152 | + line = line.strip() |
| 153 | + if not line or line.startswith("#"): |
| 154 | + continue |
| 155 | + out.add(line.lower()) |
| 156 | + return out |
| 157 | +
|
| 158 | + def offset_to_line_col(text, offset): |
| | + # Map a character offset in `text` to a 1-based (line, column) pair. |
| | + # rdjson columns are counted in UTF-8 bytes, so the column is the encoded |
| | + # length of the text between the line start and `offset`, not its |
| | + # character count (identical for pure-ASCII lines). |
| 159 | + line = text.count("\n", 0, offset) + 1 |
| 160 | + last_nl = text.rfind("\n", 0, offset)  # -1 when offset is on the first line |
| 161 | + col = len(text[last_nl + 1:offset].encode("utf-8")) + 1 |
| 162 | + return line, col |
| 163 | +
|
| 164 | + def changed_files(base, head): |
| 165 | + out = sh("git", "diff", "--name-only", base, head) |
| 166 | + return [x.strip() for x in out.splitlines() if x.strip()] |
| 167 | +
|
| 168 | + def is_text_file(path): |
| 169 | + ext = os.path.splitext(path)[1].lower() |
| 170 | + return ext in {".md",".txt",".rst",".adoc",".asciidoc",".tex"} |
| 171 | +
|
| 172 | + dict_words = load_dict(DICT_PATH) |
| 173 | + files = [f for f in changed_files(BASE_SHA, HEAD_SHA) if os.path.exists(f) and is_text_file(f)] |
| 174 | + diagnostics = [] |
| 175 | +
|
| 176 | + for path in files: |
| 177 | + try: |
| 178 | + content = open(path, "r", encoding="utf-8").read() |
| 179 | + except UnicodeDecodeError: |
| 180 | + content = open(path, "r", encoding="utf-8", errors="replace").read() |
| 181 | +
|
| 182 | + if not content.strip(): |
| 183 | + continue |
| 184 | +
|
| 185 | + if len(content) > MAX_TEXT: |
| 186 | + content = content[:MAX_TEXT] |
| 187 | +
|
| 188 | + try: |
| 189 | + r = requests.post(API_URL, data={"language": LANGUAGE, "text": content}, timeout=60) |
| 190 | + r.raise_for_status() |
| 191 | + data = r.json() |
| 192 | + except Exception as e: |
| 193 | + diagnostics.append({ |
| 194 | + "message": f"LanguageTool API error for {path}: {e}", |
| 195 | + "location": {"path": path, "range": {"start": {"line": 1, "column": 1}}}, |
| 196 | + "severity": "WARNING", |
| 197 | + }) |
| 198 | + continue |
| 199 | +
|
| 200 | + for m in data.get("matches", []): |
| 201 | + offset = int(m.get("offset", 0)) |
| 202 | + length = int(m.get("length", 0)) |
| 203 | + bad = content[offset:offset+length] |
| 204 | + rule = m.get("rule", {}) or {} |
| 205 | + rule_id = rule.get("id") or "UNKNOWN_RULE" |
| 206 | + category = (rule.get("category", {}) or {}).get("id", "") |
| 207 | +
|
| 208 | + # Custom dictionary: ignore spelling-ish matches when token is in words.txt |
| 209 | + bad_norm = normalize_word(bad) |
| 210 | + if dict_words and bad_norm: |
| 211 | + looks_like_spelling = (category.upper() == "TYPOS") or ("MORFOLOGIK" in str(rule_id).upper()) |
| 212 | + if looks_like_spelling and (bad_norm in dict_words): |
| 213 | + continue |
| 214 | +
|
| 215 | + sl, sc = offset_to_line_col(content, offset) |
| 216 | + el, ec = offset_to_line_col(content, offset + max(length, 0)) |
| 217 | +
|
| 218 | + suggestions = [] |
| 219 | + for repl in (m.get("replacements") or [])[:MAX_SUG]: |
| 220 | + v = repl.get("value") |
| 221 | + if not v: |
| 222 | + continue |
| 223 | + suggestions.append({ |
| 224 | + "range": {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}, |
| 225 | + "text": v, |
| 226 | + }) |
| 227 | +
|
| 228 | + code = {"value": rule_id} |
| 229 | + urls = rule.get("urls") or [] |
| 230 | + if urls and isinstance(urls, list): |
| 231 | + u = urls[0].get("value") |
| 232 | + if u: |
| 233 | + code["url"] = u |
| 234 | +
|
| 235 | + diagnostics.append({ |
| 236 | + "message": m.get("message") or "LanguageTool finding", |
| 237 | + "location": { |
| 238 | + "path": path, |
| 239 | + "range": {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}, |
| 240 | + }, |
| 241 | + "severity": "WARNING", |
| 242 | + "code": code, |
| 243 | + **({"suggestions": suggestions} if suggestions else {}), |
| 244 | + }) |
| 245 | +
|
| 246 | + print(json.dumps({ |
| 247 | + "source": {"name": "LanguageTool", "url": "https://languagetool.org"}, |
| 248 | + "diagnostics": diagnostics |
| 249 | + })) |
| 250 | + PY |
| 251 | +
|
| 252 | + reviewdog -f=rdjson \ |
| 253 | + -name="LanguageTool" \ |
| 254 | + -reporter="github-pr-review" \ |
| 255 | + -filter-mode="diff_context" \ |
| 256 | + -fail-level="none" \ |
| 257 | + -level="warning" < /tmp/rd.json |
| 258 | +
|
| 259 | + - name: Remove rerun label |
| | + # Run even when an earlier step failed: if the label stayed on the PR, |
| | + # adding it again would not produce a fresh "labeled" event, so no rerun |
| | + # could be requested. The label name comes from the workflow-level env. |
| 260 | + if: always() && github.event.action == 'labeled' && github.event.label.name == env.RERUN_LABEL |
| 261 | + continue-on-error: true |
164 | 262 | uses: actions/github-script@v7 |
165 | 263 | with: |
166 | 264 | script: | |
167 | 265 | await github.rest.issues.removeLabel({ |
168 | 266 | owner: context.repo.owner, |
169 | 267 | repo: context.repo.repo, |
170 | 268 | issue_number: context.payload.pull_request.number, |
171 | | - name: "languagetool:rerun", |
| 269 | + name: process.env.RERUN_LABEL, |
172 | 270 | }); |
173 | 271 |
|
174 | 272 | - name: Stop LanguageTool |
175 | | - if: always() && steps.meta.outputs.run == 'true' |
| 273 | + if: always() |
176 | 274 | run: docker stop languagetool || true |
0 commit comments