Skip to content

Commit ebfebd1

Browse files
authored
Update languagetool-pr.yml
1 parent 6cfe2b6 commit ebfebd1

1 file changed

Lines changed: 194 additions & 96 deletions

File tree

Lines changed: 194 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: LanguageTool (reviewdog)
1+
name: LanguageTool (PR review)
22

33
on:
44
pull_request_target:
@@ -9,168 +9,266 @@ on:
99
permissions:
1010
contents: read
1111
pull-requests: write
12+
issues: write
1213

1314
concurrency:
14-
group: languagetool-${{ github.event.pull_request.number || github.event.issue.number }}
15+
group: languagetool-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }}
1516
cancel-in-progress: true
1617

18+
env:
19+
LT_LANGUAGE: en-US
20+
RERUN_LABEL: languagetool:rerun
21+
LT_PORT: "8010"
22+
1723
jobs:
18-
languagetool:
24+
# 1) Comments do NOT run reviewdog. They only tag the PR.
25+
rerun_on_comment:
26+
if: |
27+
github.event_name == 'issue_comment' &&
28+
github.event.issue.pull_request &&
29+
startsWith(github.event.comment.body, '/languagetool') &&
30+
(github.event.comment.author_association == 'MEMBER' ||
31+
github.event.comment.author_association == 'OWNER' ||
32+
github.event.comment.author_association == 'COLLABORATOR')
1933
runs-on: ubuntu-latest
20-
2134
steps:
22-
- name: Decide whether to run + gather PR info
23-
id: meta
35+
- name: Add rerun label to PR
2436
uses: actions/github-script@v7
2537
with:
2638
script: |
27-
const eventName = context.eventName;
39+
const owner = context.repo.owner;
40+
const repo = context.repo.repo;
41+
const issue_number = context.issue.number;
42+
const label = process.env.RERUN_LABEL;
2843
29-
async function getPerm(username) {
30-
const res = await github.rest.repos.getCollaboratorPermissionLevel({
31-
owner: context.repo.owner,
32-
repo: context.repo.repo,
33-
username,
44+
// Ensure the label exists
45+
try {
46+
await github.rest.issues.getLabel({ owner, repo, name: label });
47+
} catch (e) {
48+
await github.rest.issues.createLabel({
49+
owner, repo, name: label, color: '0e8a16',
50+
description: 'Rerun LanguageTool on this PR'
3451
});
35-
return res.data.permission; // admin|maintain|write|triage|read|none
36-
}
37-
38-
let run = false;
39-
let prNumber = null;
40-
let pr = null;
41-
42-
if (eventName === "pull_request_target") {
43-
pr = context.payload.pull_request;
44-
prNumber = pr.number;
45-
46-
if (context.payload.action === "labeled") {
47-
run = (context.payload.label?.name === "languagetool:rerun");
48-
} else {
49-
// opened / reopened
50-
run = true;
51-
}
52-
} else if (eventName === "issue_comment") {
53-
// only run for PR comments
54-
if (!context.payload.issue?.pull_request) {
55-
run = false;
56-
} else {
57-
const body = (context.payload.comment?.body || "").trim();
58-
const wants = body.startsWith("/languagetool");
59-
if (!wants) {
60-
run = false;
61-
} else {
62-
const perm = await getPerm(context.payload.comment.user.login);
63-
run = ["admin", "maintain", "write"].includes(perm);
64-
}
65-
66-
prNumber = context.payload.issue.number;
67-
const prRes = await github.rest.pulls.get({
68-
owner: context.repo.owner,
69-
repo: context.repo.repo,
70-
pull_number: prNumber,
71-
});
72-
pr = prRes.data;
73-
}
7452
}
7553
76-
core.setOutput("run", run ? "true" : "false");
77-
if (!pr) return;
78-
79-
core.setOutput("pr_number", String(prNumber));
80-
core.setOutput("head_sha", pr.head.sha);
81-
core.setOutput("base_sha", pr.base.sha);
82-
core.setOutput("head_repo", pr.head.repo.full_name);
83-
core.setOutput("base_repo", pr.base.repo.full_name);
54+
await github.rest.issues.addLabels({
55+
owner, repo, issue_number, labels: [label]
56+
});
8457
85-
- name: Stop early if not requested
86-
if: steps.meta.outputs.run != 'true'
87-
run: echo "Not running LanguageTool."
58+
# 2) Actual PR run: opened/reopened OR labeled with rerun label
59+
languagetool:
60+
if: |
61+
github.event_name == 'pull_request_target' &&
62+
(
63+
github.event.action == 'opened' ||
64+
github.event.action == 'reopened' ||
65+
(github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun')
66+
)
67+
runs-on: ubuntu-latest
8868

69+
steps:
8970
- name: Checkout PR head (safe)
90-
if: steps.meta.outputs.run == 'true'
9171
uses: actions/checkout@v4
9272
with:
93-
repository: ${{ steps.meta.outputs.head_repo }}
94-
ref: ${{ steps.meta.outputs.head_sha }}
73+
repository: ${{ github.event.pull_request.head.repo.full_name }}
74+
ref: ${{ github.event.pull_request.head.sha }}
9575
fetch-depth: 0
9676
persist-credentials: false
9777
submodules: false
9878

9979
- name: Fetch base SHA for diffing
100-
if: steps.meta.outputs.run == 'true'
10180
run: |
10281
set -euo pipefail
103-
git remote add upstream "https://github.com/${{ steps.meta.outputs.base_repo }}.git" || true
104-
git fetch --no-tags --depth=1 upstream "${{ steps.meta.outputs.base_sha }}"
82+
git remote add upstream "https://github.com/${{ github.event.pull_request.base.repo.full_name }}.git" || true
83+
git fetch --no-tags --depth=1 upstream "${{ github.event.pull_request.base.sha }}"
10584
10685
- name: Setup Python
107-
if: steps.meta.outputs.run == 'true'
10886
uses: actions/setup-python@v5
10987
with:
11088
python-version: "3.11"
11189

112-
- name: Install Python deps
113-
if: steps.meta.outputs.run == 'true'
90+
- name: Install deps
11491
run: |
11592
python -m pip install --upgrade pip
11693
python -m pip install requests
11794
11895
- name: Setup reviewdog
119-
if: steps.meta.outputs.run == 'true'
12096
uses: reviewdog/action-setup@v1
12197
with:
12298
reviewdog_version: latest
12399

124100
- name: Start LanguageTool server
125-
if: steps.meta.outputs.run == 'true'
126101
run: |
127102
set -euo pipefail
128-
docker run -d --rm --name languagetool -p 8010:8010 erikvl87/languagetool:latest
103+
docker run -d --rm --name languagetool -p "${LT_PORT}:8010" erikvl87/languagetool:latest
129104
130-
# Wait until ready
105+
# Wait until ready (avoid connection reset during warmup)
131106
for i in $(seq 1 60); do
132-
if curl -fsS "http://localhost:8010/v2/languages" >/dev/null; then
107+
if curl -fsS "http://localhost:${LT_PORT}/v2/languages" >/dev/null; then
133108
echo "LanguageTool is up."
134109
exit 0
135110
fi
136111
sleep 2
137112
done
138113
139-
echo "LanguageTool did not become ready in time" >&2
114+
echo "LanguageTool did not become ready" >&2
140115
docker logs languagetool || true
141116
exit 1
142117
143-
- name: Run LanguageTool and comment on PR
144-
if: steps.meta.outputs.run == 'true'
118+
- name: Run LanguageTool -> reviewdog PR review comments
145119
env:
146120
REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
121+
BASE_SHA: ${{ github.event.pull_request.base.sha }}
122+
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
147123
run: |
148124
set -euo pipefail
149-
python .github/scripts/languagetool_reviewdog.py \
150-
--api-url "http://localhost:8010/v2/check" \
151-
--language "en-US" \
152-
--base-sha "${{ steps.meta.outputs.base_sha }}" \
153-
--head-sha "${{ steps.meta.outputs.head_sha }}" \
154-
--dictionary ".languagetool/words.txt" \
155-
| reviewdog -f=rdjson \
156-
-name="LanguageTool" \
157-
-reporter="github-pr-review" \
158-
-filter-mode="file" \
159-
-fail-level="none" \
160-
-level="warning"
161-
162-
- name: Remove rerun label (so it can be added again later)
163-
if: steps.meta.outputs.run == 'true' && github.event_name == 'pull_request_target' && github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun'
125+
126+
# Inline python: produce rdjson for reviewdog (no repo script file needed)
127+
python - <<'PY' > /tmp/rd.json
128+
import json, os, re, subprocess
129+
import requests
130+
131+
# --- Configuration ----------------------------------------------------------
# Values come from the workflow environment; LT_PORT, BASE_SHA and HEAD_SHA
# are required (a missing one raises KeyError), LT_LANGUAGE is optional.

# Language code sent with every check request; falls back to US English.
LANGUAGE = os.getenv("LT_LANGUAGE", "en-US")

# Check endpoint of the LanguageTool server on localhost.
API_URL = f"http://localhost:{os.environ['LT_PORT']}/v2/check"

# The two commits whose diff decides which files are checked.
BASE_SHA = os.environ["BASE_SHA"]
HEAD_SHA = os.environ["HEAD_SHA"]

# Optional word list of accepted spellings (path relative to the repo root).
DICT_PATH = ".languagetool/words.txt"

# Upper bound on replacement suggestions attached to one finding.
MAX_SUG = 3

# Upper bound on the number of characters of a file sent per request
# (avoid huge posts).
MAX_TEXT = 300_000
138+
139+
def sh(*args):
140+
return subprocess.check_output(args, text=True).strip()
141+
142+
def normalize_word(s: str) -> str:
143+
s = re.sub(r"^[\W_]+|[\W_]+$", "", s, flags=re.UNICODE)
144+
return s.lower()
145+
146+
def load_dict(path):
    """Load the custom word list used to suppress spelling findings.

    The file holds one word per line. Blank lines and lines starting with
    ``#`` are ignored; every remaining entry is stripped and lower-cased.

    Args:
        path: Location of the word list.

    Returns:
        A set of lower-cased words; empty when ``path`` is not a regular file.
    """
    # isfile (rather than exists) so that a directory at this path is treated
    # like a missing dictionary instead of crashing in open().
    if not os.path.isfile(path):
        return set()

    words = set()
    # "utf-8-sig" transparently drops a leading byte-order mark; with plain
    # "utf-8" the BOM would be glued to the first entry, which would then
    # never match any word.
    with open(path, "r", encoding="utf-8-sig") as fh:
        for raw in fh:
            entry = raw.strip()
            if entry and not entry.startswith("#"):
                words.add(entry.lower())
    return words
157+
158+
def offset_to_line_col(text, offset):
    """Convert a character offset in ``text`` to a 1-based (line, column) pair.

    The column follows the reviewdog diagnostic format, which counts columns
    in UTF-8 bytes rather than characters, so positions that come after
    non-ASCII text on the same line are reported where reviewdog expects them.

    Args:
        text: The full text the offset refers to.
        offset: 0-based index into ``text``; values outside ``[0, len(text)]``
            are clamped to the nearest valid position.

    Returns:
        ``(line, column)``, both starting at 1.
    """
    offset = max(0, min(offset, len(text)))
    # rfind returns -1 when there is no earlier newline, which makes the
    # first line start at index 0.
    line_start = text.rfind("\n", 0, offset) + 1
    line = text.count("\n", 0, offset) + 1
    prefix = text[line_start:offset]
    col = len(prefix.encode("utf-8", errors="replace")) + 1
    return line, col
163+
164+
def changed_files(base, head):
165+
out = sh("git", "diff", "--name-only", base, head)
166+
return [x.strip() for x in out.splitlines() if x.strip()]
167+
168+
def is_text_file(path):
169+
ext = os.path.splitext(path)[1].lower()
170+
return ext in {".md",".txt",".rst",".adoc",".asciidoc",".tex"}
171+
172+
# Driver: send every changed prose file to LanguageTool and print a single
# rdjson document on stdout (redirected to a file and fed to reviewdog).
dict_words = load_dict(DICT_PATH)

# The working tree is the PR head checkout, i.e. untrusted content: only
# regular files are read, and symlinks are skipped so a pull request cannot
# make this script read a file outside the repository.
files = [
    f for f in changed_files(BASE_SHA, HEAD_SHA)
    if is_text_file(f) and os.path.isfile(f) and not os.path.islink(f)
]
diagnostics = []

for path in files:
    # errors="replace" turns undecodable bytes into U+FFFD instead of
    # aborting; for valid UTF-8 it yields the same text as a strict decode.
    # Reading at most MAX_TEXT characters keeps the request small, and the
    # "with" block closes the handle.
    with open(path, "r", encoding="utf-8", errors="replace") as fh:
        content = fh.read(MAX_TEXT)

    if not content.strip():
        continue

    try:
        r = requests.post(API_URL, data={"language": LANGUAGE, "text": content}, timeout=60)
        r.raise_for_status()
        data = r.json()
    except Exception as e:
        # Best effort: a failed request becomes a warning on the file instead
        # of failing the whole run.
        diagnostics.append({
            "message": f"LanguageTool API error for {path}: {e}",
            "location": {"path": path, "range": {"start": {"line": 1, "column": 1}}},
            "severity": "WARNING",
        })
        continue

    for m in data.get("matches", []):
        offset = int(m.get("offset", 0))
        length = max(int(m.get("length", 0)), 0)
        bad = content[offset:offset + length]
        rule = m.get("rule") or {}
        rule_id = rule.get("id") or "UNKNOWN_RULE"
        # "or" fallbacks also cover keys that are present but null.
        category = (rule.get("category") or {}).get("id") or ""

        # Custom dictionary: ignore spelling-ish matches when token is in words.txt
        bad_norm = normalize_word(bad)
        if dict_words and bad_norm:
            looks_like_spelling = (category.upper() == "TYPOS") or ("MORFOLOGIK" in str(rule_id).upper())
            if looks_like_spelling and (bad_norm in dict_words):
                continue

        sl, sc = offset_to_line_col(content, offset)
        el, ec = offset_to_line_col(content, offset + length)
        span = {"start": {"line": sl, "column": sc}, "end": {"line": el, "column": ec}}

        # At most MAX_SUG replacement candidates are considered; empty ones
        # are dropped.
        suggestions = [
            {"range": span, "text": repl.get("value")}
            for repl in (m.get("replacements") or [])[:MAX_SUG]
            if repl.get("value")
        ]

        code = {"value": rule_id}
        urls = rule.get("urls") or []
        first_url = urls[0] if isinstance(urls, list) and urls else None
        # Guard the entry type so an unexpected payload shape cannot crash here.
        if isinstance(first_url, dict) and first_url.get("value"):
            code["url"] = first_url["value"]

        diagnostic = {
            "message": m.get("message") or "LanguageTool finding",
            "location": {"path": path, "range": span},
            "severity": "WARNING",
            "code": code,
        }
        if suggestions:
            diagnostic["suggestions"] = suggestions
        diagnostics.append(diagnostic)

print(json.dumps({
    "source": {"name": "LanguageTool", "url": "https://languagetool.org"},
    "diagnostics": diagnostics
}))
250+
PY
251+
252+
reviewdog -f=rdjson \
253+
-name="LanguageTool" \
254+
-reporter="github-pr-review" \
255+
-filter-mode="diff_context" \
256+
-fail-level="none" \
257+
-level="warning" < /tmp/rd.json
258+
259+
- name: Remove rerun label
260+
if: github.event.action == 'labeled' && github.event.label.name == 'languagetool:rerun'
261+
continue-on-error: true
164262
uses: actions/github-script@v7
165263
with:
166264
script: |
167265
await github.rest.issues.removeLabel({
168266
owner: context.repo.owner,
169267
repo: context.repo.repo,
170268
issue_number: context.payload.pull_request.number,
171-
name: "languagetool:rerun",
269+
name: process.env.RERUN_LABEL,
172270
});
173271
174272
- name: Stop LanguageTool
175-
if: always() && steps.meta.outputs.run == 'true'
273+
if: always()
176274
run: docker stop languagetool || true

0 commit comments

Comments
 (0)