1+ # .github/workflows/languagetool.yml
12name : LanguageTool (PR comment)
23
34on :
2021 RERUN_LABEL : languagetool:rerun
2122
2223jobs :
23- # Comment command -> toggle a label to trigger the PR job
24+ # Comment command -> toggles a label to trigger the PR job
2425 rerun_on_comment :
2526 if : |
2627 github.event_name == 'issue_comment' &&
4041 const issue_number = context.issue.number;
4142 const label = process.env.RERUN_LABEL;
4243
43- // ensure label exists
44+ // Ensure label exists
4445 try {
4546 await github.rest.issues.getLabel({ owner, repo, name: label });
4647 } catch {
5051 });
5152 }
5253
53- // remove if present (ignore if missing), then add to force a "labeled" event
54- try { await github.rest.issues.removeLabel({ owner, repo, issue_number, name: label }); } catch {}
54+ // Remove if present (ignore errors), then add to force a new "labeled" event
55+ try {
56+ await github.rest.issues.removeLabel({ owner, repo, issue_number, name: label });
57+ } catch {}
5558 await github.rest.issues.addLabels({ owner, repo, issue_number, labels: [label] });
5659
5760 languagetool :
@@ -86,32 +89,36 @@ jobs:
8689 distribution : temurin
8790 java-version : "17"
8891
89- - name : Download LanguageTool snapshot (CLI)
92+ - name : Download LanguageTool CLI (latest snapshot)
9093 run : |
9194 set -euo pipefail
9295 curl -fsSL -o lt.zip "https://internal1.languagetool.org/snapshots/LanguageTool-latest-snapshot.zip"
9396 rm -rf .lt
9497 mkdir -p .lt
9598 unzip -q lt.zip -d .lt
96- # locate the command line jar
99+
97100 LT_JAR="$(ls -1 .lt/**/languagetool-commandline.jar 2>/dev/null | head -n1 || true)"
98101 if [ -z "${LT_JAR}" ]; then
99102 echo "Could not find languagetool-commandline.jar in snapshot" >&2
100- find .lt -maxdepth 3 -type f -name "*languagetool*jar" -print >&2 || true
103+ find .lt -maxdepth 4 -type f -name "*languagetool*jar" -print >&2 || true
101104 exit 1
102105 fi
106+
103107 echo "LT_JAR=${LT_JAR}" >> "$GITHUB_ENV"
104108
105- - name : Run LanguageTool on changed text files and build PR comment
109+ - name : Run LanguageTool + build PR comment
106110 env :
107111 BASE_SHA : ${{ github.event.pull_request.base.sha }}
108112 HEAD_SHA : ${{ github.event.pull_request.head.sha }}
109113 run : |
110114 set -euo pipefail
111115
112- # Choose which files to check (edit this regex to include more types)
116+ # jq is present on ubuntu-latest, but install if your runner image differs
117+ command -v jq >/dev/null || (sudo apt-get update && sudo apt-get install -y jq)
118+
119+ # Choose files to check
113120 mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \
114- | grep -E '\.(md|txt|rst|adoc|asciidoc|tex|mdx)$' || true)
121+ | grep -E '\.(md|mdx|txt|rst|adoc|asciidoc|tex)$' || true)
115122
116123 # Load custom words (optional)
117124 WORDS_FILE=".languagetool/words.txt"
@@ -134,19 +141,21 @@ jobs:
134141 for f in "${FILES[@]}"; do
135142 [ -f "$f" ] || continue
136143
137- # LT prints banner lines before JSON sometimes; keep JSON only
144+ # LT can print banner lines; keep JSON only (accepts either { or [)
138145 java -jar "$LT_JAR" -l "${LT_LANGUAGE}" --json "$f" 2>/dev/null \
139- | sed -n '/^{/,$p' > lt.json || true
146+ | sed -n '/^[{[]/,$p' > lt.json || true
140147
141148 # Extract issues and filter spelling-ish matches for custom words
142149 jq -c \
143150 --arg file "$f" \
144151 --argjson words "$WORDS_JSON" '
145152 def badtoken:
146- (.context.text
147- | .[.context.offset:(.context.offset + .context.length)]
148- | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"")
149- | ascii_downcase);
153+ (.context.offset // 0) as $o
154+ | (.context.length // 0) as $l
155+ | (.context.text // "") as $t
156+ | ($t[$o:($o+$l)]
157+ | gsub("^[^[:alnum:]]+|[^[:alnum:]]+$";"")
158+ | ascii_downcase);
150159
151160 (.matches // [])
152161 | map(
@@ -155,7 +164,6 @@ jobs:
155164 | ($m.rule.category.id // "") as $cat
156165 | (badtoken) as $bt
157166 | select(
158- # drop spelling-ish warnings when the token is in our custom list
159167 ( (( $cat == "TYPOS") or ($rid|test("MORFOLOGIK";"i")) )
160168 and (($words|index($bt)) != null)
161169 ) | not
@@ -179,12 +187,11 @@ jobs:
179187 const fs = require("fs");
180188
181189 const marker = "<!-- languagetool-report -->";
182- const lines = fs.readFileSync("results.jsonl","utf8").trim().split("\n").filter(Boolean);
190+ const raw = fs.readFileSync("results.jsonl","utf8").trim();
191+ const lines = raw ? raw.split("\n").filter(Boolean) : [];
183192 const parsed = lines.map(l => JSON.parse(l));
184193
185- const checkedFiles = parsed
186- .map(p => p.file)
187- .filter(f => f && f !== "(none)");
194+ const checkedFiles = parsed.map(p => p.file).filter(f => f && f !== "(none)");
188195 const byFile = parsed
189196 .filter(p => Array.isArray(p.issues) && p.issues.length > 0)
190197 .reduce((acc, p) => { acc[p.file] = p.issues; return acc; }, {});
@@ -196,19 +203,19 @@ jobs:
196203 ## LanguageTool report
197204
198205 **Language:** \`${process.env.LT_LANGUAGE || "en-US"}\`
199- **Checked files:** ${checkedFiles.length ? checkedFiles.length : 0}
206+ **Checked files:** ${checkedFiles.length}
200207 **Findings:** ${total}
201208 `;
202209
203210 if (!checkedFiles.length) {
204- body += `\nNo supported text files changed in this PR (based on the file extensions configured).\n`;
211+ body += `\nNo supported text files changed in this PR (based on configured extensions).\n`;
205212 } else if (total === 0) {
206213 body += `\n✅ No issues found in the changed text files.\n`;
207214 } else {
208215 body += `\n---\n`;
209216 for (const [file, issues] of Object.entries(byFile)) {
210217 body += `\n### ${file}\n`;
211- for (const it of issues.slice(0, 200)) { // cap to avoid huge comments
218+ for (const it of issues.slice(0, 200)) {
212219 const ctx = (it.context || "").replace(/\s+/g, " ").trim();
213220 const snippet = ctx ? `\n> ${ctx}\n` : "";
214221 const sug = (it.replacements && it.replacements.length)
0 commit comments