44 pull_request :
55 types : [opened, synchronize, reopened, ready_for_review]
66
# Least-privilege scopes for the workflow's GITHUB_TOKEN.
permissions:
  # Read-only repository access; the job only inspects PR commits with git.
  contents: read
  # Required so the job can create or update its summary comment on the PR.
  pull-requests: write
10+
711jobs :
812 languagetool :
913 runs-on : ubuntu-latest
@@ -28,17 +32,19 @@ jobs:
2832 unzip -q LT.zip
2933 echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV"
3034
31- - name : Run LanguageTool on changed PR files (cleaned)
35+ - name : Run LanguageTool on changed PR files + comment summary
3236 env :
3337 BASE_SHA : ${{ github.event.pull_request.base.sha }}
3438 HEAD_SHA : ${{ github.event.pull_request.head.sha }}
39+ GH_TOKEN : ${{ secrets.GITHUB_TOKEN }}
40+ PR_NUMBER : ${{ github.event.pull_request.number }}
41+ REPO : ${{ github.repository }}
3542 run : |
3643 set -euo pipefail
3744
3845 echo "Base: $BASE_SHA"
3946 echo "Head: $HEAD_SHA"
4047
41- # File types to check (add/remove as needed)
4248 mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \
4349 | grep -E '\.(md|mdx|rst|txt)$' || true)
4450
@@ -54,46 +60,107 @@ jobs:
5460 LANG="en-US"
5561
5662 issues=0
63+ REPORT_FILE="$(mktemp)"
64+ : > "$REPORT_FILE"
5765
5866 for f in "${FILES[@]}"; do
5967 echo "-----"
6068 echo "Checking: $f"
6169
6270 tmp="$(mktemp)"
6371
64- # Keep "nearly all" errors but remove the biggest source of noise:
65- # - YAML frontmatter at top of file (--- ... ---)
66- # - fenced code blocks (``` ... ```)
67- #
68- # Everything else is checked (including normal prose in MDX).
69- awk '
70- BEGIN { fm=0; code=0; }
71- NR==1 && $0=="---" { fm=1; next }
72- fm==1 && $0=="---" { fm=0; next }
73- fm==1 { next }
74-
75- /^```/ { code = !code; next }
76- code==1 { next }
77-
78- { print }
79- ' "$f" > "$tmp"
80-
81- # LanguageTool CLI prints findings to stdout; it typically exits 0 even with findings,
82- # so we detect findings by whether output is non-empty.
# Preprocess "$f" into "$tmp" before linting. Best-effort: if the Python
# filter exits non-zero, the untouched file is checked instead, so a filter
# error never fails the job. Removed spans are replaced by the newlines they
# contained, so the line numbers LanguageTool reports still match "$f".
# NOTE: this is a plain (non-dash) heredoc, so the Python lines and the closing
# PY must start at the script's base indentation.
if ! python3 - "$f" > "$tmp" 2>/dev/null << 'PY'
import re
import sys


def keep_newlines(match):
    """Blank out a span but keep its line breaks so line numbers do not shift."""
    return "\n" * match.group(0).count("\n")


def keep_newlines_inline(match):
    """Like keep_newlines, but leave a space so neighbouring words stay apart."""
    return " " + "\n" * match.group(0).count("\n")


path = sys.argv[1]
with open(path, "r", encoding="utf-8", errors="replace") as fh:
    text = fh.read()

# Blank out YAML frontmatter at the very top of the file
if text.startswith("---\n"):
    m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
    if m:
        text = keep_newlines(m) + text[m.end():]

# Blank out fenced code blocks (opening fence line through closing fence line)
text = re.sub(r"^```.*?$.*?^```.*?$", keep_newlines, text, flags=re.S | re.M)

# Blank out inline code spans
text = re.sub(r"`[^`]*`", keep_newlines_inline, text)

# Neutralize common technical tokens (paths, filenames/exts, long identifiers)
text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text)

exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)"
text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I)

text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text)

# Collapse runs of spaces/tabs left behind by the substitutions
# (newlines are untouched)
text = re.sub(r"[ \t]+", " ", text)
sys.stdout.write(text)
PY
then
  # Filter failed: fall back to checking the original file as-is.
  cp "$f" "$tmp"
fi
104+
83105 out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)"
84106 rm -f "$tmp"
85107
86108 if [ -n "$out" ]; then
87109 issues=1
88110 echo "$out"
111+ {
112+ echo "FILE : $f"
113+ echo "$out"
114+ echo
115+ } >> "$REPORT_FILE"
89116 else
90117 echo "OK"
91118 fi
92119 done
93120
121+ # Build PR comment body (upsert by marker)
122+ MARKER="<!-- languagetool-report -->"
123+
94124 if [ "$issues" -ne 0 ]; then
95- echo "LanguageTool found issues."
96- exit 1
# Assemble the Markdown comment body: hidden marker first (used to find the
# comment again later), then a heading, a short note, and the raw report
# inside a fenced block.
BODY_FILE="$(mktemp)"
{
  printf '%s\n' \
    "$MARKER" \
    "### LanguageTool findings" \
    "" \
    "_Checked files changed in this PR (frontmatter + code blocks removed; inline code stripped)._" \
    "" \
    '```'
  cat "$REPORT_FILE"
  printf '%s\n' '```'
} > "$BODY_FILE"
136+
# Upsert the report comment: look for a comment on this PR that already
# carries the marker and update it in place; otherwise post a new one.
# Only the first page of up to 100 comments is inspected (no pagination).
COMMENTS_JSON="$(mktemp)"
gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON"

# Print the id of the first comment whose body contains the marker (prints
# nothing when there is none). The arguments must come BEFORE the heredoc:
# anything placed after the closing PY line is run as a separate shell
# command and never reaches Python as sys.argv.
# NOTE: this is a plain (non-dash) heredoc, so the Python lines and the closing
# PY must start at the script's base indentation.
COMMENT_ID="$(python3 - "$COMMENTS_JSON" "$MARKER" << 'PY'
import json, sys

comments_path, marker = sys.argv[1], sys.argv[2]
with open(comments_path, "r", encoding="utf-8") as fh:
    data = json.load(fh)
for c in data:
    # "body" may be missing or null; treat that as an empty string
    if marker in (c.get("body") or ""):
        print(c["id"])
        break
PY
)"
rm -f "$COMMENTS_JSON"

if [ -n "${COMMENT_ID:-}" ]; then
  gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null
  echo "Updated existing LanguageTool comment."
else
  gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null
  echo "Posted new LanguageTool comment."
fi
158+ else
159+ echo "No LanguageTool issues found."
97160 fi
98161
99- echo "No LanguageTool issues found."
162+ rm -f "$REPORT_FILE" || true
163+
164+ if [ "$issues" -ne 0 ]; then
165+ exit 1
166+ fi
0 commit comments