1- name : LanguageTool (PR)
1+ name : LanguageTool
22
33on :
44 pull_request :
55 types : [opened, synchronize, reopened, ready_for_review]
6+ push :
7+ branches : ["**"]
68
79permissions :
810 contents : read
@@ -32,20 +34,35 @@ jobs:
3234 unzip -q LT.zip
3335 echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV"
3436
35- - name : Run LanguageTool on changed PR files + comment summary
37+ - name : Run LanguageTool ( changed files) and comment on PR
3638 env :
37- BASE_SHA : ${{ github.event.pull_request.base.sha }}
38- HEAD_SHA : ${{ github.event.pull_request.head.sha }}
3939 GH_TOKEN : ${{ secrets.GITHUB_TOKEN }}
40- PR_NUMBER : ${{ github.event.pull_request.number }}
4140 REPO : ${{ github.repository }}
41+ EVENT_NAME : ${{ github.event_name }}
42+ PR_NUMBER : ${{ github.event.pull_request.number }}
43+ BASE_SHA : ${{ github.event.pull_request.base.sha }}
44+ HEAD_SHA : ${{ github.event.pull_request.head.sha }}
4245 run : |
4346 set -euo pipefail
4447
45- echo "Base: $BASE_SHA"
46- echo "Head: $HEAD_SHA"
48+ # Only comment when this run is for a PR event
49+ IS_PR=0
50+ if [ "$EVENT_NAME" = "pull_request" ]; then
51+ IS_PR=1
52+ fi
53+
54+ if [ "$IS_PR" -eq 1 ]; then
55+ echo "Base: $BASE_SHA"
56+ echo "Head: $HEAD_SHA"
57+ DIFF_BASE="$BASE_SHA"
58+ DIFF_HEAD="$HEAD_SHA"
59+ else
60+ echo "Push run: comparing against previous commit"
61+ DIFF_BASE="${GITHUB_SHA}^"
62+ DIFF_HEAD="${GITHUB_SHA}"
63+ fi
4764
48- mapfile -t FILES < <(git diff --name-only "$BASE_SHA " "$HEAD_SHA " \
65+ mapfile -t FILES < <(git diff --name-only "$DIFF_BASE " "$DIFF_HEAD " \
4966 | grep -E '\.(md|mdx|rst|txt)$' || true)
5067
5168 if [ "${#FILES[@]}" -eq 0 ]; then
@@ -69,36 +86,40 @@ jobs:
6986
7087 tmp="$(mktemp)"
7188
72- # Robust preprocessing (won't fail the job if it errors; falls back to original file)
73- if ! python3 - "$f" > "$tmp" 2>/dev/null << 'PY'
89+ # Preprocess with Python. If preprocessing fails, fall back to the original file.
90+ if python3 - "$f" > "$tmp" 2>/dev/null <<'PY'
7491import re, sys
7592path = sys.argv[1]
7693text = open(path, "r", encoding="utf-8", errors="replace").read()
7794
7895# Remove YAML frontmatter at top
7996if text.startswith("---\n") :
80- m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
81- if m :
82- text = text[m.end():]
97+ m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
98+ if m :
99+ text = text[m.end():]
83100
84101# Remove fenced code blocks
85102text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M)
86103
87104# Remove inline code spans
88105text = re.sub(r"`[^`]*`", " ", text)
89106
90- # Neutralize common technical tokens (paths, filenames/exts, long identifiers)
107+ # Neutralize path-ish tokens
91108text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text)
92109
110+ # Neutralize common filename tokens with extensions
93111exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)"
94112text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I)
95113
114+ # Neutralize very long identifier-ish tokens
96115text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text)
97116
98117text = re.sub(r"[ \t]+", " ", text)
99118sys.stdout.write(text)
100119PY
101120 then
121+ :
122+ else
102123 cp "$f" "$tmp"
103124 fi
104125
118139 fi
119140 done
120141
121- # Build PR comment body (upsert by marker)
122- MARKER="<!-- languagetool-report -->"
123-
124- if [ "$issues" -ne 0 ]; then
142+ # If PR: upsert a single comment with a marker
143+ if [ "$IS_PR" -eq 1 ]; then
144+ MARKER="<!-- languagetool-report -->"
125145 BODY_FILE="$(mktemp)"
126- {
127- echo "$MARKER"
128- echo "### LanguageTool findings"
129- echo
130- echo "_Checked files changed in this PR (frontmatter + code blocks removed; inline code stripped)._"
131- echo
132- echo '```'
133- cat "$REPORT_FILE"
134- echo '```'
135- } > "$BODY_FILE"
136-
137- # Find existing comment with marker (if any) and update it; otherwise create a new one
146+
147+ if [ "$issues" -ne 0 ]; then
148+ {
149+ echo "$MARKER"
150+ echo "### LanguageTool findings"
151+ echo
152+ echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._"
153+ echo
154+ echo '```'
155+ cat "$REPORT_FILE"
156+ echo '```'
157+ } > "$BODY_FILE"
158+ else
159+ {
160+ echo "$MARKER"
161+ echo "### LanguageTool findings"
162+ echo
163+ echo "✅ No issues found in changed files."
164+ } > "$BODY_FILE"
165+ fi
166+
138167 COMMENTS_JSON="$(mktemp)"
139168 gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON"
140169
141- COMMENT_ID="$(python3 - << 'PY'
170+ COMMENT_ID="$(python3 - "$COMMENTS_JSON" << 'PY'
142171import json, sys
143172data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
144173for c in data :
145- if "<!-- languagetool-report -->" in (c.get("body") or "") :
146- print(c["id"])
147- break
174+ if "<!-- languagetool-report -->" in (c.get("body") or "") :
175+ print(c["id"])
176+ break
148177PY
149- " $COMMENTS_JSON " )"
178+ )"
150179
151180 if [ -n "${COMMENT_ID:-}" ]; then
152181 gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null
155184 gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null
156185 echo "Posted new LanguageTool comment."
157186 fi
158- else
159- echo "No LanguageTool issues found."
160187 fi
161188
162- rm -f "$REPORT_FILE" || true
163-
164189 if [ "$issues" -ne 0 ]; then
165190 exit 1
166191 fi
0 commit comments