Skip to content

Commit 2c9d531

Browse files
authored
Update languagetool-pr.yml
1 parent 072c78e commit 2c9d531

1 file changed

Lines changed: 64 additions & 39 deletions

File tree

.github/workflows/languagetool-pr.yml

Lines changed: 64 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
name: LanguageTool (PR)
1+
name: LanguageTool
22

33
on:
44
pull_request:
55
types: [opened, synchronize, reopened, ready_for_review]
6+
push:
7+
branches: ["**"]
68

79
permissions:
810
contents: read
@@ -32,20 +34,35 @@ jobs:
3234
unzip -q LT.zip
3335
echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV"
3436
35-
- name: Run LanguageTool on changed PR files + comment summary
37+
- name: Run LanguageTool (changed files) and comment on PR
3638
env:
37-
BASE_SHA: ${{ github.event.pull_request.base.sha }}
38-
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
3939
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40-
PR_NUMBER: ${{ github.event.pull_request.number }}
4140
REPO: ${{ github.repository }}
41+
EVENT_NAME: ${{ github.event_name }}
42+
PR_NUMBER: ${{ github.event.pull_request.number }}
43+
BASE_SHA: ${{ github.event.pull_request.base.sha }}
44+
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
4245
run: |
4346
set -euo pipefail
4447
45-
echo "Base: $BASE_SHA"
46-
echo "Head: $HEAD_SHA"
48+
# Only comment when this run is for a PR event
49+
IS_PR=0
50+
if [ "$EVENT_NAME" = "pull_request" ]; then
51+
IS_PR=1
52+
fi
53+
54+
if [ "$IS_PR" -eq 1 ]; then
55+
echo "Base: $BASE_SHA"
56+
echo "Head: $HEAD_SHA"
57+
DIFF_BASE="$BASE_SHA"
58+
DIFF_HEAD="$HEAD_SHA"
59+
else
60+
echo "Push run: comparing against previous commit"
61+
DIFF_BASE="${GITHUB_SHA}^"
62+
DIFF_HEAD="${GITHUB_SHA}"
63+
fi
4764
48-
mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \
65+
mapfile -t FILES < <(git diff --name-only "$DIFF_BASE" "$DIFF_HEAD" \
4966
| grep -E '\.(md|mdx|rst|txt)$' || true)
5067
5168
if [ "${#FILES[@]}" -eq 0 ]; then
@@ -69,36 +86,40 @@ jobs:
6986
7087
tmp="$(mktemp)"
7188
72-
# Robust preprocessing (won't fail the job if it errors; falls back to original file)
73-
if ! python3 - "$f" > "$tmp" 2>/dev/null << 'PY'
89+
# Preprocess with Python. If preprocessing fails, fall back to the original file.
90+
if python3 - "$f" > "$tmp" 2>/dev/null <<'PY'
7491
import re, sys
7592
path = sys.argv[1]
7693
text = open(path, "r", encoding="utf-8", errors="replace").read()
7794

7895
# Remove YAML frontmatter at top
7996
if text.startswith("---\n"):
80-
m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
81-
if m:
82-
text = text[m.end():]
97+
m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
98+
if m:
99+
text = text[m.end():]
83100

84101
# Remove fenced code blocks
85102
text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M)
86103

87104
# Remove inline code spans
88105
text = re.sub(r"`[^`]*`", " ", text)
89106

90-
# Neutralize common technical tokens (paths, filenames/exts, long identifiers)
107+
# Neutralize path-ish tokens
91108
text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text)
92109

110+
# Neutralize common filename tokens with extensions
93111
exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)"
94112
text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I)
95113

114+
# Neutralize very long identifier-ish tokens
96115
text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text)
97116

98117
text = re.sub(r"[ \t]+", " ", text)
99118
sys.stdout.write(text)
100119
PY
101120
then
121+
:
122+
else
102123
cp "$f" "$tmp"
103124
fi
104125

@@ -118,35 +139,43 @@ PY
118139
fi
119140
done
120141

121-
# Build PR comment body (upsert by marker)
122-
MARKER="<!-- languagetool-report -->"
123-
124-
if [ "$issues" -ne 0 ]; then
142+
# If PR: upsert a single comment with a marker
143+
if [ "$IS_PR" -eq 1 ]; then
144+
MARKER="<!-- languagetool-report -->"
125145
BODY_FILE="$(mktemp)"
126-
{
127-
echo "$MARKER"
128-
echo "### LanguageTool findings"
129-
echo
130-
echo "_Checked files changed in this PR (frontmatter + code blocks removed; inline code stripped)._"
131-
echo
132-
echo '```'
133-
cat "$REPORT_FILE"
134-
echo '```'
135-
} > "$BODY_FILE"
136-
137-
# Find existing comment with marker (if any) and update it; otherwise create a new one
146+
147+
if [ "$issues" -ne 0 ]; then
148+
{
149+
echo "$MARKER"
150+
echo "### LanguageTool findings"
151+
echo
152+
echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._"
153+
echo
154+
echo '```'
155+
cat "$REPORT_FILE"
156+
echo '```'
157+
} > "$BODY_FILE"
158+
else
159+
{
160+
echo "$MARKER"
161+
echo "### LanguageTool findings"
162+
echo
163+
echo "✅ No issues found in changed files."
164+
} > "$BODY_FILE"
165+
fi
166+
138167
COMMENTS_JSON="$(mktemp)"
139168
gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON"
140169

141-
COMMENT_ID="$(python3 - << 'PY'
170+
COMMENT_ID="$(python3 - "$COMMENTS_JSON" <<'PY'
142171
import json, sys
143172
data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
144173
for c in data:
145-
if "<!-- languagetool-report -->" in (c.get("body") or ""):
146-
print(c["id"])
147-
break
174+
if "<!-- languagetool-report -->" in (c.get("body") or ""):
175+
print(c["id"])
176+
break
148177
PY
149-
"$COMMENTS_JSON")"
178+
)"
150179

151180
if [ -n "${COMMENT_ID:-}" ]; then
152181
gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null
@@ -155,12 +184,8 @@ PY
155184
gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null
156185
echo "Posted new LanguageTool comment."
157186
fi
158-
else
159-
echo "No LanguageTool issues found."
160187
fi
161188

162-
rm -f "$REPORT_FILE" || true
163-
164189
if [ "$issues" -ne 0 ]; then
165190
exit 1
166191
fi

0 commit comments

Comments
 (0)