Skip to content

Commit 072c78e

Browse files
authored
Update languagetool-pr.yml
1 parent c00b21e commit 072c78e

1 file changed

Lines changed: 91 additions & 24 deletions

File tree

.github/workflows/languagetool-pr.yml

Lines changed: 91 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,10 @@ on:
44
pull_request:
55
types: [opened, synchronize, reopened, ready_for_review]
66

7+
permissions:
8+
contents: read
9+
pull-requests: write
10+
711
jobs:
812
languagetool:
913
runs-on: ubuntu-latest
@@ -28,17 +32,19 @@ jobs:
2832
unzip -q LT.zip
2933
echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV"
3034
31-
- name: Run LanguageTool on changed PR files (cleaned)
35+
- name: Run LanguageTool on changed PR files + comment summary
3236
env:
3337
BASE_SHA: ${{ github.event.pull_request.base.sha }}
3438
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
39+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40+
PR_NUMBER: ${{ github.event.pull_request.number }}
41+
REPO: ${{ github.repository }}
3542
run: |
3643
set -euo pipefail
3744
3845
echo "Base: $BASE_SHA"
3946
echo "Head: $HEAD_SHA"
4047
41-
# File types to check (add/remove as needed)
4248
mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \
4349
| grep -E '\.(md|mdx|rst|txt)$' || true)
4450
@@ -54,46 +60,107 @@ jobs:
5460
LANG="en-US"
5561
5662
issues=0
63+
REPORT_FILE="$(mktemp)"
64+
: > "$REPORT_FILE"
5765
5866
for f in "${FILES[@]}"; do
5967
echo "-----"
6068
echo "Checking: $f"
6169
6270
tmp="$(mktemp)"
6371
64-
# Keep "nearly all" errors but remove the biggest source of noise:
65-
# - YAML frontmatter at top of file (--- ... ---)
66-
# - fenced code blocks (``` ... ```)
67-
#
68-
# Everything else is checked (including normal prose in MDX).
69-
awk '
70-
BEGIN { fm=0; code=0; }
71-
NR==1 && $0=="---" { fm=1; next }
72-
fm==1 && $0=="---" { fm=0; next }
73-
fm==1 { next }
74-
75-
/^```/ { code = !code; next }
76-
code==1 { next }
77-
78-
{ print }
79-
' "$f" > "$tmp"
80-
81-
# LanguageTool CLI prints findings to stdout; it typically exits 0 even with findings,
82-
# so we detect findings by whether output is non-empty.
72+
# Reduce "$f" to checkable prose before handing it to LanguageTool.
#
# Removed regions (frontmatter, fenced code, inline code) keep their newlines,
# so the line numbers LanguageTool reports still point at the matching lines of
# the original file. If preprocessing fails, the unmodified file is checked
# instead of aborting the run.
#
# Arguments: $1 - path of the file to clean
# Outputs:   cleaned text on stdout
# Returns:   python3's exit status
lt_preprocess() {
  python3 - "$1" << 'PY'
import re, sys

path = sys.argv[1]
with open(path, "r", encoding="utf-8", errors="replace") as fh:
    text = fh.read()


def keep_newlines(match):
    # Blank out a removed region but keep its line breaks so later lines
    # stay on their original line numbers.
    return "\n" * match.group(0).count("\n")


# Remove YAML frontmatter at top
if text.startswith("---\n"):
    m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
    if m:
        text = keep_newlines(m) + text[m.end():]

# Remove fenced code blocks
text = re.sub(r"^```.*?$.*?^```.*?$", keep_newlines, text, flags=re.S | re.M)

# Remove inline code spans (a span may cross a line break)
text = re.sub(r"`[^`]*`", lambda m: " " + keep_newlines(m), text)

# Neutralize common technical tokens (paths, filenames/exts, long identifiers)
text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text)

exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)"
text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I)

text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text)

# Collapse runs of spaces/tabs left behind by the substitutions above
text = re.sub(r"[ \t]+", " ", text)
sys.stdout.write(text)
PY
}

# A failed preprocess must not fail the job: report it and check the raw file.
if ! lt_preprocess "$f" > "$tmp"; then
  echo "Preprocessing failed for $f; checking the original file instead." >&2
  cp -- "$f" "$tmp"
fi
104+
83105
out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)"
84106
rm -f "$tmp"
85107

86108
if [ -n "$out" ]; then
87109
issues=1
88110
echo "$out"
111+
{
112+
echo "FILE: $f"
113+
echo "$out"
114+
echo
115+
} >> "$REPORT_FILE"
89116
else
90117
echo "OK"
91118
fi
92119
done
93120

121+
# Build PR comment body (upsert by marker)
122+
MARKER="<!-- languagetool-report -->"
123+
94124
if [ "$issues" -ne 0 ]; then
95-
echo "LanguageTool found issues."
96-
exit 1
125+
BODY_FILE="$(mktemp)"
126+
{
127+
echo "$MARKER"
128+
echo "### LanguageTool findings"
129+
echo
130+
echo "_Checked files changed in this PR (frontmatter + code blocks removed; inline code stripped)._"
131+
echo
132+
echo '```'
133+
cat "$REPORT_FILE"
134+
echo '```'
135+
} > "$BODY_FILE"
136+
137+
# Find existing comment with marker (if any) and update it; otherwise create a new one
138+
COMMENTS_JSON="$(mktemp)"
139+
gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100" > "$COMMENTS_JSON"
140+
141+
# Look up the id of the report comment this workflow posted earlier, if any.
#
# The JSON path has to be given on the python3 command line itself: anything
# placed after the heredoc terminator is a separate shell command, so the
# interpreter would get no argv[1] and the shell would then try to execute the
# JSON file.
#
# Arguments: $1 - file holding the JSON array returned by the issue-comments API
# Outputs:   id of the first comment containing the report marker; nothing if none
# Returns:   python3's exit status (non-zero if the file is missing or not JSON)
find_report_comment_id() {
  python3 - "$1" << 'PY'
import json, sys

# Keep in sync with the MARKER value written at the top of the comment body.
MARKER = "<!-- languagetool-report -->"

with open(sys.argv[1], "r", encoding="utf-8") as fh:
    comments = json.load(fh)

for c in comments:
    # "body" may be missing or null, so fall back to an empty string.
    if MARKER in (c.get("body") or ""):
        print(c["id"])
        break
PY
}

COMMENT_ID="$(find_report_comment_id "$COMMENTS_JSON")"
150+
151+
if [ -n "${COMMENT_ID:-}" ]; then
152+
gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null
153+
echo "Updated existing LanguageTool comment."
154+
else
155+
gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null
156+
echo "Posted new LanguageTool comment."
157+
fi
158+
else
159+
echo "No LanguageTool issues found."
97160
fi
98161

99-
echo "No LanguageTool issues found."
162+
rm -f "$REPORT_FILE" || true
163+
164+
if [ "$issues" -ne 0 ]; then
165+
exit 1
166+
fi

0 commit comments

Comments
 (0)