Skip to content

Commit 1ff1ed4

Browse files
authored
Refactor LanguageTool workflow for PR review
Updated the LanguageTool workflow to trigger on pull request target events and added rerun functionality via issue comments.
1 parent 3114311 commit 1ff1ed4

1 file changed

Lines changed: 133 additions & 157 deletions

File tree

Lines changed: 133 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -1,180 +1,156 @@
1-
name: LanguageTool (PR)
1+
name: LanguageTool (PR review)
22

33
on:
4-
pull_request:
5-
types: [opened, synchronize, reopened, ready_for_review]
4+
# Run when the PR is opened or reopened, or when a label is added (the languagetool job only acts on the rerun label).
5+
pull_request_target:
6+
types: [opened, reopened, labeled]
7+
8+
# Allow maintainers to rerun by commenting "/languagetool"
9+
issue_comment:
10+
types: [created]
611

712
permissions:
813
contents: read
914
pull-requests: write
15+
issues: write
16+
concurrency:
  # Keep at most one run per PR *per event type*. The event name is part of
  # the key so that an ordinary comment on the PR (an issue_comment run) does
  # not cancel a LanguageTool run that pull_request_target started for the
  # same PR number.
  group: languagetool-${{ github.event_name }}-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }}
  cancel-in-progress: true
20+
21+
env:
22+
LT_LANGUAGE: en-US
23+
RERUN_LABEL: languagetool:run
1024

1125
jobs:
# Comment command -> adds a label -> the `labeled` event triggers the real run.
# NOTE(review): GitHub does not start new workflow runs for events created
# with the default GITHUB_TOKEN, so the label added here may never fire
# `pull_request_target: labeled`. If reruns do not start, pass a PAT or
# GitHub App token to github-script via its `github-token` input.
rerun_on_comment:
  # Only for comments on pull requests that contain the command and were
  # written by an organization member, the owner, or a collaborator.
  if: |
    github.event_name == 'issue_comment' &&
    github.event.issue.pull_request &&
    contains(github.event.comment.body, '/languagetool') &&
    (github.event.comment.author_association == 'MEMBER' ||
     github.event.comment.author_association == 'OWNER' ||
     github.event.comment.author_association == 'COLLABORATOR')
  runs-on: ubuntu-latest
  steps:
    - name: Add rerun label to PR
      uses: actions/github-script@v7
      with:
        script: |
          const label = process.env.RERUN_LABEL;
          const owner = context.repo.owner;
          const repo = context.repo.repo;
          const issue_number = context.issue.number; // PR number for issue_comment

          // Ensure the label exists. Create it only when the lookup reports
          // "not found"; any other failure (permissions, rate limit, network)
          // is surfaced instead of being mistaken for a missing label.
          try {
            await github.rest.issues.getLabel({ owner, repo, name: label });
          } catch (e) {
            if (e.status !== 404) {
              throw e;
            }
            await github.rest.issues.createLabel({
              owner,
              repo,
              name: label,
              color: '0e8a16',
              description: 'Rerun LanguageTool on this PR'
            });
          }

          // A label that is still attached (e.g. a previous run failed before
          // cleaning up) would make addLabels a no-op with no `labeled` event,
          // so drop it first. 404 just means it was not attached.
          try {
            await github.rest.issues.removeLabel({ owner, repo, issue_number, name: label });
          } catch (e) {
            if (e.status !== 404) {
              throw e;
            }
          }

          await github.rest.issues.addLabels({
            owner,
            repo,
            issue_number,
            labels: [label]
          });
65+
1266
languagetool:
67+
if: |
68+
github.event_name == 'pull_request_target' &&
69+
(
70+
github.event.action == 'opened' ||
71+
github.event.action == 'reopened' ||
72+
(github.event.action == 'labeled' && github.event.label.name == 'languagetool:run')
73+
)
1374
runs-on: ubuntu-latest
75+
1476
steps:
15-
- name: Checkout
- name: Checkout PR (head SHA)
  uses: actions/checkout@v4
  with:
    # pull_request_target checks out the base branch by default; the PR's
    # own content is what has to be linted.
    ref: ${{ github.event.pull_request.head.sha }}
    fetch-depth: 0
    # This job runs with the base repository's token while the workspace
    # holds untrusted PR content, so do not leave the token in .git/config.
    persist-credentials: false
1982

20-
- name: Set up Java
21-
uses: actions/setup-java@v4
22-
with:
23-
distribution: temurin
24-
java-version: "17"
25-
26-
- name: Download LanguageTool
83+
- name: Build LanguageTool server image with custom dictionary
84+
shell: bash
2785
run: |
2886
set -euo pipefail
29-
LT_VERSION="6.4"
30-
curl -fsSL -o LT.zip "https://languagetool.org/download/LanguageTool-${LT_VERSION}.zip"
31-
unzip -q LT.zip
32-
echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV"
33-
34-
- name: Run LanguageTool on changed PR files + comment summary
35-
env:
36-
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
37-
REPO: ${{ github.repository }}
38-
PR_NUMBER: ${{ github.event.pull_request.number }}
39-
BASE_SHA: ${{ github.event.pull_request.base.sha }}
40-
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
87+
88+
WORDS_DIR=".github/languagetool"
89+
SPELLING_FILE="$WORDS_DIR/spelling.en.txt"
90+
IGNORE_FILE="$WORDS_DIR/ignore.en.txt"
91+
92+
mkdir -p "$WORDS_DIR"
93+
test -f "$SPELLING_FILE" || : > "$SPELLING_FILE"
94+
test -f "$IGNORE_FILE" || : > "$IGNORE_FILE"
95+
96+
# Safety cap (avoid someone committing a gigantic word list)
97+
head -n 2000 "$SPELLING_FILE" > /tmp/spelling_additions.txt
98+
head -n 2000 "$IGNORE_FILE" > /tmp/ignore_additions.txt
99+
100+
mkdir -p /tmp/lt
101+
cp /tmp/spelling_additions.txt /tmp/lt/spelling_additions.txt
102+
cp /tmp/ignore_additions.txt /tmp/lt/ignore_additions.txt
103+
104+
cat > /tmp/lt/Dockerfile <<'EOF'
105+
FROM erikvl87/languagetool:latest
106+
USER root
107+
COPY spelling_additions.txt /tmp/spelling_additions.txt
108+
COPY ignore_additions.txt /tmp/ignore_additions.txt
109+
RUN set -e; \
110+
if [ -s /tmp/spelling_additions.txt ]; then (echo; cat /tmp/spelling_additions.txt) >> org/languagetool/resource/en/hunspell/spelling.txt; fi; \
111+
if [ -s /tmp/ignore_additions.txt ]; then (echo; cat /tmp/ignore_additions.txt) >> org/languagetool/resource/en/hunspell/ignore.txt; fi
112+
USER languagetool
113+
EOF
114+
115+
docker build -t lt-custom /tmp/lt
116+
117+
- name: Start LanguageTool server
118+
shell: bash
41119
run: |
42120
set -euo pipefail
121+
docker run -d --name languagetool -p 8010:8010 lt-custom
43122
44-
echo "Base: $BASE_SHA"
45-
echo "Head: $HEAD_SHA"
46-
47-
mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \
48-
| grep -E '\.(md|mdx|rst|txt)$' || true)
49-
50-
if [ "${#FILES[@]}" -eq 0 ]; then
51-
echo "No matching files changed. Skipping."
52-
exit 0
53-
fi
54-
55-
echo "Files to check:"
56-
printf ' - %s\n' "${FILES[@]}"
57-
58-
JAR="$(ls -1 "$LT_DIR"/languagetool-commandline.jar)"
59-
LANG="en-US"
60-
61-
# Write a preprocessor script to a file (avoids YAML/heredoc indentation issues)
62-
PREPROCESS="$(mktemp)"
63-
cat > "$PREPROCESS" <<'PY'
64-
import re, sys
65-
66-
path = sys.argv[1]
67-
text = open(path, "r", encoding="utf-8", errors="replace").read()
68-
69-
# Remove YAML frontmatter at top
70-
if text.startswith("---\n"):
71-
m = re.match(r"^---\n.*?\n---\n", text, flags=re.S)
72-
if m:
73-
text = text[m.end():]
74-
75-
# Remove fenced code blocks
76-
text = re.sub(r"^```.*?$.*?^```.*?$", "\n", text, flags=re.S | re.M)
77-
78-
# Remove inline code spans
79-
text = re.sub(r"`[^`]*`", " ", text)
80-
81-
# Neutralize path-ish tokens
82-
text = re.sub(r"\b(?:~?/)?[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\b", " PATH ", text)
83-
84-
# Neutralize common filename tokens with extensions
85-
exts = r"(so|a|o|dylib|dll|exe|bin|iso|img|qcow2|raw|tar|gz|bz2|xz|zip|7z|deb|rpm|jar|war|py|js|ts|jsx|tsx|java|c|cc|cpp|h|hpp|rs|go|rb|php|sh|yaml|yml|toml|json|xml|md|mdx|rst|txt)"
86-
text = re.sub(rf"\b[A-Za-z0-9._-]+\.(?:{exts})\b", " FILE ", text, flags=re.I)
87-
88-
# Neutralize very long identifier-ish tokens
89-
text = re.sub(r"\b[A-Za-z][A-Za-z0-9_-]{14,}\b", " IDENT ", text)
90-
91-
text = re.sub(r"[ \t]+", " ", text)
92-
sys.stdout.write(text)
93-
PY
94-
95-
issues=0
96-
REPORT_FILE="$(mktemp)"
97-
: > "$REPORT_FILE"
98-
99-
for f in "${FILES[@]}"; do
100-
echo "-----"
101-
echo "Checking: $f"
102-
103-
tmp="$(mktemp)"
104-
105-
# Preprocess; if it fails, fall back to original file (but keep the workflow running)
106-
if python3 "$PREPROCESS" "$f" > "$tmp" 2>/dev/null; then
107-
:
108-
else
109-
cp "$f" "$tmp"
110-
fi
111-
112-
out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)"
113-
rm -f "$tmp"
114-
115-
if [ -n "$out" ]; then
116-
issues=1
117-
echo "$out"
118-
{
119-
echo "FILE: $f"
120-
echo "$out"
121-
echo
122-
} >> "$REPORT_FILE"
123-
else
124-
echo "OK"
123+
# Wait until the API is up
124+
for i in {1..60}; do
125+
if curl -fsS http://127.0.0.1:8010/v2/languages >/dev/null; then
126+
exit 0
125127
fi
128+
sleep 1
126129
done
127130
128-
rm -f "$PREPROCESS" || true
129-
130-
# Upsert a single PR comment (marker-based)
131-
MARKER="<!-- languagetool-report -->"
132-
BODY_FILE="$(mktemp)"
133-
134-
if [ "$issues" -ne 0 ]; then
135-
{
136-
echo "$MARKER"
137-
echo "### LanguageTool findings"
138-
echo
139-
echo "_Checked files changed in this PR (frontmatter + fenced code removed; inline code stripped)._"
140-
echo
141-
echo '```'
142-
cat "$REPORT_FILE"
143-
echo '```'
144-
} > "$BODY_FILE"
145-
else
146-
{
147-
echo "$MARKER"
148-
echo "### LanguageTool findings"
149-
echo
150-
echo "✅ No issues found in changed files."
151-
} > "$BODY_FILE"
152-
fi
153-
154-
COMMENTS_JSON="$(mktemp)"
155-
gh api "repos/$REPO/issues/$PR_NUMBER/comments?per_page=100 exporting=false" > "$COMMENTS_JSON"
156-
157-
COMMENT_ID="$(python3 - "$COMMENTS_JSON" <<'PY'
158-
import json, sys
159-
data = json.load(open(sys.argv[1], "r", encoding="utf-8"))
160-
for c in data:
161-
if "<!-- languagetool-report -->" in (c.get("body") or ""):
162-
print(c["id"])
163-
break
164-
PY
165-
)"
166-
167-
if [ -n "${COMMENT_ID:-}" ]; then
168-
gh api -X PATCH "repos/$REPO/issues/comments/$COMMENT_ID" -f body="$(cat "$BODY_FILE")" >/dev/null
169-
echo "Updated existing LanguageTool comment."
170-
else
171-
gh api -X POST "repos/$REPO/issues/$PR_NUMBER/comments" -f body="$(cat "$BODY_FILE")" >/dev/null
172-
echo "Posted new LanguageTool comment."
173-
fi
174-
175-
rm -f "$COMMENTS_JSON" "$BODY_FILE" "$REPORT_FILE" || true
176-
177-
# Fail the check if there were findings (remove this block if you want advisory-only)
178-
if [ "$issues" -ne 0 ]; then
179-
exit 1
180-
fi
131+
echo "LanguageTool server did not start in time" >&2
132+
docker logs languagetool || true
133+
exit 1
134+
135+
- name: Run LanguageTool and comment suggestions on the PR
136+
uses: reviewdog/action-languagetool@v1.23.0
137+
with:
138+
github_token: ${{ secrets.GITHUB_TOKEN }}
139+
reporter: github-pr-review
140+
level: info
141+
patterns: "**/*.md **/*.txt **/*.rst **/*.adoc"
142+
language: ${{ env.LT_LANGUAGE }}
143+
custom_api_endpoint: "http://127.0.0.1:8010"
144+
- name: Remove rerun label (so maintainers can trigger again)
  # always(): clean up even when an earlier step failed; otherwise the label
  # stays attached and adding it again would not produce a new `labeled`
  # event, blocking further reruns.
  if: always() && github.event.action == 'labeled' && github.event.label.name == env.RERUN_LABEL
  # Best effort: a label that is already gone must not fail the job.
  continue-on-error: true
  uses: actions/github-script@v7
  with:
    script: |
      await github.rest.issues.removeLabel({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.payload.pull_request.number,
        name: process.env.RERUN_LABEL,
      });

0 commit comments

Comments
 (0)