# Checks prose files in pull requests with LanguageTool and reports findings
# as PR review comments.
name: LanguageTool (PR review)

on:
  # Run once when the PR is opened/re-opened. `labeled` is also listened to so
  # that adding the rerun label can start a fresh run; the `languagetool` job's
  # own `if` filters out every other label.
  pull_request_target:
    types: [opened, reopened, labeled]

  # Allow maintainers to rerun by commenting "/languagetool"
  issue_comment:
    types: [created]

permissions:
  contents: read
  pull-requests: write
  issues: write

# Only runs that will actually execute the LanguageTool job share the per-PR
# group. Every other run of this workflow (any PR comment, any unrelated label)
# gets a unique per-run group, so it can no longer cancel a check that is in
# progress. The `env` context is not available here, so the label name is
# spelled out; keep it in sync with RERUN_LABEL below.
concurrency:
  group: >-
    languagetool-${{
    (github.event_name == 'pull_request_target'
    && (github.event.action != 'labeled' || github.event.label.name == 'languagetool:run')
    && format('pr-{0}', github.event.pull_request.number))
    || format('run-{0}', github.run_id)
    }}
  cancel-in-progress: true

env:
  # Language code passed to LanguageTool.
  LT_LANGUAGE: en-US
  # Label that (re)triggers the check when added to a PR.
  RERUN_LABEL: 'languagetool:run'

jobs:
  # Comment command -> adds a label -> label event triggers the real run.
  # Only comments on pull requests from members/owners/collaborators count.
  rerun_on_comment:
    if: |
      github.event_name == 'issue_comment' &&
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '/languagetool') &&
      (github.event.comment.author_association == 'MEMBER' ||
       github.event.comment.author_association == 'OWNER' ||
       github.event.comment.author_association == 'COLLABORATOR')
    runs-on: ubuntu-latest
    steps:
      - name: Add rerun label to PR
        uses: actions/github-script@v7
        with:
          # Events created with the default GITHUB_TOKEN do not start new
          # workflow runs, so a label added with it never fires the `labeled`
          # trigger. Provide a PAT or GitHub App token in the
          # LANGUAGETOOL_RERUN_TOKEN secret to make the rerun actually start;
          # without it the label is still added, but nothing is triggered.
          github-token: ${{ secrets.LANGUAGETOOL_RERUN_TOKEN || github.token }}
          script: |
            const label = process.env.RERUN_LABEL;
            const { owner, repo } = context.repo;
            const issue_number = context.issue.number; // PR number for issue_comment

            // Ensure label exists (create only if it is really missing;
            // any other API failure is surfaced instead of being masked).
            try {
              await github.rest.issues.getLabel({ owner, repo, name: label });
            } catch (e) {
              if (e.status !== 404) {
                throw e;
              }
              await github.rest.issues.createLabel({
                owner,
                repo,
                name: label,
                color: '0e8a16',
                description: 'Rerun LanguageTool on this PR'
              });
            }

            await github.rest.issues.addLabels({
              owner,
              repo,
              issue_number,
              labels: [label]
            });

  # Runs LanguageTool against the PR head and posts review comments.
  languagetool:
    # The `env` context is not available in a job-level `if`, so the label name
    # is spelled out here; keep it in sync with RERUN_LABEL.
    if: |
      github.event_name == 'pull_request_target' &&
      (
        github.event.action == 'opened' ||
        github.event.action == 'reopened' ||
        (github.event.action == 'labeled' && github.event.label.name == 'languagetool:run')
      )
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR (head SHA)
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0
          # This is untrusted PR content checked out under pull_request_target:
          # do not leave the workflow token behind in .git/config.
          persist-credentials: false

      - name: Build LanguageTool server image with custom dictionary
        shell: bash
        run: |
          set -euo pipefail

          WORDS_DIR=".github/languagetool"
          SPELLING_FILE="$WORDS_DIR/spelling.en.txt"
          IGNORE_FILE="$WORDS_DIR/ignore.en.txt"
          BUILD_DIR="/tmp/lt"

          # Missing word lists are treated as empty lists.
          mkdir -p "$WORDS_DIR" "$BUILD_DIR"
          test -f "$SPELLING_FILE" || : > "$SPELLING_FILE"
          test -f "$IGNORE_FILE" || : > "$IGNORE_FILE"

          # Safety cap (avoid someone committing a gigantic word list);
          # write the capped copies straight into the Docker build context.
          head -n 2000 "$SPELLING_FILE" > "$BUILD_DIR/spelling_additions.txt"
          head -n 2000 "$IGNORE_FILE" > "$BUILD_DIR/ignore_additions.txt"

          # The Dockerfile is generated here (never taken from the PR); only
          # the two word lists above enter the build context.
          cat > "$BUILD_DIR/Dockerfile" <<'EOF'
          FROM erikvl87/languagetool:latest
          USER root
          COPY spelling_additions.txt /tmp/spelling_additions.txt
          COPY ignore_additions.txt /tmp/ignore_additions.txt
          RUN set -e; \
            if [ -s /tmp/spelling_additions.txt ]; then (echo; cat /tmp/spelling_additions.txt) >> org/languagetool/resource/en/hunspell/spelling.txt; fi; \
            if [ -s /tmp/ignore_additions.txt ]; then (echo; cat /tmp/ignore_additions.txt) >> org/languagetool/resource/en/hunspell/ignore.txt; fi
          USER languagetool
          EOF

          docker build -t lt-custom "$BUILD_DIR"

      - name: Start LanguageTool server
        shell: bash
        run: |
          set -euo pipefail
          docker run -d --name languagetool -p 8010:8010 lt-custom

          # Wait until the API is up (poll once per second, up to 60 tries)
          for _ in $(seq 1 60); do
            if curl -fsS http://127.0.0.1:8010/v2/languages >/dev/null; then
              exit 0
            fi
            sleep 1
          done

          echo "LanguageTool server did not start in time" >&2
          docker logs languagetool || true
          exit 1

      - name: Run LanguageTool and comment suggestions on the PR
        uses: reviewdog/action-languagetool@v1.23.0
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          reporter: github-pr-review
          level: info
          patterns: "**/*.md **/*.txt **/*.rst **/*.adoc"
          language: ${{ env.LT_LANGUAGE }}
          custom_api_endpoint: "http://127.0.0.1:8010"

      - name: Remove rerun label (so maintainers can trigger again)
        # always(): also clean up when an earlier step failed or the run was
        # cancelled; otherwise the label stays on the PR and adding it again
        # would not produce a new `labeled` event.
        if: always() && github.event.action == 'labeled' && github.event.label.name == env.RERUN_LABEL
        continue-on-error: true
        uses: actions/github-script@v7
        with:
          script: |
            await github.rest.issues.removeLabel({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.payload.pull_request.number,
              name: process.env.RERUN_LABEL,
            });