2828 unzip -q LT.zip
2929 echo "LT_DIR=LanguageTool-${LT_VERSION}" >> "$GITHUB_ENV"
3030
31- - name : Run LanguageTool on changed files
31+ - name : Run LanguageTool on changed PR files (cleaned)
3232 env :
3333 BASE_SHA : ${{ github.event.pull_request.base.sha }}
3434 HEAD_SHA : ${{ github.event.pull_request.head.sha }}
3838 echo "Base: $BASE_SHA"
3939 echo "Head: $HEAD_SHA"
4040
41- # Adjust file types as you like:
41+ # File types to check (add/remove as needed)
4242 mapfile -t FILES < <(git diff --name-only "$BASE_SHA" "$HEAD_SHA" \
43- | grep -E '\.(md|rst|txt|mdx )$' || true)
43+ | grep -E '\.(md|mdx| rst|txt)$' || true)
4444
4545 if [ "${#FILES[@]}" -eq 0 ]; then
4646 echo "No matching files changed. Skipping."
@@ -51,16 +51,38 @@ jobs:
5151 printf ' - %s\n' "${FILES[@]}"
5252
5353 JAR="$(ls -1 "$LT_DIR"/languagetool-commandline.jar)"
54-
55- # Pick your language here:
5654 LANG="en-US"
5755
5856 issues=0
57+
5958 for f in "${FILES[@]}"; do
6059 echo "-----"
6160 echo "Checking: $f"
62- # LanguageTool returns 0 even with matches, so we count output ourselves.
63- out="$(java -jar "$JAR" -l "$LANG" "$f" || true)"
61+
62+ tmp="$(mktemp)"
63+
64+ # Keep nearly all findings, but strip the two biggest sources of noise before checking:
65+ # - YAML front matter at the top of the file (--- ... ---)
66+ # - fenced code blocks (``` ... ```)
67+ #
68+ # Everything else is checked (including normal prose in MDX).
69+ awk '
70+ BEGIN { fm=0; code=0; }
71+ NR==1 && $0=="---" { fm=1; next }
72+ fm==1 && $0=="---" { fm=0; next }
73+ fm==1 { next }
74+
75+ /^```/ { code = !code; next }
76+ code==1 { next }
77+
78+ { print }
79+ ' "$f" > "$tmp"
80+
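81+ # The LanguageTool CLI prints findings to stdout; it typically exits 0 even with findings,
82+ # so we detect findings by whether the output is non-empty. NOTE: because of `|| true` and stdout-only capture, a run that fails with empty stdout is treated as "no findings".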
83+ out="$(java -jar "$JAR" -l "$LANG" "$tmp" || true)"
84+ rm -f "$tmp"
85+
6486 if [ -n "$out" ]; then
6587 issues=1
6688 echo "$out"
0 commit comments