From 8634c102bc5160082c9f575e6974c5177fff7ff9 Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 20 Jun 2026 17:45:24 +0800 Subject: [PATCH 1/9] Added operation support for physical lines of code (LOC). Added the rcnCountLinesOfCode() API function to count physical lines of code in provided source text. Added the corresponding options mode in the RcnCountOption enumeration. Added LOC support in scount. Changed test resources for expected output text to include LOC counts. [CL]: Added operation support for physical lines of code (LOC). [Issue#10] Signed-off-by: Phil Gaiser --- man/scount.1 | 3 +- src/lib/CMakeLists.txt | 1 + src/lib/c/evaluation.h | 22 + src/lib/c/factories.c | 48 ++ src/lib/c/loc.c | 430 ++++++++++++++++++ src/lib/c/statistics.c | 25 + src/lib/include/reckon/reckon.h | 93 +++- src/lib/tests/CMakeLists.txt | 7 + src/lib/tests/unit/c/test_loc_lines.c | 175 +++++++ src/scount/c/print.c | 65 +++ src/scount/c/scount.h | 1 + src/scount/c/statistics.c | 5 +- .../functionality/res/expected/mixed.txt | 42 +- .../res/expected/mixedWithAllFiles.txt | 114 ++--- .../res/expected/mixedWithFiles.txt | 112 ++--- .../expected/mixedWithSyntaxErrorLenient.txt | 24 +- .../expected/mixedWithSyntaxErrorStrict.txt | 20 +- .../output_multi_byte_char_in_filename.txt | 18 +- .../res/expected/output_multiple_files.txt | 20 +- .../output_multiple_files_lines_only.txt | 20 +- .../expected/output_multiple_files_no_llc.txt | 26 +- .../res/expected/output_single_file.txt | 1 + .../output_single_file_lines_only.txt | 1 + .../expected/output_single_file_no_llc.txt | 1 + .../res/expected/output_stdin.txt | 1 + .../res/expected/output_stdin_lines_only.txt | 1 + .../res/expected/output_stdin_with_ext.txt | 1 + 27 files changed, 1076 insertions(+), 201 deletions(-) create mode 100644 src/lib/c/loc.c create mode 100644 src/lib/tests/unit/c/test_loc_lines.c diff --git a/man/scount.1 b/man/scount.1 index 8600849..ebdd9a5 100644 --- a/man/scount.1 +++ b/man/scount.1 @@ -24,6 +24,7 @@ Cb 
Cb L L. Abbr.|Description LLC|Logical lines of code +LOC|Physical lines of code PHL|Hard physical lines WRD|Number of words CHR|Number of characters @@ -55,7 +56,7 @@ This option can only be used on a single file input. .B \-l, \-\-lines Compute and display only line-specific metrics. .br -This includes logical and physical lines. +This includes logical lines of code, physical lines of code and hard physical lines. .TP .B \-\-show\-files Show a table of individual files in the result when the given input path refers to a directory. diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 08d4b22..747fdc3 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -43,6 +43,7 @@ target_sources( "c/lang_cpp.c" "c/logical.c" "c/physical.c" + "c/loc.c" "c/statistics.c" "c/tree.c" "c/words.c" diff --git a/src/lib/c/evaluation.h b/src/lib/c/evaluation.h index 5007854..c2d9ef5 100644 --- a/src/lib/c/evaluation.h +++ b/src/lib/c/evaluation.h @@ -148,6 +148,28 @@ NodeVisitor createEvaluationFunction(RcnTextFormat language); */ const char* getInlineSourceCommentString(RcnTextFormat language); +/** + * Returns the opening marker for block comments in the specified programming + * language (e.g. slash-star for C-family languages), or NULL if the language + * does not support block comments. The caller does not own the returned string + * and must not attempt to free it. + */ +const char* getBlockCommentStartString(RcnTextFormat language); + +/** + * Returns the closing marker for block comments in the specified programming + * language (e.g. star-slash for C-family languages), or NULL if the language + * does not support block comments. The caller does not own the returned string + * and must not attempt to free it. + */ +const char* getBlockCommentEndString(RcnTextFormat language); + +/** + * Returns true if the specified format denotes a supported programming + * language for LOC counting, false otherwise (e.g. for plain-text). 
+ */ +bool isLocEnabled(RcnTextFormat language); + /** * Allocates a new node evaluation context for an annotation operation. * Ownership of the returned context is transferred to the caller. It must be diff --git a/src/lib/c/factories.c b/src/lib/c/factories.c index 936e196..eb5bd42 100644 --- a/src/lib/c/factories.c +++ b/src/lib/c/factories.c @@ -102,6 +102,54 @@ const char* getInlineSourceCommentString(RcnTextFormat language) { } } +const char* getBlockCommentStartString(RcnTextFormat language) { + switch (language) { + case RCN_LANG_C: + case RCN_LANG_JAVA: + case RCN_LANG_CPP: + case RCN_LANG_JAVASCRIPT: + case RCN_LANG_TYPESCRIPT: + return "/*"; + case RCN_LANG_PYTHON: + case RCN_LANG_R: + case RCN_LANG_BASH: + default: + return NULL; + } +} + +const char* getBlockCommentEndString(RcnTextFormat language) { + switch (language) { + case RCN_LANG_C: + case RCN_LANG_JAVA: + case RCN_LANG_CPP: + case RCN_LANG_JAVASCRIPT: + case RCN_LANG_TYPESCRIPT: + return "*/"; + case RCN_LANG_PYTHON: + case RCN_LANG_R: + case RCN_LANG_BASH: + default: + return NULL; + } +} + +bool isLocEnabled(RcnTextFormat language) { + switch (language) { + case RCN_LANG_C: + case RCN_LANG_JAVA: + case RCN_LANG_PYTHON: + case RCN_LANG_CPP: + case RCN_LANG_JAVASCRIPT: + case RCN_LANG_TYPESCRIPT: + case RCN_LANG_R: + case RCN_LANG_BASH: + return true; + default: + return false; + } +} + SourceFormatDetection detectSourceFormat(const RcnSourceFile* file) { SourceFormatDetection detection = { .isSupportedFormat = false, diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c new file mode 100644 index 0000000..0352957 --- /dev/null +++ b/src/lib/c/loc.c @@ -0,0 +1,430 @@ +/* + * Copyright (C) 2026 Raven Computing + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "reckon/reckon.h" +#include "evaluation.h" + +/** + * Returns true if the given byte is an ASCII whitespace character + * (space, tab, carriage return, form feed, or vertical tab). + */ +static inline bool isAsciiSpace(char c) { + return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'; +} + +/** + * Scans the byte range [ptr, end) to determine whether it contains actual + * source code, updating the block-comment tracking state as boundaries + * are crossed. + * + * A line segment is counted as source code if it contains at least one + * character that is not whitespace and not part of a comment. + * + * @param ptr Start of the line segment (inclusive). + * @param end End of the line segment (exclusive). Should point at the NL byte + * or one past the last byte of text. + * @param lineComment Null-terminated line-comment start marker, or NULL. + * @param lineComLen strlen(lineComment), or 0 if lineComment is NULL. + * @param blockStart Null-terminated block-comment start marker, or NULL. + * @param blockStartLen strlen(blockStart), or 0 if blockStart is NULL. + * @param blockEnd Null-terminated block-comment end marker, or NULL. + * @param blockEndLen strlen(blockEnd), or 0 if blockEnd is NULL. + * @param inBlockComment In/out param: whether the scan starts inside a block + * comment; updated as comment boundaries are crossed. + * @return True if the line segment contains source code.
+ */ +static bool segmentHasCode( + const char* ptr, + const char* end, + const char* lineComment, + size_t lineComLen, + const char* blockStart, + size_t blockStartLen, + const char* blockEnd, + size_t blockEndLen, + bool* inBlockComment +) { + while (ptr < end) { + if (*inBlockComment) { + // Look for the end of the block comment in this segment + const size_t remaining = (size_t) (end - ptr); + const char* found = NULL; + if (blockEndLen > 0 && remaining >= blockEndLen) { + const size_t maxOffset = remaining - blockEndLen; + for (size_t offset = 0; offset <= maxOffset; ++offset) { + if (memcmp(ptr + offset, blockEnd, blockEndLen) == 0) { + found = ptr + offset; + break; + } + } + } + if (!found) { + return false; // Entire segment is within a block comment + } + ptr = found + blockEndLen; + *inBlockComment = false; + continue; + } + + if (isAsciiSpace(*ptr)) { + ++ptr; + continue; + } + + const size_t remaining = (size_t) (end - ptr); + + // Check for line-comment marker + if (lineComLen > 0 + && remaining >= lineComLen + && memcmp(ptr, lineComment, lineComLen) == 0) { + + return false; + } + + // Check for block-comment start marker + if (blockStartLen > 0 + && remaining >= blockStartLen + && memcmp(ptr, blockStart, blockStartLen) == 0) { + + // Search for the matching closing marker on the same line + const char* start = ptr + blockStartLen; + size_t searchLen = (size_t) (end - start); + const char* closingFound = NULL; + if (blockEndLen > 0 && searchLen >= blockEndLen) { + const size_t maxOffset = searchLen - blockEndLen; + for (size_t offset = 0; offset <= maxOffset; ++offset) { + if (memcmp(start + offset, blockEnd, blockEndLen) == 0) { + closingFound = start + offset; + break; + } + } + } + if (!closingFound) { + *inBlockComment = true; + return false; // Block comment continues on the next line + } + // Is inline block comment. 
Skip over it and continue scanning + ptr = closingFound + blockEndLen; + continue; + } + + // Non-whitespace, non-comment character, so this line has source code + return true; + } + return false; +} + +/** + * Counts LOC in UTF-8 encoded text (with or without BOM). + */ +static RcnCount countLocUTF8( + RcnSourceText source, + const char* lineComment, + size_t lineComLen, + const char* blockStart, + size_t blockStartLen, + const char* blockEnd, + size_t blockEndLen +) { + RcnCount count = 0; + const char* text = source.text; + size_t size = source.size; + + // Skip UTF-8 BOM if present + if (hasUTF8BOM(source)) { + text += 3; + size -= 3; + } + + bool inBlockComment = false; + const char* pos = text; + const char* const textEnd = text + size; + + while (pos < textEnd) { + // Locate the end of the current line + const char* lineEnd = pos; + while (lineEnd < textEnd && *lineEnd != '\n') { + ++lineEnd; + } + + const bool lineHasSourceCode = segmentHasCode( + pos, lineEnd, + lineComment, lineComLen, + blockStart, blockStartLen, + blockEnd, blockEndLen, + &inBlockComment + ); + if (lineHasSourceCode) { + ++count; + } + + // Advance past the NL, or stop if at end of text + pos = (lineEnd < textEnd) ? lineEnd + 1 : textEnd; + } + + return count; +} + +/** + * Returns the ASCII code of the UTF-16 code unit at byte offset `offset` + * within `text`, or 0 if the code unit is not a pure ASCII character + * or represents the null character. + * + * No bounds checking is performed. The caller must ensure at least two bytes + * are available at `offset`. + */ +static inline unsigned char utf16AsciiAt( + const char* text, + size_t offset, + bool isLittleEndian +) { + unsigned char lo = (unsigned char)( + isLittleEndian ? text[offset] : text[offset + 1] + ); + unsigned char hi = (unsigned char)( + isLittleEndian ? text[offset + 1] : text[offset] + ); + return (hi == 0 && lo > 0 && lo < 128) ? 
lo : 0; +} + +/** + * Searches for the first occurrence of the ASCII string `target` encoded + * as UTF-16 code units within the byte range [text, text+length). + * The search advances in 2-byte steps. + * + * Returns the byte offset of the first match, or `SIZE_MAX` if not found. + */ +static size_t utf16FindAscii( + const char* text, + size_t length, + const char* target, + size_t targetLength, + bool isLittleEndian +) { + const size_t targetBytes = targetLength * 2; + if (targetBytes == 0 || targetBytes > length) { + return SIZE_MAX; + } + for (size_t i = 0; i + targetBytes <= length; i += 2) { + bool match = true; + for (size_t j = 0; j < targetLength; ++j) { + unsigned char c = utf16AsciiAt(text, i + j * 2, isLittleEndian); + if (c != (unsigned char) target[j]) { + match = false; + break; + } + } + if (match) { + return i; + } + } + return SIZE_MAX; +} + +/** + * Counts LOC in UTF-16 encoded text (LE or BE). + * `text` points to the first byte after the BOM. + * `size` is the number of remaining bytes. 
+ */ +static RcnCount countLocUTF16( + const char* text, + size_t size, + bool isLittleEndian, + const char* lineComment, + size_t lineComLen, + const char* blockStart, + size_t blockStartLen, + const char* blockEnd, + size_t blockEndLen +) { + RcnCount count = 0; + bool inBlockComment = false; + size_t offset = 0; // Current byte position (always even) + + while (offset + 1 < size) { + // Find the end of the current line + size_t lineEndOffset = offset; + while ((lineEndOffset + 1) < size) { + unsigned char c = utf16AsciiAt(text, lineEndOffset, isLittleEndian); + if (c == '\n') { + break; + } + lineEndOffset += 2; + } + + size_t scan = offset; + bool lineHasSourceCode = false; + + while ((scan + 1) <= lineEndOffset) { + if (inBlockComment) { + size_t remaining = lineEndOffset - scan; + size_t found = utf16FindAscii( + text + scan, + remaining, + blockEnd, blockEndLen, + isLittleEndian + ); + if (found == SIZE_MAX) { + break; // Entire rest of line is within block comment + } + scan += found + blockEndLen * 2; + inBlockComment = false; + continue; + } + + unsigned char c = utf16AsciiAt(text, scan, isLittleEndian); + + if (isAsciiSpace(c)) { + scan += 2; + continue; + } + + size_t remaining = lineEndOffset - scan; + + // Check for line-comment marker + if (lineComLen > 0 && remaining >= (lineComLen * 2)) { + size_t found = utf16FindAscii( + text + scan, + lineComLen * 2, + lineComment, lineComLen, + isLittleEndian + ); + if (found == 0) { + break; + } + } + + // Check for block-comment start marker + if (blockStartLen > 0 && remaining >= (blockStartLen * 2)) { + size_t found = utf16FindAscii( + text + scan, + blockStartLen * 2, + blockStart, blockStartLen, + isLittleEndian + ); + if (found == 0) { + const size_t afterStart = scan + (blockStartLen * 2); + const size_t searchLen = lineEndOffset - afterStart; + size_t closingFound = SIZE_MAX; + if (blockEndLen > 0) { + closingFound = utf16FindAscii( + text + afterStart, + searchLen, + blockEnd, blockEndLen, + 
isLittleEndian + ); + } + if (closingFound == SIZE_MAX) { + inBlockComment = true; + break; + } + scan = afterStart + closingFound + (blockEndLen * 2); + continue; + } + } + + lineHasSourceCode = true; + break; + } + + if (lineHasSourceCode) { + ++count; + } + + if ((lineEndOffset + 1) < size + && utf16AsciiAt(text, lineEndOffset, isLittleEndian) == '\n') { + offset = lineEndOffset + 2; + } else { + offset = size; + } + } + + return count; +} + +RcnCountResult rcnCountLinesOfCode( + RcnTextFormat language, + RcnSourceText sourceCode +) { + RcnCountResult result = {0}; + + if (sourceCode.size == 0) { + result.state.ok = true; + result.state.errorCode = RCN_ERR_NONE; + return result; + } + if (!sourceCode.text) { + result.state.errorCode = RCN_ERR_INVALID_INPUT; + result.state.errorMessage = "Source code input must not be NULL"; + return result; + } + if (sourceCode.size > UINT32_MAX) { + result.state.errorCode = RCN_ERR_INPUT_TOO_LARGE; + result.state.errorMessage = "Input exceeds maximum supported size"; + return result; + } + + if (!isLocEnabled(language)) { + result.state.errorCode = RCN_ERR_UNSUPPORTED_FORMAT; + result.state.errorMessage = ( + "The input format or programming language is not supported" + ); + return result; + } + + const char* lineComment = getInlineSourceCommentString(language); + const char* blockStart = getBlockCommentStartString(language); + const char* blockEnd = getBlockCommentEndString(language); + + const size_t lineComLength = lineComment ? strlen(lineComment) : 0; + const size_t blockStartLength = blockStart ? strlen(blockStart) : 0; + const size_t blockEndLength = blockEnd ? 
strlen(blockEnd) : 0; + + TextEncoding encoding = detectEncoding(sourceCode); + if (encoding == TextEncodingUTF8) { + result.count = countLocUTF8( + sourceCode, + lineComment, lineComLength, + blockStart, blockStartLength, + blockEnd, blockEndLength + ); + } else { + assert( + encoding == TextEncodingUTF16LE || encoding == TextEncodingUTF16BE + ); + const bool isLittleEndian = (encoding == TextEncodingUTF16LE); + // The BOM occupies the first 2 bytes + const char* text = sourceCode.text + 2; + const size_t size = sourceCode.size >= 2 ? sourceCode.size - 2 : 0; + result.count = countLocUTF16( + text, size, + isLittleEndian, + lineComment, lineComLength, + blockStart, blockStartLength, + blockEnd, blockEndLength + ); + } + + result.state.ok = true; + result.state.errorCode = RCN_ERR_NONE; + return result; +} diff --git a/src/lib/c/statistics.c b/src/lib/c/statistics.c index 3e64245..db48b20 100644 --- a/src/lib/c/statistics.c +++ b/src/lib/c/statistics.c @@ -49,6 +49,7 @@ static bool isFormatSelected(RcnStatOptions options, RcnTextFormat srcFormat) { static inline void resetResultGroup(RcnCountResultGroup* resultGroup) { resultGroup->logicalLines = 0; + resultGroup->codeLines = 0; resultGroup->physicalLines = 0; resultGroup->words = 0; resultGroup->characters = 0; @@ -132,6 +133,24 @@ static inline bool countLogicalLines( return true; } +static inline bool countLoc( + RcnCountStatistics* stats, + RcnSourceFile* file, + RcnTextFormat language, + RcnCountResultGroup* resultGroup +) { + RcnCountResult result = rcnCountLinesOfCode(language, file->content); + if (!checkIntermediateResultState(stats, resultGroup, result.state)) { + return false; + } + resultGroup->codeLines = result.count; + resultGroup->state.ok = true; + resultGroup->state.errorCode = RCN_ERR_NONE; + stats->totalCodeLines += resultGroup->codeLines; + stats->codeLines[language] += resultGroup->codeLines; + return true; +} + static inline bool countPhysicalLines( RcnCountStatistics* stats, RcnSourceFile* 
file, @@ -249,6 +268,7 @@ static inline bool count( bool ok = false; result->hasLogicalLines = detected.isProgrammingLanguage; + result->hasLocLines = detected.isProgrammingLanguage; RcnTextFormat sourceFormat = detected.format; ok = ensureFileContent(stats, options, file, result); if (ok && options.operations & RCN_OPT_COUNT_LOGICAL_LINES){ @@ -256,6 +276,11 @@ static inline bool count( ok = countLogicalLines(stats, options, file, sourceFormat, result); } } + if (ok && options.operations & RCN_OPT_COUNT_CODE_LINES) { + if (result->hasLocLines) { + ok = countLoc(stats, file, sourceFormat, result); + } + } if (ok && options.operations & RCN_OPT_COUNT_PHYSICAL_LINES) { ok = countPhysicalLines(stats, file, sourceFormat, result); } diff --git a/src/lib/include/reckon/reckon.h b/src/lib/include/reckon/reckon.h index 03cdcba..d0f2394 100644 --- a/src/lib/include/reckon/reckon.h +++ b/src/lib/include/reckon/reckon.h @@ -58,6 +58,16 @@ * multiple LLCs. One statement spanning multiple physical lines counts * as one LLC. * + * * Physical Lines of Code (LOC): + * The number of physical source lines that contain actual code. A physical + * line contributes to the LOC count if it is not blank (i.e. it contains at + * least one non-whitespace character) and not a comment line (i.e. its + * non-whitespace content is not entirely within a comment). This metric is + * only applicable to files containing source code written in a supported + * programming language. A line that contains both code and a comment (e.g. + * a trailing inline comment) is counted as LOC. Lines that are entirely + * within a block comment are not counted. + * * * Physical Lines (PHL): * The number of hard physical lines in the source text, including blank lines * and comments. @@ -376,6 +386,16 @@ typedef struct RcnCountResultGroup { */ RcnCount logicalLines; + /** + * The counted physical lines of code (LOC). 
+ * + * This is the number of physical lines that contain actual source code, + * excluding blank lines and comment-only lines. + * + * @since 1.7.0 + */ + RcnCount codeLines; + /** * The counted hard physical lines. */ @@ -427,6 +447,20 @@ typedef struct RcnCountResultGroup { */ bool hasLogicalLines; + /** + * Indicates whether physical lines of code (LOC) can be computed for + * the source entity. + * + * If `true`, the `codeLines` field contains a valid count. If `false`, + * then LOC is not applicable for the source entity's format, + * e.g. for plain text files, and the `codeLines` field is zero. + * This field is only set by a counting operation and remains initialized + * as `false` if no such operation was performed. + * + * @since 1.7.0 + */ + bool hasLocLines; + } RcnCountResultGroup; /** @@ -623,6 +657,14 @@ typedef struct RcnCountStatistics { */ RcnCount totalLogicalLines; + /** + * The total number of physical lines of code (LOC), across all files + * and programming languages. + * + * @since 1.7.0 + */ + RcnCount totalCodeLines; + /** * The total number of hard physical lines, across all files and formats. */ @@ -652,6 +694,16 @@ typedef struct RcnCountStatistics { */ RcnCount logicalLines[RECKON_NUM_SUPPORTED_FORMATS]; + /** + * The number of physical lines of code (LOC) per supported programming + * language. + * + * The index corresponds to the `RcnTextFormat` enumerator values. + * + * @since 1.7.0 + */ + RcnCount codeLines[RECKON_NUM_SUPPORTED_FORMATS]; + /** * The number of hard physical lines per supported programming language. * @@ -734,7 +786,19 @@ typedef enum RcnCountOption { * includes files containing source code written in a programming language * but not, for example, plain text files (.txt). */ - RCN_OPT_COUNT_LOGICAL_LINES = 0x08 + RCN_OPT_COUNT_LOGICAL_LINES = 0x08, + + /** + * Count physical lines of code (LOC). 
+ * + * A physical line is counted as LOC if it contains at least one + * non-whitespace character that is not part of a comment. Blank lines + * and comment-only lines are excluded. This option is only applicable + * to source files containing text in a supported programming language. + * + * @since 1.7.0 + */ + RCN_OPT_COUNT_CODE_LINES = 0x10, } RcnCountOption; @@ -1097,6 +1161,33 @@ RECKON_EXPORT RcnSourceText rcnMarkLogicalLinesInSourceText( */ RECKON_EXPORT void rcnFreeSourceText(RcnSourceText* source); +/** + * Counts the number of physical lines of code (LOC) in the specified source. + * + * A physical line is counted as LOC if it is not blank and its + * non-whitespace content is not entirely within a comment. Lines that + * contain both code and an inline comment are counted as LOC. Lines that + * consist only of a comment, or that are entirely inside a block comment, + * are not counted. + * + * Counting LOC is only meaningful for source files containing code in a + * supported programming language. For non-programming-language formats + * (e.g. plain text, Markdown), the function returns an error with + * `RCN_ERR_UNSUPPORTED_FORMAT`. + * + * See header documentation for details on supported encodings. + * + * @param language The format of the specified source text. Must denote a + * supported programming language. + * @param sourceCode The source text to count lines of code in. + * @return A `RcnCountResult` struct containing the LOC count. + * @since 1.7.0 + */ +RECKON_EXPORT RcnCountResult rcnCountLinesOfCode( + RcnTextFormat language, + RcnSourceText sourceCode +); + /** * Counts the number of hard physical lines in the specified source text. 
* diff --git a/src/lib/tests/CMakeLists.txt b/src/lib/tests/CMakeLists.txt index 13bf0b2..36c026c 100644 --- a/src/lib/tests/CMakeLists.txt +++ b/src/lib/tests/CMakeLists.txt @@ -36,6 +36,13 @@ add_test_suite( TEST_SUITE_LINK ${RECKON_TARGET_LIB_OBJ} ) +add_test_suite( + TEST_SUITE_NAME CodeLinesUnitTest + TEST_SUITE_TARGET test_loc_lines + TEST_SUITE_SOURCE unit/c/test_loc_lines.c + TEST_SUITE_LINK ${RECKON_TARGET_LIB_OBJ} +) + add_test_suite( TEST_SUITE_NAME LogicalLinesUnitTest TEST_SUITE_TARGET test_logical_lines diff --git a/src/lib/tests/unit/c/test_loc_lines.c b/src/lib/tests/unit/c/test_loc_lines.c new file mode 100644 index 0000000..97d1a6b --- /dev/null +++ b/src/lib/tests/unit/c/test_loc_lines.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2026 Raven Computing + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "unity.h" + +#include "reckon/reckon.h" +#include "fileio.h" + +#define TEST_DIR_ENC RECKON_TEST_PATH_RES_BASE "/encodings" +#define TEST_FILE_SOURCE_UTF_8 TEST_DIR_ENC "/Source_UTF_8.java" +#define TEST_FILE_SOURCE_UTF_8_BOM TEST_DIR_ENC "/Source_UTF_8_with_BOM.java" +#define TEST_FILE_SOURCE_UTF_16_LE TEST_DIR_ENC "/Source_UTF_16LE.java" +#define TEST_FILE_SOURCE_UTF_16_BE TEST_DIR_ENC "/Source_UTF_16BE.java" + +void setUp(void) { } + +void tearDown(void) { } + +void testCodeLineCountIsCorrect(void) { + char *text = + "public class Test {\n" + " public static void main(String[] args) {\n" + " // This is a comment\n" + " System.out.println(\"This is actual code!\");\n" + " /*A block comment \n" + " spanning \n" + " multiple lines. */\n" + " }\n" + "}\n"; + + RcnSourceText source = { + .text = text, + .size = strlen(text) + }; + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, source); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(5, result.count); +} + +void testCodeLineCountWithInvalidInputFails(void) { + RcnSourceText source = { + .text = NULL, + .size = 1 + }; + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, source); + TEST_ASSERT_FALSE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_INVALID_INPUT, result.state.errorCode); + TEST_ASSERT_EQUAL_STRING( + "Source code input must not be NULL", + result.state.errorMessage + ); + TEST_ASSERT_EQUAL_INT(0, result.count); +} + +void testCodeLineCountWithZeroSizeInputSucceeds(void) { + RcnSourceText source = { + .text = NULL, + .size = 0 + }; + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, source); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(0, result.count); +} + +void 
testCodeLineCountWithEncodedSourceUTF8(void) { + RcnSourceFile* file = newSourceFile(TEST_FILE_SOURCE_UTF_8); + readSourceFileContent(file); + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, file->content); + freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(19, result.count); +} + +void testCodeLineCountWithEncodedSourceUTF8WithBOM(void) { + RcnSourceFile *file = newSourceFile(TEST_FILE_SOURCE_UTF_8_BOM); + readSourceFileContent(file); + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, file->content); + freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(19, result.count); +} + +void testCodeLineCountWithEncodedSourceUTF16LE(void) { + RcnSourceFile* file = newSourceFile(TEST_FILE_SOURCE_UTF_16_LE); + readSourceFileContent(file); + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, file->content); + freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(19, result.count); +} + +void testCodeLineCountWithEncodedSourceUTF16BE(void) { + RcnSourceFile* file = newSourceFile(TEST_FILE_SOURCE_UTF_16_BE); + readSourceFileContent(file); + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, file->content); + freeSourceFile(file); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(19, result.count); +} + +void testCodeLineCountWithLastLineNotEndingWithNewline(void) { + char *text = + "public class Test {\n" + " public static void main(String[] args) {\n" + " // This is a comment\n" + " 
System.out.println(\"This is code!\");\n" + " }\n" + "}"; + + RcnSourceText source = { + .text = text, + .size = strlen(text) + }; + + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, source); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(5, result.count); +} + +void testCodeLineCountWithOnlyInputUTF16LEBOM(void) { + char* text = "\xff\xfe"; + RcnSourceText source = { + .text = text, + .size = 2 + }; + RcnCountResult result = rcnCountLinesOfCode(RCN_LANG_JAVA, source); + TEST_ASSERT_TRUE(result.state.ok); + TEST_ASSERT_EQUAL_INT(RCN_ERR_NONE, result.state.errorCode); + TEST_ASSERT_NULL(result.state.errorMessage); + TEST_ASSERT_EQUAL_INT(0, result.count); +} + +int main(void) { + UNITY_BEGIN(); + RUN_TEST(testCodeLineCountIsCorrect); + RUN_TEST(testCodeLineCountWithInvalidInputFails); + RUN_TEST(testCodeLineCountWithZeroSizeInputSucceeds); + RUN_TEST(testCodeLineCountWithEncodedSourceUTF8); + RUN_TEST(testCodeLineCountWithEncodedSourceUTF8WithBOM); + RUN_TEST(testCodeLineCountWithEncodedSourceUTF16LE); + RUN_TEST(testCodeLineCountWithEncodedSourceUTF16BE); + RUN_TEST(testCodeLineCountWithLastLineNotEndingWithNewline); + RUN_TEST(testCodeLineCountWithOnlyInputUTF16LEBOM); + return UNITY_END(); +} diff --git a/src/scount/c/print.c b/src/scount/c/print.c index a44ec25..858c54f 100644 --- a/src/scount/c/print.c +++ b/src/scount/c/print.c @@ -35,6 +35,7 @@ static const size_t LARGE_RESULT_THRESHOLD = 32; static const int WIDTH_COL0 = 26; // File static const int WIDTH_COL1 = 11; // LLC +static const int WIDTH_COL_LOC = 11; // LOC static const int WIDTH_COL2 = 11; // PHL static const int WIDTH_COL3 = 11; // WRD static const int WIDTH_COL4 = 11; // CHR @@ -291,6 +292,18 @@ static void prLogicalLineCount( } } +static void prLocLineCount( + PrintBuffer* buffer, + bool hasLocLines, + RcnCount value +) { + if (hasLocLines) { + prCnt(buffer, 
value, WIDTH_COL_LOC); + } else { + prNotApplicable(buffer); + } +} + static void prHeaderCell(PrintBuffer* buffer, const char* label, int width) { assert(label != NULL); const int length = (int) strlen(label); @@ -396,6 +409,10 @@ static void prTableTop(PrintBuffer* buffer, const char* title) { prChr(buffer, TABLE_BORDER_CORNER); prHeaderCell(buffer, "LLC", WIDTH_COL1); } + if (buffer->showLinesOfCode) { + prChr(buffer, TABLE_BORDER_CORNER); + prHeaderCell(buffer, "LOC", WIDTH_COL_LOC); + } if (buffer->showPhysicalLines) { prChr(buffer, TABLE_BORDER_CORNER); prHeaderCell(buffer, "PHL", WIDTH_COL2); @@ -424,6 +441,10 @@ static void prTableBottom(PrintBuffer* buffer, char border) { prChr(buffer, TABLE_BORDER_CORNER); prRpt(buffer, border, WIDTH_COL1); } + if (buffer->showLinesOfCode) { + prChr(buffer, TABLE_BORDER_CORNER); + prRpt(buffer, border, WIDTH_COL_LOC); + } if (buffer->showPhysicalLines) { prChr(buffer, TABLE_BORDER_CORNER); prRpt(buffer, border, WIDTH_COL2); @@ -464,6 +485,14 @@ static void prFileRowSkipped(PrintBuffer* buffer) { prStr(buffer, even ? ellipsisEven : ellipsisOdd); prRpt(buffer, ' ', (WIDTH_COL1 - correction) / 2); } + if (buffer->showLinesOfCode) { + prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); + even = WIDTH_COL_LOC % 2 == 0; + correction = even ? ellEvenLen : ellOddLen; + prRpt(buffer, ' ', (WIDTH_COL_LOC - correction) / 2); + prStr(buffer, even ? 
ellipsisEven : ellipsisOdd); + prRpt(buffer, ' ', (WIDTH_COL_LOC - correction) / 2); + } if (buffer->showPhysicalLines) { prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); even = WIDTH_COL2 % 2 == 0; @@ -516,6 +545,12 @@ static void prFileRowData( prLogicalLineCount(buffer, res->hasLogicalLines, res->logicalLines); prChr(buffer, ' '); } + if (buffer->showLinesOfCode) { + prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); + prChr(buffer, ' '); + prLocLineCount(buffer, res->hasLocLines, res->codeLines); + prChr(buffer, ' '); + } if (buffer->showPhysicalLines) { prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); prChr(buffer, ' '); @@ -684,6 +719,16 @@ static void prSummaryRows( ); prChr(buffer, ' '); } + if (buffer->showLinesOfCode) { + prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); + prChr(buffer, ' '); + prLocLineCount( + buffer, + hasLogicalLines, + stats->codeLines[frmt] + ); + prChr(buffer, ' '); + } if (buffer->showPhysicalLines) { prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); prChr(buffer, ' '); @@ -729,6 +774,16 @@ static void prTotalsRow(PrintBuffer* buffer, const RcnCountStatistics* stats) { ); prChr(buffer, ' '); } + if (buffer->showLinesOfCode) { + prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); + prChr(buffer, ' '); + prLocLineCount( + buffer, + hasAnyLogicalLines(stats), + stats->totalCodeLines + ); + prChr(buffer, ' '); + } if (buffer->showPhysicalLines) { prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); prChr(buffer, ' '); @@ -784,6 +839,16 @@ void printResultSingle(const RcnCountStatistics* stats, PrintBuffer* buffer) { } prChr(buffer, '\n'); } + if (buffer->showLinesOfCode) { + prStr(buffer, " Lines of Code (LOC): "); + if (result->hasLocLines) { + pr8ld(buffer, result->codeLines); + } else { + prRpt(buffer, ' ', 8 - strlen(LABEL_NOT_APPLICABLE)); + prStr(buffer, LABEL_NOT_APPLICABLE); + } + prChr(buffer, '\n'); + } if (buffer->showPhysicalLines) { prStr(buffer, " Physical Lines (PHL): "); pr8ld(buffer, result->physicalLines); diff --git a/src/scount/c/scount.h 
b/src/scount/c/scount.h index 38ef596..16686c8 100644 --- a/src/scount/c/scount.h +++ b/src/scount/c/scount.h @@ -104,6 +104,7 @@ typedef struct PrintBuffer { size_t size; size_t capacity; bool showLogicalLines; + bool showLinesOfCode; bool showPhysicalLines; bool showWords; bool showCharacters; diff --git a/src/scount/c/statistics.c b/src/scount/c/statistics.c index 615495a..f9bfda6 100644 --- a/src/scount/c/statistics.c +++ b/src/scount/c/statistics.c @@ -152,7 +152,9 @@ ExitStatus outputStatistics(AppArgs args) { }; if (args.linesOnly) { options.operations = ( - RCN_OPT_COUNT_LOGICAL_LINES | RCN_OPT_COUNT_PHYSICAL_LINES + RCN_OPT_COUNT_LOGICAL_LINES + | RCN_OPT_COUNT_CODE_LINES + | RCN_OPT_COUNT_PHYSICAL_LINES ); } @@ -177,6 +179,7 @@ ExitStatus outputStatistics(AppArgs args) { PrintBuffer buffer = { .showLogicalLines = true, + .showLinesOfCode = true, .showPhysicalLines = true, .showWords = !args.linesOnly, .showCharacters = !args.linesOnly, diff --git a/src/scount/tests/functionality/res/expected/mixed.txt b/src/scount/tests/functionality/res/expected/mixed.txt index 6574120..7f1f172 100644 --- a/src/scount/tests/functionality/res/expected/mixed.txt +++ b/src/scount/tests/functionality/res/expected/mixed.txt @@ -1,26 +1,26 @@ Directory: mixed Scanned files: 34 - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 3 | 22 | 127 | 127 | - | Markdown | n/a | 2 | 16 | 105 | 105 | - | XML | n/a | 19 | 39 | 441 | 441 | - | JSON | n/a | 42 | 64 | 561 | 561 | - | CSS | n/a | 19 | 38 | 234 | 234 | - | HTML | n/a | 20 | 30 | 272 | 272 | - | SQL | n/a | 17 | 48 | 265 | 265 | - | CMake | n/a | 15 | 33 | 384 | 384 | - | YAML | n/a | 13 | 24 | 166 | 166 | - | C | 9 | 21 | 62 | 399 | 399 | - | Java | 7 | 25 | 72 | 519 | 519 | - | Python | 13 | 25 | 60 | 493 | 493 | - | C++ | 9 | 21 | 74 | 457 | 457 | - | JavaScript | 10 | 19 | 37 | 331 | 331 | - | TypeScript | 11 | 27 | 51 | 436 | 436 | - | R | 6 | 8 | 18 | 86 | 86 | 
- | Shell | 8 | 18 | 41 | 250 | 250 | - o==========================o===========o===========o===========o===========o===========o - | Total: | 73 | 314 | 729 | 5526 | 5526 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 3 | 22 | 127 | 127 | + | Markdown | n/a | n/a | 2 | 16 | 105 | 105 | + | XML | n/a | n/a | 19 | 39 | 441 | 441 | + | JSON | n/a | n/a | 42 | 64 | 561 | 561 | + | CSS | n/a | n/a | 19 | 38 | 234 | 234 | + | HTML | n/a | n/a | 20 | 30 | 272 | 272 | + | SQL | n/a | n/a | 17 | 48 | 265 | 265 | + | CMake | n/a | n/a | 15 | 33 | 384 | 384 | + | YAML | n/a | n/a | 13 | 24 | 166 | 166 | + | C | 9 | 11 | 21 | 62 | 399 | 399 | + | Java | 7 | 11 | 25 | 72 | 519 | 519 | + | Python | 13 | 13 | 25 | 60 | 493 | 493 | + | C++ | 9 | 11 | 21 | 74 | 457 | 457 | + | JavaScript | 10 | 14 | 19 | 37 | 331 | 331 | + | TypeScript | 11 | 21 | 27 | 51 | 436 | 436 | + | R | 6 | 6 | 8 | 18 | 86 | 86 | + | Shell | 8 | 11 | 18 | 41 | 250 | 250 | + o==========================o===========o===========o===========o===========o===========o===========o + | Total: | 73 | 98 | 314 | 729 | 5526 | 5526 | + o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/mixedWithAllFiles.txt b/src/scount/tests/functionality/res/expected/mixedWithAllFiles.txt index 94a1ad2..f7f69ec 100644 --- a/src/scount/tests/functionality/res/expected/mixedWithAllFiles.txt +++ b/src/scount/tests/functionality/res/expected/mixedWithAllFiles.txt @@ -1,65 +1,65 @@ Directory: mixed Scanned files: 34 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | CMakeLists.txt | n/a | 10 | 21 | 250 | 250 | - | Sample1.cmake | n/a | 5 | 12 | 134 | 134 | - | Sample1.java | 3 | 12 | 34 | 233 | 233 | - | Sample2.java 
| 4 | 13 | 38 | 286 | 286 | - | sample1.R | 3 | 4 | 9 | 28 | 28 | - | sample1.c | 4 | 10 | 29 | 180 | 180 | - | sample1.cpp | 4 | 10 | 33 | 201 | 201 | - | sample1.css | n/a | 8 | 17 | 98 | 98 | - | sample1.html | n/a | 9 | 14 | 121 | 121 | - | sample1.js | 5 | 10 | 22 | 184 | 184 | - | sample1.json | n/a | 19 | 29 | 250 | 250 | - | sample1.md | n/a | 1 | 8 | 52 | 52 | - | sample1.py | 6 | 12 | 28 | 229 | 229 | - | sample1.sh | 3 | 7 | 16 | 97 | 97 | - | sample1.sql | n/a | 6 | 17 | 95 | 95 | - | sample1.ts | 5 | 14 | 27 | 216 | 216 | - | sample1.txt | n/a | 1 | 9 | 52 | 52 | - | sample1.xml | n/a | 9 | 18 | 204 | 204 | - | sample1.yaml | n/a | 6 | 11 | 73 | 73 | - | sample2.bash | 5 | 11 | 25 | 153 | 153 | - | sample2.c | 5 | 11 | 33 | 219 | 219 | - | sample2.cpp | 5 | 11 | 41 | 256 | 256 | - | sample2.css | n/a | 11 | 21 | 136 | 136 | - | sample2.html | n/a | 11 | 16 | 151 | 151 | - | sample2.js | 5 | 9 | 15 | 147 | 147 | - | sample2.json | n/a | 23 | 35 | 311 | 311 | - | sample2.md | n/a | 1 | 8 | 53 | 53 | - | sample2.py | 7 | 13 | 32 | 264 | 264 | - | sample2.r | 3 | 4 | 9 | 58 | 58 | - | sample2.sql | n/a | 11 | 31 | 170 | 170 | - | sample2.ts | 6 | 13 | 24 | 220 | 220 | - | sample2.txt | n/a | 2 | 13 | 75 | 75 | - | sample2.xml | n/a | 10 | 21 | 237 | 237 | - | sample2.yml | n/a | 7 | 13 | 93 | 93 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | CMakeLists.txt | n/a | n/a | 10 | 21 | 250 | 250 | + | Sample1.cmake | n/a | n/a | 5 | 12 | 134 | 134 | + | Sample1.java | 3 | 5 | 12 | 34 | 233 | 233 | + | Sample2.java | 4 | 6 | 13 | 38 | 286 | 286 | + | sample1.R | 3 | 3 | 4 | 9 | 28 | 28 | + | sample1.c | 4 | 5 | 10 | 29 | 180 | 180 | + | sample1.cpp | 4 | 5 | 10 | 33 | 201 | 201 | + | sample1.css | n/a | n/a | 8 | 17 | 98 | 98 | + | sample1.html | n/a | n/a | 9 | 14 | 121 | 121 | + | sample1.js | 5 | 7 | 10 | 22 | 
184 | 184 | + | sample1.json | n/a | n/a | 19 | 29 | 250 | 250 | + | sample1.md | n/a | n/a | 1 | 8 | 52 | 52 | + | sample1.py | 6 | 6 | 12 | 28 | 229 | 229 | + | sample1.sh | 3 | 4 | 7 | 16 | 97 | 97 | + | sample1.sql | n/a | n/a | 6 | 17 | 95 | 95 | + | sample1.ts | 5 | 11 | 14 | 27 | 216 | 216 | + | sample1.txt | n/a | n/a | 1 | 9 | 52 | 52 | + | sample1.xml | n/a | n/a | 9 | 18 | 204 | 204 | + | sample1.yaml | n/a | n/a | 6 | 11 | 73 | 73 | + | sample2.bash | 5 | 7 | 11 | 25 | 153 | 153 | + | sample2.c | 5 | 6 | 11 | 33 | 219 | 219 | + | sample2.cpp | 5 | 6 | 11 | 41 | 256 | 256 | + | sample2.css | n/a | n/a | 11 | 21 | 136 | 136 | + | sample2.html | n/a | n/a | 11 | 16 | 151 | 151 | + | sample2.js | 5 | 7 | 9 | 15 | 147 | 147 | + | sample2.json | n/a | n/a | 23 | 35 | 311 | 311 | + | sample2.md | n/a | n/a | 1 | 8 | 53 | 53 | + | sample2.py | 7 | 7 | 13 | 32 | 264 | 264 | + | sample2.r | 3 | 3 | 4 | 9 | 58 | 58 | + | sample2.sql | n/a | n/a | 11 | 31 | 170 | 170 | + | sample2.ts | 6 | 10 | 13 | 24 | 220 | 220 | + | sample2.txt | n/a | n/a | 2 | 13 | 75 | 75 | + | sample2.xml | n/a | n/a | 10 | 21 | 237 | 237 | + | sample2.yml | n/a | n/a | 7 | 13 | 93 | 93 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 3 | 22 | 127 | 127 | - | Markdown | n/a | 2 | 16 | 105 | 105 | - | XML | n/a | 19 | 39 | 441 | 441 | - | JSON | n/a | 42 | 64 | 561 | 561 | - | CSS | n/a | 19 | 38 | 234 | 234 | - | HTML | n/a | 20 | 30 | 272 | 272 | - | SQL | n/a | 17 | 48 | 265 | 265 | - | CMake | n/a | 15 | 33 | 384 | 384 | - | YAML | n/a | 13 | 24 | 166 | 166 | - | C | 9 | 21 | 62 | 399 | 399 | - | Java | 7 | 25 | 72 | 519 | 519 | - | Python | 13 | 25 | 60 | 493 | 493 | - | C++ | 9 | 21 | 74 | 457 | 457 | - | JavaScript | 10 | 19 | 37 | 331 | 331 | - | TypeScript | 11 | 27 | 51 | 436 | 436 | - | R | 6 | 8 | 18 | 
86 | 86 | - | Shell | 8 | 18 | 41 | 250 | 250 | - o==========================o===========o===========o===========o===========o===========o - | Total: | 73 | 314 | 729 | 5526 | 5526 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 3 | 22 | 127 | 127 | + | Markdown | n/a | n/a | 2 | 16 | 105 | 105 | + | XML | n/a | n/a | 19 | 39 | 441 | 441 | + | JSON | n/a | n/a | 42 | 64 | 561 | 561 | + | CSS | n/a | n/a | 19 | 38 | 234 | 234 | + | HTML | n/a | n/a | 20 | 30 | 272 | 272 | + | SQL | n/a | n/a | 17 | 48 | 265 | 265 | + | CMake | n/a | n/a | 15 | 33 | 384 | 384 | + | YAML | n/a | n/a | 13 | 24 | 166 | 166 | + | C | 9 | 11 | 21 | 62 | 399 | 399 | + | Java | 7 | 11 | 25 | 72 | 519 | 519 | + | Python | 13 | 13 | 25 | 60 | 493 | 493 | + | C++ | 9 | 11 | 21 | 74 | 457 | 457 | + | JavaScript | 10 | 14 | 19 | 37 | 331 | 331 | + | TypeScript | 11 | 21 | 27 | 51 | 436 | 436 | + | R | 6 | 6 | 8 | 18 | 86 | 86 | + | Shell | 8 | 11 | 18 | 41 | 250 | 250 | + o==========================o===========o===========o===========o===========o===========o===========o + | Total: | 73 | 98 | 314 | 729 | 5526 | 5526 | + o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/mixedWithFiles.txt b/src/scount/tests/functionality/res/expected/mixedWithFiles.txt index df98e26..80852b5 100644 --- a/src/scount/tests/functionality/res/expected/mixedWithFiles.txt +++ b/src/scount/tests/functionality/res/expected/mixedWithFiles.txt @@ -1,64 +1,64 @@ Directory: mixed Scanned files: 34 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | CMakeLists.txt | n/a | 10 | 21 | 250 | 250 | - | Sample1.cmake | n/a | 5 | 12 | 134 | 134 | - | Sample1.java | 3 | 12 | 34 | 233 | 233 | - | Sample2.java | 
4 | 13 | 38 | 286 | 286 | - | sample1.R | 3 | 4 | 9 | 28 | 28 | - | sample1.c | 4 | 10 | 29 | 180 | 180 | - | sample1.cpp | 4 | 10 | 33 | 201 | 201 | - | sample1.css | n/a | 8 | 17 | 98 | 98 | - | sample1.html | n/a | 9 | 14 | 121 | 121 | - | sample1.js | 5 | 10 | 22 | 184 | 184 | - | sample1.json | n/a | 19 | 29 | 250 | 250 | - | sample1.md | n/a | 1 | 8 | 52 | 52 | - | sample1.py | 6 | 12 | 28 | 229 | 229 | - | sample1.sh | 3 | 7 | 16 | 97 | 97 | - | sample1.sql | n/a | 6 | 17 | 95 | 95 | - | sample1.ts | 5 | 14 | 27 | 216 | 216 | - | sample1.txt | n/a | 1 | 9 | 52 | 52 | - | sample1.xml | n/a | 9 | 18 | 204 | 204 | - | sample1.yaml | n/a | 6 | 11 | 73 | 73 | - | sample2.bash | 5 | 11 | 25 | 153 | 153 | - | sample2.c | 5 | 11 | 33 | 219 | 219 | - | sample2.cpp | 5 | 11 | 41 | 256 | 256 | - | sample2.css | n/a | 11 | 21 | 136 | 136 | - | sample2.html | n/a | 11 | 16 | 151 | 151 | - | sample2.js | 5 | 9 | 15 | 147 | 147 | - | sample2.json | n/a | 23 | 35 | 311 | 311 | - | sample2.md | n/a | 1 | 8 | 53 | 53 | - | sample2.py | 7 | 13 | 32 | 264 | 264 | - | sample2.r | 3 | 4 | 9 | 58 | 58 | - | sample2.sql | n/a | 11 | 31 | 170 | 170 | - | sample2.ts | 6 | 13 | 24 | 220 | 220 | - | .. | ... | ... | ... | ... | ... 
| - | sample2.yml | n/a | 7 | 13 | 93 | 93 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | CMakeLists.txt | n/a | n/a | 10 | 21 | 250 | 250 | + | Sample1.cmake | n/a | n/a | 5 | 12 | 134 | 134 | + | Sample1.java | 3 | 5 | 12 | 34 | 233 | 233 | + | Sample2.java | 4 | 6 | 13 | 38 | 286 | 286 | + | sample1.R | 3 | 3 | 4 | 9 | 28 | 28 | + | sample1.c | 4 | 5 | 10 | 29 | 180 | 180 | + | sample1.cpp | 4 | 5 | 10 | 33 | 201 | 201 | + | sample1.css | n/a | n/a | 8 | 17 | 98 | 98 | + | sample1.html | n/a | n/a | 9 | 14 | 121 | 121 | + | sample1.js | 5 | 7 | 10 | 22 | 184 | 184 | + | sample1.json | n/a | n/a | 19 | 29 | 250 | 250 | + | sample1.md | n/a | n/a | 1 | 8 | 52 | 52 | + | sample1.py | 6 | 6 | 12 | 28 | 229 | 229 | + | sample1.sh | 3 | 4 | 7 | 16 | 97 | 97 | + | sample1.sql | n/a | n/a | 6 | 17 | 95 | 95 | + | sample1.ts | 5 | 11 | 14 | 27 | 216 | 216 | + | sample1.txt | n/a | n/a | 1 | 9 | 52 | 52 | + | sample1.xml | n/a | n/a | 9 | 18 | 204 | 204 | + | sample1.yaml | n/a | n/a | 6 | 11 | 73 | 73 | + | sample2.bash | 5 | 7 | 11 | 25 | 153 | 153 | + | sample2.c | 5 | 6 | 11 | 33 | 219 | 219 | + | sample2.cpp | 5 | 6 | 11 | 41 | 256 | 256 | + | sample2.css | n/a | n/a | 11 | 21 | 136 | 136 | + | sample2.html | n/a | n/a | 11 | 16 | 151 | 151 | + | sample2.js | 5 | 7 | 9 | 15 | 147 | 147 | + | sample2.json | n/a | n/a | 23 | 35 | 311 | 311 | + | sample2.md | n/a | n/a | 1 | 8 | 53 | 53 | + | sample2.py | 7 | 7 | 13 | 32 | 264 | 264 | + | sample2.r | 3 | 3 | 4 | 9 | 58 | 58 | + | sample2.sql | n/a | n/a | 11 | 31 | 170 | 170 | + | sample2.ts | 6 | 10 | 13 | 24 | 220 | 220 | + | .. | ... | ... | ... | ... | ... | ... 
| + | sample2.yml | n/a | n/a | 7 | 13 | 93 | 93 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 3 | 22 | 127 | 127 | - | Markdown | n/a | 2 | 16 | 105 | 105 | - | XML | n/a | 19 | 39 | 441 | 441 | - | JSON | n/a | 42 | 64 | 561 | 561 | - | CSS | n/a | 19 | 38 | 234 | 234 | - | HTML | n/a | 20 | 30 | 272 | 272 | - | SQL | n/a | 17 | 48 | 265 | 265 | - | CMake | n/a | 15 | 33 | 384 | 384 | - | YAML | n/a | 13 | 24 | 166 | 166 | - | C | 9 | 21 | 62 | 399 | 399 | - | Java | 7 | 25 | 72 | 519 | 519 | - | Python | 13 | 25 | 60 | 493 | 493 | - | C++ | 9 | 21 | 74 | 457 | 457 | - | JavaScript | 10 | 19 | 37 | 331 | 331 | - | TypeScript | 11 | 27 | 51 | 436 | 436 | - | R | 6 | 8 | 18 | 86 | 86 | - | Shell | 8 | 18 | 41 | 250 | 250 | - o==========================o===========o===========o===========o===========o===========o - | Total: | 73 | 314 | 729 | 5526 | 5526 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 3 | 22 | 127 | 127 | + | Markdown | n/a | n/a | 2 | 16 | 105 | 105 | + | XML | n/a | n/a | 19 | 39 | 441 | 441 | + | JSON | n/a | n/a | 42 | 64 | 561 | 561 | + | CSS | n/a | n/a | 19 | 38 | 234 | 234 | + | HTML | n/a | n/a | 20 | 30 | 272 | 272 | + | SQL | n/a | n/a | 17 | 48 | 265 | 265 | + | CMake | n/a | n/a | 15 | 33 | 384 | 384 | + | YAML | n/a | n/a | 13 | 24 | 166 | 166 | + | C | 9 | 11 | 21 | 62 | 399 | 399 | + | Java | 7 | 11 | 25 | 72 | 519 | 519 | + | Python | 13 | 13 | 25 | 60 | 493 | 493 | + | C++ | 9 | 11 | 21 | 74 | 457 | 457 | + | JavaScript | 10 | 14 | 19 | 37 | 331 | 331 | + | TypeScript | 11 | 21 | 27 | 51 | 436 | 436 | + | R | 6 | 6 | 8 | 18 | 86 | 86 | + | Shell | 8 | 11 | 18 | 41 | 250 | 250 | + 
o==========================o===========o===========o===========o===========o===========o===========o + | Total: | 73 | 98 | 314 | 729 | 5526 | 5526 | + o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorLenient.txt b/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorLenient.txt index 144491b..036246e 100644 --- a/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorLenient.txt +++ b/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorLenient.txt @@ -1,20 +1,20 @@ Directory: mixedWithSyntaxError Scanned files: 3 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | 01_CorrectFile.java | 5 | 12 | 33 | 273 | 273 | - | 02_has_syntax_error.c | 9 | 17 | 51 | 357 | 357 | - | 03_correct_file.txt | n/a | 1 | 20 | 109 | 109 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | 01_CorrectFile.java | 5 | 7 | 12 | 33 | 273 | 273 | + | 02_has_syntax_error.c | 9 | 10 | 17 | 51 | 357 | 357 | + | 03_correct_file.txt | n/a | n/a | 1 | 20 | 109 | 109 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 1 | 20 | 109 | 109 | - | C | 9 | 17 | 51 | 357 | 357 | - | Java | 5 | 12 | 33 | 273 | 273 | - o==========================o===========o===========o===========o===========o===========o - | Total: | 14 | 30 | 104 | 739 | 739 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 1 | 20 | 109 | 109 | + | C 
| 9 | 10 | 17 | 51 | 357 | 357 | + | Java | 5 | 7 | 12 | 33 | 273 | 273 | + o==========================o===========o===========o===========o===========o===========o===========o + | Total: | 14 | 17 | 30 | 104 | 739 | 739 | + o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorStrict.txt b/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorStrict.txt index 35467c2..0a465f9 100644 --- a/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorStrict.txt +++ b/src/scount/tests/functionality/res/expected/mixedWithSyntaxErrorStrict.txt @@ -1,18 +1,18 @@ Directory: mixedWithSyntaxError Scanned files: 3 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | 01_CorrectFile.java | 5 | 12 | 33 | 273 | 273 | - | 03_correct_file.txt | n/a | 1 | 20 | 109 | 109 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | 01_CorrectFile.java | 5 | 7 | 12 | 33 | 273 | 273 | + | 03_correct_file.txt | n/a | n/a | 1 | 20 | 109 | 109 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 1 | 20 | 109 | 109 | - | Java | 5 | 12 | 33 | 273 | 273 | - o==========================o===========o===========o===========o===========o===========o - | Total: | 5 | 13 | 53 | 382 | 382 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 1 | 20 | 109 | 109 | + | Java | 5 | 7 | 12 | 33 | 273 | 273 | + 
o==========================o===========o===========o===========o===========o===========o===========o + | Total: | 5 | 7 | 13 | 53 | 382 | 382 | + o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/output_multi_byte_char_in_filename.txt b/src/scount/tests/functionality/res/expected/output_multi_byte_char_in_filename.txt index 4bf7479..f3cd175 100644 --- a/src/scount/tests/functionality/res/expected/output_multi_byte_char_in_filename.txt +++ b/src/scount/tests/functionality/res/expected/output_multi_byte_char_in_filename.txt @@ -1,17 +1,17 @@ Directory: special Scanned files: 2 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | mb_😀_char_🔥_n.txt | n/a | 1 | 13 | 78 | 78 | - | placeholder.txt | n/a | 1 | 6 | 33 | 33 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | mb_😀_char_🔥_n.txt | n/a | n/a | 1 | 13 | 78 | 78 | + | placeholder.txt | n/a | n/a | 1 | 6 | 33 | 33 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 2 | 19 | 111 | 111 | - o==========================o===========o===========o===========o===========o===========o - | Total: | n/a | 2 | 19 | 111 | 111 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 2 | 19 | 111 | 111 | + o==========================o===========o===========o===========o===========o===========o===========o + | Total: | n/a | n/a | 2 | 19 | 111 | 111 | + 
o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/output_multiple_files.txt b/src/scount/tests/functionality/res/expected/output_multiple_files.txt index a70b020..096dc08 100644 --- a/src/scount/tests/functionality/res/expected/output_multiple_files.txt +++ b/src/scount/tests/functionality/res/expected/output_multiple_files.txt @@ -1,18 +1,18 @@ Directory: java Scanned files: 3 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Sample.java | 104 | 188 | 494 | 4709 | 4709 | - | SampleAnnotated.java | 104 | 188 | 905 | 7424 | 7424 | - | SampleMinFormatting.java | 104 | 107 | 494 | 4061 | 4061 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Sample.java | 104 | 142 | 188 | 494 | 4709 | 4709 | + | SampleAnnotated.java | 104 | 142 | 188 | 905 | 7424 | 7424 | + | SampleMinFormatting.java | 104 | 81 | 107 | 494 | 4061 | 4061 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Java | 312 | 483 | 1893 | 16194 | 16194 | - o==========================o===========o===========o===========o===========o===========o - | Total: | 312 | 483 | 1893 | 16194 | 16194 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Java | 312 | 365 | 483 | 1893 | 16194 | 16194 | + o==========================o===========o===========o===========o===========o===========o===========o + | Total: | 312 | 365 | 483 | 1893 | 16194 | 16194 | + 
o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/output_multiple_files_lines_only.txt b/src/scount/tests/functionality/res/expected/output_multiple_files_lines_only.txt index 9b7fc93..338b580 100644 --- a/src/scount/tests/functionality/res/expected/output_multiple_files_lines_only.txt +++ b/src/scount/tests/functionality/res/expected/output_multiple_files_lines_only.txt @@ -1,18 +1,18 @@ Directory: java Scanned files: 3 - o---------- File ----------o--- LLC ---o--- PHL ---o - | Sample.java | 104 | 188 | - | SampleAnnotated.java | 104 | 188 | - | SampleMinFormatting.java | 104 | 107 | - o--------------------------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o + | Sample.java | 104 | 142 | 188 | + | SampleAnnotated.java | 104 | 142 | 188 | + | SampleMinFormatting.java | 104 | 81 | 107 | + o--------------------------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o - | Java | 312 | 483 | - o==========================o===========o===========o - | Total: | 312 | 483 | - o==========================o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o + | Java | 312 | 365 | 483 | + o==========================o===========o===========o===========o + | Total: | 312 | 365 | 483 | + o==========================o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/output_multiple_files_no_llc.txt b/src/scount/tests/functionality/res/expected/output_multiple_files_no_llc.txt index a62abe9..1da96a1 100644 --- a/src/scount/tests/functionality/res/expected/output_multiple_files_no_llc.txt +++ b/src/scount/tests/functionality/res/expected/output_multiple_files_no_llc.txt @@ -1,21 +1,21 @@ Directory: txt Scanned files: 6 - o---------- File ----------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- 
SZE ---o - | 1sample1.txt | n/a | 1 | 2 | 26 | 26 | - | 1sample2.txt | n/a | 1 | 2 | 26 | 26 | - | 1sample3.txt | n/a | 1 | 2 | 26 | 26 | - | 2sample1.txt | n/a | 1 | 2 | 31 | 31 | - | 2sample2.txt | n/a | 1 | 2 | 31 | 31 | - | 3sample1.txt | n/a | 1 | 2 | 36 | 36 | - o--------------------------o-----------o-----------o-----------o-----------o-----------o + o---------- File ----------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | 1sample1.txt | n/a | n/a | 1 | 2 | 26 | 26 | + | 1sample2.txt | n/a | n/a | 1 | 2 | 26 | 26 | + | 1sample3.txt | n/a | n/a | 1 | 2 | 26 | 26 | + | 2sample1.txt | n/a | n/a | 1 | 2 | 31 | 31 | + | 2sample2.txt | n/a | n/a | 1 | 2 | 31 | 31 | + | 3sample1.txt | n/a | n/a | 1 | 2 | 36 | 36 | + o--------------------------o-----------o-----------o-----------o-----------o-----------o-----------o Summary: - o-------- Language --------o--- LLC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o - | Plain Text | n/a | 6 | 12 | 176 | 176 | - o==========================o===========o===========o===========o===========o===========o - | Total: | n/a | 6 | 12 | 176 | 176 | - o==========================o===========o===========o===========o===========o===========o + o-------- Language --------o--- LLC ---o--- LOC ---o--- PHL ---o--- WRD ---o--- CHR ---o--- SZE ---o + | Plain Text | n/a | n/a | 6 | 12 | 176 | 176 | + o==========================o===========o===========o===========o===========o===========o===========o + | Total: | n/a | n/a | 6 | 12 | 176 | 176 | + o==========================o===========o===========o===========o===========o===========o===========o diff --git a/src/scount/tests/functionality/res/expected/output_single_file.txt b/src/scount/tests/functionality/res/expected/output_single_file.txt index 0986aee..ff67a82 100644 --- a/src/scount/tests/functionality/res/expected/output_single_file.txt +++ b/src/scount/tests/functionality/res/expected/output_single_file.txt @@ -1,6 +1,7 @@ File: Sample.java Logical 
Lines of Code (LLC): 104 + Lines of Code (LOC): 142 Physical Lines (PHL): 188 Words (WRD): 494 Characters (CHR): 4709 diff --git a/src/scount/tests/functionality/res/expected/output_single_file_lines_only.txt b/src/scount/tests/functionality/res/expected/output_single_file_lines_only.txt index 533a603..fc55961 100644 --- a/src/scount/tests/functionality/res/expected/output_single_file_lines_only.txt +++ b/src/scount/tests/functionality/res/expected/output_single_file_lines_only.txt @@ -1,5 +1,6 @@ File: Sample.java Logical Lines of Code (LLC): 104 + Lines of Code (LOC): 142 Physical Lines (PHL): 188 diff --git a/src/scount/tests/functionality/res/expected/output_single_file_no_llc.txt b/src/scount/tests/functionality/res/expected/output_single_file_no_llc.txt index d882727..71cbb8a 100644 --- a/src/scount/tests/functionality/res/expected/output_single_file_no_llc.txt +++ b/src/scount/tests/functionality/res/expected/output_single_file_no_llc.txt @@ -1,6 +1,7 @@ File: sample1.md Logical Lines of Code (LLC): n/a + Lines of Code (LOC): n/a Physical Lines (PHL): 1 Words (WRD): 8 Characters (CHR): 52 diff --git a/src/scount/tests/functionality/res/expected/output_stdin.txt b/src/scount/tests/functionality/res/expected/output_stdin.txt index da09785..53d6b31 100644 --- a/src/scount/tests/functionality/res/expected/output_stdin.txt +++ b/src/scount/tests/functionality/res/expected/output_stdin.txt @@ -1,6 +1,7 @@ File: - (Data from standard input treated as a .txt file) Logical Lines of Code (LLC): n/a + Lines of Code (LOC): n/a Physical Lines (PHL): 2 Words (WRD): 2 Characters (CHR): 8 diff --git a/src/scount/tests/functionality/res/expected/output_stdin_lines_only.txt b/src/scount/tests/functionality/res/expected/output_stdin_lines_only.txt index b038a0f..9df7bed 100644 --- a/src/scount/tests/functionality/res/expected/output_stdin_lines_only.txt +++ b/src/scount/tests/functionality/res/expected/output_stdin_lines_only.txt @@ -1,5 +1,6 @@ File: - (Data from standard 
input treated as a .txt file) Logical Lines of Code (LLC): n/a + Lines of Code (LOC): n/a Physical Lines (PHL): 3 diff --git a/src/scount/tests/functionality/res/expected/output_stdin_with_ext.txt b/src/scount/tests/functionality/res/expected/output_stdin_with_ext.txt index 583408b..974e4f7 100644 --- a/src/scount/tests/functionality/res/expected/output_stdin_with_ext.txt +++ b/src/scount/tests/functionality/res/expected/output_stdin_with_ext.txt @@ -1,6 +1,7 @@ File: - (Data from standard input treated as a .java file) Logical Lines of Code (LLC): 3 + Lines of Code (LOC): 5 Physical Lines (PHL): 12 Words (WRD): 34 Characters (CHR): 233 From b5743cfcd1c82b2f59b275b41af369f50dd1e7b8 Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 17:56:24 +0800 Subject: [PATCH 2/9] Refactored column printing in prFileRowSkipped() into separate function. Signed-off-by: Phil Gaiser --- src/scount/c/print.c | 55 +++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/src/scount/c/print.c b/src/scount/c/print.c index 858c54f..9ce9f47 100644 --- a/src/scount/c/print.c +++ b/src/scount/c/print.c @@ -465,6 +465,19 @@ static void prTableBottom(PrintBuffer* buffer, char border) { prChr(buffer, '\n'); } +static inline void prFileRowSkippedItem(PrintBuffer* buffer, int width) { + const char* ellipsisOdd = "..."; + const char* ellipsisEven = ".."; + const int ellOddLen = (int) strlen(ellipsisOdd); + const int ellEvenLen = (int) strlen(ellipsisEven); + prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); + const bool even = width % 2 == 0; + const int correction = even ? ellEvenLen : ellOddLen; + prRpt(buffer, ' ', (width - correction) / 2); + prStr(buffer, even ? 
ellipsisEven : ellipsisOdd); + prRpt(buffer, ' ', (width - correction) / 2); +} + static void prFileRowSkipped(PrintBuffer* buffer) { const char* ellipsisOdd = "..."; const char* ellipsisEven = ".."; @@ -478,52 +491,22 @@ static void prFileRowSkipped(PrintBuffer* buffer) { prStr(buffer, even ? ellipsisEven : ellipsisOdd); prRpt(buffer, ' ', (WIDTH_COL0 - correction) / 2); if (buffer->showLogicalLines) { - prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); - even = WIDTH_COL1 % 2 == 0; - correction = even ? ellEvenLen : ellOddLen; - prRpt(buffer, ' ', (WIDTH_COL1 - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); - prRpt(buffer, ' ', (WIDTH_COL1 - correction) / 2); + prFileRowSkippedItem(buffer, WIDTH_COL1); } if (buffer->showLinesOfCode) { - prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); - even = WIDTH_COL_LOC % 2 == 0; - correction = even ? ellEvenLen : ellOddLen; - prRpt(buffer, ' ', (WIDTH_COL_LOC - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); - prRpt(buffer, ' ', (WIDTH_COL_LOC - correction) / 2); + prFileRowSkippedItem(buffer, WIDTH_COL_LOC); } if (buffer->showPhysicalLines) { - prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); - even = WIDTH_COL2 % 2 == 0; - correction = even ? ellEvenLen : ellOddLen; - prRpt(buffer, ' ', (WIDTH_COL2 - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); - prRpt(buffer, ' ', (WIDTH_COL2 - correction) / 2); + prFileRowSkippedItem(buffer, WIDTH_COL2); } if (buffer->showWords) { - prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); - even = WIDTH_COL3 % 2 == 0; - correction = even ? ellEvenLen : ellOddLen; - prRpt(buffer, ' ', (WIDTH_COL3 - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); - prRpt(buffer, ' ', (WIDTH_COL3 - correction) / 2); + prFileRowSkippedItem(buffer, WIDTH_COL3); } if (buffer->showCharacters) { - prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); - even = WIDTH_COL4 % 2 == 0; - correction = even ? 
ellEvenLen : ellOddLen; - prRpt(buffer, ' ', (WIDTH_COL4 - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); - prRpt(buffer, ' ', (WIDTH_COL4 - correction) / 2); + prFileRowSkippedItem(buffer, WIDTH_COL4); } if (buffer->showSourceSize) { - prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); - even = WIDTH_COL5 % 2 == 0; - correction = even ? ellEvenLen : ellOddLen; - prRpt(buffer, ' ', (WIDTH_COL5 - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); - prRpt(buffer, ' ', (WIDTH_COL5 - correction) / 2); + prFileRowSkippedItem(buffer, WIDTH_COL5); } prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); prChr(buffer, '\n'); From 444c756df53eea8ec9115b6f8a352115bcc265e9 Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 20:28:35 +0800 Subject: [PATCH 3/9] Refactored ellipsis strings and length indicators into static globals. This removes duplication. Signed-off-by: Phil Gaiser --- src/scount/c/print.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/scount/c/print.c b/src/scount/c/print.c index 9ce9f47..af33100 100644 --- a/src/scount/c/print.c +++ b/src/scount/c/print.c @@ -48,6 +48,10 @@ static const char TABLE_BORDER_VERTICAL_EMPHASIS = '|'; static const char TABLE_BORDER_CORNER = 'o'; static const char* TABLE_PADDING_LEFT = " "; static const char* LABEL_NOT_APPLICABLE = "n/a"; +static const char ELLIPSIS_ODD[] = "..."; +static const char ELLIPSIS_EVEN[] = ".."; +static const int ELLIPSIS_ODD_LEN = sizeof(ELLIPSIS_ODD) - 1; +static const int ELLIPSIS_EVEN_LEN = sizeof(ELLIPSIS_EVEN) - 1; static const char errorMessage[] = "Error"; #ifdef _WIN32 @@ -466,29 +470,21 @@ static void prTableBottom(PrintBuffer* buffer, char border) { } static inline void prFileRowSkippedItem(PrintBuffer* buffer, int width) { - const char* ellipsisOdd = "..."; - const char* ellipsisEven = ".."; - const int ellOddLen = (int) strlen(ellipsisOdd); - const int ellEvenLen = (int) strlen(ellipsisEven); prChr(buffer, 
TABLE_BORDER_VERTICAL_NORMAL); const bool even = width % 2 == 0; - const int correction = even ? ellEvenLen : ellOddLen; + const int correction = even ? ELLIPSIS_EVEN_LEN : ELLIPSIS_ODD_LEN; prRpt(buffer, ' ', (width - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); + prStr(buffer, even ? ELLIPSIS_EVEN : ELLIPSIS_ODD); prRpt(buffer, ' ', (width - correction) / 2); } static void prFileRowSkipped(PrintBuffer* buffer) { - const char* ellipsisOdd = "..."; - const char* ellipsisEven = ".."; - const int ellOddLen = (int) strlen(ellipsisOdd); - const int ellEvenLen = (int) strlen(ellipsisEven); prStr(buffer, TABLE_PADDING_LEFT); prChr(buffer, TABLE_BORDER_VERTICAL_NORMAL); bool even = WIDTH_COL0 % 2 == 0; - int correction = even ? ellEvenLen : ellOddLen; + int correction = even ? ELLIPSIS_EVEN_LEN : ELLIPSIS_ODD_LEN; prRpt(buffer, ' ', (WIDTH_COL0 - correction) / 2); - prStr(buffer, even ? ellipsisEven : ellipsisOdd); + prStr(buffer, even ? ELLIPSIS_EVEN : ELLIPSIS_ODD); prRpt(buffer, ' ', (WIDTH_COL0 - correction) / 2); if (buffer->showLogicalLines) { prFileRowSkippedItem(buffer, WIDTH_COL1); From 539b807f3dd742021cab900cb67fa8451a1c8f35 Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 20:30:48 +0800 Subject: [PATCH 4/9] Changed variable name. Signed-off-by: Phil Gaiser --- src/lib/c/loc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c index 0352957..df3f9c7 100644 --- a/src/lib/c/loc.c +++ b/src/lib/c/loc.c @@ -291,9 +291,9 @@ static RcnCount countLocUTF16( continue; } - unsigned char c = utf16AsciiAt(text, scan, isLittleEndian); + unsigned char character = utf16AsciiAt(text, scan, isLittleEndian); - if (isAsciiSpace(c)) { + if (isAsciiSpace(character)) { scan += 2; continue; } From c4a2cb3f1b05af5211170703a1205af099685170 Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 20:33:18 +0800 Subject: [PATCH 5/9] Changed variable and parameter names. 
Signed-off-by: Phil Gaiser --- src/lib/c/loc.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c index df3f9c7..d5edfc8 100644 --- a/src/lib/c/loc.c +++ b/src/lib/c/loc.c @@ -27,8 +27,12 @@ * Returns true if the given byte is an ASCII whitespace character * (space, tab, carriage return, form feed, or vertical tab). */ -static inline bool isAsciiSpace(char c) { - return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'; +static inline bool isAsciiSpace(char character) { + return character == ' ' + || character == '\t' + || character == '\r' + || character == '\f' + || character == '\v'; } /** @@ -227,8 +231,12 @@ static size_t utf16FindAscii( for (size_t i = 0; i + targetBytes <= length; i += 2) { bool match = true; for (size_t j = 0; j < targetLength; ++j) { - unsigned char c = utf16AsciiAt(text, i + j * 2, isLittleEndian); - if (c != (unsigned char) target[j]) { + unsigned char character = utf16AsciiAt( + text, + i + j * 2, + isLittleEndian + ); + if (character != (unsigned char) target[j]) { match = false; break; } @@ -264,8 +272,12 @@ static RcnCount countLocUTF16( // Find the end of the current line size_t lineEndOffset = offset; while ((lineEndOffset + 1) < size) { - unsigned char c = utf16AsciiAt(text, lineEndOffset, isLittleEndian); - if (c == '\n') { + unsigned char character = utf16AsciiAt( + text, + lineEndOffset, + isLittleEndian + ); + if (character == '\n') { break; } lineEndOffset += 2; From 056f840b5019f868868d3c8614f80d5807581c7d Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 20:36:03 +0800 Subject: [PATCH 6/9] Changed variable names. 
Signed-off-by: Phil Gaiser --- src/lib/c/loc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c index d5edfc8..3d8f7a9 100644 --- a/src/lib/c/loc.c +++ b/src/lib/c/loc.c @@ -201,13 +201,13 @@ static inline unsigned char utf16AsciiAt( size_t offset, bool isLittleEndian ) { - unsigned char lo = (unsigned char)( + unsigned char low = (unsigned char)( isLittleEndian ? text[offset] : text[offset + 1] ); - unsigned char hi = (unsigned char)( + unsigned char high = (unsigned char)( isLittleEndian ? text[offset + 1] : text[offset] ); - return (hi == 0 && lo > 0 && lo < 128) ? lo : 0; + return (high == 0 && low > 0 && low < 128) ? low : 0; } /** From 1011af9f26d903d91014f0c09835a852dd4896cd Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 20:54:25 +0800 Subject: [PATCH 7/9] Refactored search for block-comment end marker into separate function. Signed-off-by: Phil Gaiser --- src/lib/c/loc.c | 59 ++++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c index 3d8f7a9..cc2c372 100644 --- a/src/lib/c/loc.c +++ b/src/lib/c/loc.c @@ -35,6 +35,30 @@ static inline bool isAsciiSpace(char character) { || character == '\v'; } +static const char* searchBlockClosingMarker( + const char* ptr, + const char* end, + const char* lineComment, + size_t lineComLen, + const char* blockStart, + size_t blockStartLen, + const char* blockEnd, + size_t blockEndLen +) { + size_t searchLen = (size_t) (end - ptr); + const char* closingFound = NULL; + if (blockEndLen > 0 && searchLen >= blockEndLen) { + const size_t maxOffset = searchLen - blockEndLen; + for (size_t offset = 0; offset <= maxOffset; ++offset) { + if (memcmp(ptr + offset, blockEnd, blockEndLen) == 0) { + closingFound = ptr + offset; + break; + } + } + } + return closingFound; +} + /** * Scans the byte range [ptr, end) to determine whether it contains actual * source code, 
updating the block-comment tracking state as boundaries @@ -70,17 +94,12 @@ static bool segmentHasCode( while (ptr < end) { if (*inBlockComment) { // Look for the end of the block comment in this segment - const size_t remaining = (size_t) (end - ptr); - const char* found = NULL; - if (blockEndLen > 0 && remaining >= blockEndLen) { - const size_t maxOffset = remaining - blockEndLen; - for (size_t offset = 0; offset <= maxOffset; ++offset) { - if (memcmp(ptr + offset, blockEnd, blockEndLen) == 0) { - found = ptr + offset; - break; - } - } - } + const char* found = searchBlockClosingMarker( + ptr, end, + lineComment, lineComLen, + blockStart, blockStartLen, + blockEnd, blockEndLen + ); if (!found) { return false; // Entire segment is within a block comment } @@ -110,18 +129,12 @@ static bool segmentHasCode( && memcmp(ptr, blockStart, blockStartLen) == 0) { // Search for the matching closing marker on the same line - const char* start = ptr + blockStartLen; - size_t searchLen = (size_t) (end - start); - const char* closingFound = NULL; - if (blockEndLen > 0 && searchLen >= blockEndLen) { - const size_t maxOffset = searchLen - blockEndLen; - for (size_t offset = 0; offset <= maxOffset; ++offset) { - if (memcmp(start + offset, blockEnd, blockEndLen) == 0) { - closingFound = start + offset; - break; - } - } - } + const char* closingFound = searchBlockClosingMarker( + ptr + blockStartLen, end, + lineComment, lineComLen, + blockStart, blockStartLen, + blockEnd, blockEndLen + ); if (!closingFound) { *inBlockComment = true; return false; // Block comment continues on the next line From e5547ec91f12c51260b092dc4fd2810a410fb8e4 Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sat, 27 Jun 2026 21:03:56 +0800 Subject: [PATCH 8/9] Improved handling of unsigned and signed char types. 
Signed-off-by: Phil Gaiser --- src/lib/c/loc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c index cc2c372..f104d7c 100644 --- a/src/lib/c/loc.c +++ b/src/lib/c/loc.c @@ -316,7 +316,7 @@ static RcnCount countLocUTF16( continue; } - unsigned char character = utf16AsciiAt(text, scan, isLittleEndian); + char character = (char) utf16AsciiAt(text, scan, isLittleEndian); if (isAsciiSpace(character)) { scan += 2; From 5541bee864f697550e49e427ef1fd584c0b5efff Mon Sep 17 00:00:00 2001 From: Phil Gaiser Date: Sun, 28 Jun 2026 16:05:36 +0800 Subject: [PATCH 9/9] Refactored countLocUTF16() function. Made it smaller and easier to understand. Resolved linter error about cognitive complexity. Signed-off-by: Phil Gaiser --- src/lib/c/loc.c | 284 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 198 insertions(+), 86 deletions(-) diff --git a/src/lib/c/loc.c b/src/lib/c/loc.c index f104d7c..fd5a925 100644 --- a/src/lib/c/loc.c +++ b/src/lib/c/loc.c @@ -262,115 +262,227 @@ static size_t utf16FindAscii( } /** - * Counts LOC in UTF-16 encoded text (LE or BE). - * `text` points to the first byte after the BOM. - * `size` is the number of remaining bytes. + * Returns the byte offset of the current UTF-16 line ending (`\n`) or the + * first byte past the end of available input if the line has no terminator. 
*/ -static RcnCount countLocUTF16( +static size_t utf16LineEndOffset( const char* text, size_t size, + size_t offset, + bool isLittleEndian +) { + size_t lineEndOffset = offset; + while ((lineEndOffset + 1) < size) { + unsigned char character = utf16AsciiAt( + text, + lineEndOffset, + isLittleEndian + ); + if (character == '\n') { + break; + } + lineEndOffset += 2; + } + return lineEndOffset; +} + +static bool utf16AdvanceOverBlockComment( + const char* text, + size_t lineEndOffset, + bool isLittleEndian, + const char* blockEnd, + size_t blockEndLen, + size_t* scan, + bool* inBlockComment +) { + const size_t remaining = lineEndOffset - *scan; + const size_t found = utf16FindAscii( + text + *scan, + remaining, + blockEnd, blockEndLen, + isLittleEndian + ); + if (found == SIZE_MAX) { + return false; + } + *scan += found + (blockEndLen * 2); + *inBlockComment = false; + return true; +} + +static bool utf16StartsWithAsciiAt( + const char* text, + size_t scan, + bool isLittleEndian, + const char* marker, + size_t markerLen +) { + return ( + markerLen > 0 + && utf16FindAscii( + text + scan, + markerLen * 2, + marker, + markerLen, + isLittleEndian + ) == 0 + ); +} + +static bool utf16ConsumeInlineBlockComment( + const char* text, + size_t lineEndOffset, + bool isLittleEndian, + const char* blockEnd, + size_t blockEndLen, + size_t* scan, + bool* inBlockComment, + size_t blockStartLen +) { + const size_t afterStart = *scan + (blockStartLen * 2); + const size_t searchLen = lineEndOffset - afterStart; + size_t closingFound = SIZE_MAX; + + if (blockEndLen > 0) { + closingFound = utf16FindAscii( + text + afterStart, + searchLen, + blockEnd, + blockEndLen, + isLittleEndian + ); + } + if (closingFound == SIZE_MAX) { + *inBlockComment = true; + return false; + } + + *scan = afterStart + closingFound + (blockEndLen * 2); + return true; +} + +static bool utf16LineHasCode( + const char* text, bool isLittleEndian, const char* lineComment, size_t lineComLen, const char* blockStart, 
size_t blockStartLen, const char* blockEnd, - size_t blockEndLen + size_t blockEndLen, + size_t offset, + size_t lineEndOffset, + bool* inBlockComment ) { - RcnCount count = 0; - bool inBlockComment = false; - size_t offset = 0; // Current byte position (always even) + size_t scan = offset; - while (offset + 1 < size) { - // Find the end of the current line - size_t lineEndOffset = offset; - while ((lineEndOffset + 1) < size) { - unsigned char character = utf16AsciiAt( + while ((scan + 1) <= lineEndOffset) { + if (*inBlockComment) { + if (!utf16AdvanceOverBlockComment( text, lineEndOffset, - isLittleEndian - ); - if (character == '\n') { - break; + isLittleEndian, + blockEnd, + blockEndLen, + &scan, + inBlockComment) + ) { + return false; } - lineEndOffset += 2; + continue; } - size_t scan = offset; - bool lineHasSourceCode = false; - - while ((scan + 1) <= lineEndOffset) { - if (inBlockComment) { - size_t remaining = lineEndOffset - scan; - size_t found = utf16FindAscii( - text + scan, - remaining, - blockEnd, blockEndLen, - isLittleEndian - ); - if (found == SIZE_MAX) { - break; // Entire rest of line is within block comment - } - scan += found + blockEndLen * 2; - inBlockComment = false; - continue; - } + char character = (char) utf16AsciiAt(text, scan, isLittleEndian); + if (isAsciiSpace(character)) { + scan += 2; + continue; + } - char character = (char) utf16AsciiAt(text, scan, isLittleEndian); + const size_t remaining = lineEndOffset - scan; - if (isAsciiSpace(character)) { - scan += 2; - continue; - } + if (lineComLen > 0 && remaining >= (lineComLen * 2) + && utf16StartsWithAsciiAt( + text, + scan, + isLittleEndian, + lineComment, + lineComLen + ) + ) { + return false; + } - size_t remaining = lineEndOffset - scan; - - // Check for line-comment marker - if (lineComLen > 0 && remaining >= (lineComLen * 2)) { - size_t found = utf16FindAscii( - text + scan, - lineComLen * 2, - lineComment, lineComLen, - isLittleEndian - ); - if (found == 0) { - break; - } - } 
+ if (blockStartLen > 0 && remaining >= (blockStartLen * 2) + && utf16StartsWithAsciiAt( + text, + scan, + isLittleEndian, + blockStart, + blockStartLen)) { - // Check for block-comment start marker - if (blockStartLen > 0 && remaining >= (blockStartLen * 2)) { - size_t found = utf16FindAscii( - text + scan, - blockStartLen * 2, - blockStart, blockStartLen, - isLittleEndian - ); - if (found == 0) { - const size_t afterStart = scan + (blockStartLen * 2); - const size_t searchLen = lineEndOffset - afterStart; - size_t closingFound = SIZE_MAX; - if (blockEndLen > 0) { - closingFound = utf16FindAscii( - text + afterStart, - searchLen, - blockEnd, blockEndLen, - isLittleEndian - ); - } - if (closingFound == SIZE_MAX) { - inBlockComment = true; - break; - } - scan = afterStart + closingFound + (blockEndLen * 2); - continue; - } + if (!utf16ConsumeInlineBlockComment( + text, + lineEndOffset, + isLittleEndian, + blockEnd, + blockEndLen, + &scan, + inBlockComment, + blockStartLen + )) { + return false; } - - lineHasSourceCode = true; - break; + continue; } + return true; + } + + return false; +} + +/** + * Counts LOC in UTF-16 encoded text (LE or BE). + * `text` points to the first byte after the BOM. + * `size` is the number of remaining bytes. + */ +static RcnCount countLocUTF16( + const char* text, + size_t size, + bool isLittleEndian, + const char* lineComment, + size_t lineComLen, + const char* blockStart, + size_t blockStartLen, + const char* blockEnd, + size_t blockEndLen +) { + RcnCount count = 0; + bool inBlockComment = false; + size_t offset = 0; // Current byte position (always even) + + while (offset + 1 < size) { + const size_t lineEndOffset = utf16LineEndOffset( + text, + size, + offset, + isLittleEndian + ); + + const bool lineHasSourceCode = utf16LineHasCode( + text, + isLittleEndian, + lineComment, + lineComLen, + blockStart, + blockStartLen, + blockEnd, + blockEndLen, + offset, + lineEndOffset, + &inBlockComment + ); + if (lineHasSourceCode) { ++count; }