Skip to content

Commit 03dd77b

Browse files
committed
Added lexer and lexer tests
1 parent aab51db commit 03dd77b

3 files changed

Lines changed: 111 additions & 1 deletion

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# stringutils
22

33
[![Build Status](https://travis-ci.com/maxmousee/stringutils.svg?branch=master)](https://travis-ci.org/maxmousee/stringutils)
4-
[![Go Report](https://goreportcard.com/badge/github.com/maxmousee/go-stringutils)](https://goreportcard.com/report/github.com/maxmousee/go-stringutils)
4+
[![Go Report](https://goreportcard.com/badge/github.com/maxmousee/stringutils)](https://goreportcard.com/report/github.com/maxmousee/stringutils)
55
[![Coverage Status](https://coveralls.io/repos/github/maxmousee/go-stringutils/badge.svg?branch=master)](https://coveralls.io/github/maxmousee/go-stringutils?branch=master)
66

77
StringUtils for Go! :)

lexer.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,45 @@
11
package stringutils
22

3+
// Tokenize splits a string into an array of strings/words, and categorizes them into tokens
4+
// If the string is empty, it returns an empty slice
5+
func Tokenize(input string, tokenTypes []TokenType) []Token {
6+
var tokens []Token
7+
words := WordSplit(input)
8+
for index, aWord := range words {
9+
aToken := TokenizeWord(aWord, index, tokenTypes)
10+
tokens = append(tokens, aToken)
11+
}
12+
return tokens
13+
}
14+
15+
// TokenizeWord categorizes a given word into tokens of a given set of token types
16+
// If no match is found, it returns a token with "" as type
17+
func TokenizeWord(word string, position int, tokenTypes []TokenType) Token {
18+
aTokenType := LookupType(word, tokenTypes)
19+
return Token{
20+
Type: aTokenType.Type,
21+
Position: position,
22+
Text: word,
23+
}
24+
}
25+
26+
// LookupType looks up a token type for a given word
27+
// If no match is found, it returns a token type with "" as type
28+
func LookupType(word string, tokenTypes []TokenType) TokenType {
29+
for _, aToken := range tokenTypes {
30+
if aToken.CaseSensitive {
31+
if EqualsAny(word, aToken.Words) {
32+
return aToken
33+
}
34+
} else {
35+
if EqualsAnyIgnoreCase(word, aToken.Words) {
36+
return aToken
37+
}
38+
}
39+
}
40+
return TokenType{
41+
Type: "",
42+
Words: []string{""},
43+
CaseSensitive: false,
44+
}
45+
}

lexer_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package stringutils
2+
3+
import (
4+
"github.com/stretchr/testify/assert"
5+
"testing"
6+
)
7+
8+
func TestTokenizeCaseSensitive(t *testing.T) {
9+
assertions := assert.New(t)
10+
11+
tokenTypes := []TokenType{}
12+
tokenTypes = append(tokenTypes, TokenType{
13+
Type: "keyword",
14+
Words: []string{"if", "for"},
15+
CaseSensitive: true,
16+
})
17+
result := Tokenize("if", tokenTypes)
18+
assertions.Equal("if", result[0].Text)
19+
assertions.Equal(0, result[0].Position)
20+
assertions.Equal("keyword", result[0].Type)
21+
}
22+
23+
func TestTokenizeCaseInsensitive(t *testing.T) {
24+
assertions := assert.New(t)
25+
26+
tokenTypes := []TokenType{}
27+
tokenTypes = append(tokenTypes, TokenType{
28+
Type: "keyword",
29+
Words: []string{"if", "for"},
30+
CaseSensitive: false,
31+
})
32+
result := Tokenize("IF", tokenTypes)
33+
assertions.Equal("IF", result[0].Text)
34+
assertions.Equal(0, result[0].Position)
35+
assertions.Equal("keyword", result[0].Type)
36+
}
37+
38+
func TestTokenizeWord(t *testing.T) {
39+
assertions := assert.New(t)
40+
41+
tokenTypes := []TokenType{}
42+
tokenTypes = append(tokenTypes, TokenType{
43+
Type: "keyword",
44+
Words: []string{"if", "for"},
45+
CaseSensitive: true,
46+
})
47+
result := TokenizeWord("if", 0, tokenTypes)
48+
assertions.Equal("if", result.Text)
49+
assertions.Equal(0, result.Position)
50+
assertions.Equal("keyword", result.Type)
51+
}
52+
53+
func TestLookupType(t *testing.T) {
54+
assertions := assert.New(t)
55+
56+
tokenTypes := []TokenType{}
57+
tokenTypes = append(tokenTypes, TokenType{
58+
Type: "keyword",
59+
Words: []string{"if", "for"},
60+
CaseSensitive: false,
61+
})
62+
63+
result := LookupType("for", tokenTypes)
64+
assertions.Equal("keyword", result.Type)
65+
assertions.Equal([]string{"if", "for"}, result.Words)
66+
assertions.Equal(false, result.CaseSensitive)
67+
}

0 commit comments

Comments
 (0)