Added lexer and lexer tests

maxmousee · maxmousee · commit 03dd77b7f73b · 2020-08-08T16:44:28.000+02:00
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # stringutils
 
 [![Build Status](https://travis-ci.com/maxmousee/stringutils.svg?branch=master)](https://travis-ci.org/maxmousee/stringutils)
-[![Go Report](https://goreportcard.com/badge/github.com/maxmousee/go-stringutils)](https://goreportcard.com/report/github.com/maxmousee/go-stringutils)
+[![Go Report](https://goreportcard.com/badge/github.com/maxmousee/stringutils)](https://goreportcard.com/report/github.com/maxmousee/stringutils)
 [![Coverage Status](https://coveralls.io/repos/github/maxmousee/go-stringutils/badge.svg?branch=master)](https://coveralls.io/github/maxmousee/go-stringutils?branch=master)
 
 StringUtils for Go! :) 
diff --git a/lexer.go b/lexer.go
@@ -1,2 +1,45 @@
 package stringutils
 
+// Tokenize splits a string into an array of strings/words, and categorizes them into tokens
+// If the string is empty, it returns an empty slice
+func Tokenize(input string, tokenTypes []TokenType) []Token {
+	var tokens []Token
+	words := WordSplit(input)
+	for index, aWord := range words {
+		aToken := TokenizeWord(aWord, index, tokenTypes)
+		tokens = append(tokens, aToken)
+	}
+	return tokens
+}
+
+// TokenizeWord categorizes a given word into tokens of a given set of token types
+// If no match is found, it returns a token with "" as type
+func TokenizeWord(word string, position int, tokenTypes []TokenType) Token {
+	aTokenType := LookupType(word, tokenTypes)
+	return Token{
+		Type:     aTokenType.Type,
+		Position: position,
+		Text:     word,
+	}
+}
+
+// LookupType finds the token type that word belongs to.
+// The entries of tokenTypes are tried in order and the first match is returned:
+// an entry with CaseSensitive set is checked with EqualsAny, every other entry
+// with EqualsAnyIgnoreCase, in both cases against the entry's Words.
+// If no entry matches, the result has Type "", Words []string{""} and
+// CaseSensitive false.
+func LookupType(word string, tokenTypes []TokenType) TokenType {
+	for _, candidate := range tokenTypes {
+		if candidate.CaseSensitive {
+			if EqualsAny(word, candidate.Words) {
+				return candidate
+			}
+			continue
+		}
+		if EqualsAnyIgnoreCase(word, candidate.Words) {
+			return candidate
+		}
+	}
+	return TokenType{Type: "", Words: []string{""}, CaseSensitive: false}
+}
diff --git a/lexer_test.go b/lexer_test.go
@@ -0,0 +1,67 @@
+package stringutils
+
+import (
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+// TestTokenizeCaseSensitive checks that Tokenize labels a word listed in a
+// case-sensitive token type with that type, keeping its text and position 0.
+func TestTokenizeCaseSensitive(t *testing.T) {
+	assertions := assert.New(t)
+	keywords := []TokenType{{Type: "keyword", Words: []string{"if", "for"}, CaseSensitive: true}}
+
+	result := Tokenize("if", keywords)
+	if !assertions.NotEmpty(result) {
+		return // nothing to inspect; indexing result[0] would panic
+	}
+	assertions.Equal("if", result[0].Text)
+	assertions.Equal(0, result[0].Position)
+	assertions.Equal("keyword", result[0].Type)
+}
+
+// TestTokenizeCaseInsensitive checks that Tokenize matches an upper-case word
+// against a case-insensitive token type while keeping the original text.
+func TestTokenizeCaseInsensitive(t *testing.T) {
+	assertions := assert.New(t)
+	keywords := []TokenType{{Type: "keyword", Words: []string{"if", "for"}, CaseSensitive: false}}
+
+	result := Tokenize("IF", keywords)
+	if !assertions.NotEmpty(result) {
+		return // nothing to inspect; indexing result[0] would panic
+	}
+	assertions.Equal("IF", result[0].Text)
+	assertions.Equal(0, result[0].Position)
+	assertions.Equal("keyword", result[0].Type)
+}
+
+// TestTokenizeWord checks that TokenizeWord echoes the word and position it
+// is given and labels a word listed in a case-sensitive type with that type.
+func TestTokenizeWord(t *testing.T) {
+	keyword := TokenType{
+		Type:          "keyword",
+		Words:         []string{"if", "for"},
+		CaseSensitive: true,
+	}
+
+	got := TokenizeWord("if", 0, []TokenType{keyword})
+	assert.Equal(t, "if", got.Text)
+	assert.Equal(t, 0, got.Position)
+	assert.Equal(t, "keyword", got.Type)
+}
+
+// TestLookupType checks that LookupType, given a word listed in a
+// case-insensitive token type, returns that token type with its Type, Words
+// and CaseSensitive fields intact.
+func TestLookupType(t *testing.T) {
+	keyword := TokenType{
+		Type:          "keyword",
+		Words:         []string{"if", "for"},
+		CaseSensitive: false,
+	}
+
+	got := LookupType("for", []TokenType{keyword})
+	assert.Equal(t, "keyword", got.Type)
+	assert.Equal(t, []string{"if", "for"}, got.Words)
+	assert.Equal(t, false, got.CaseSensitive)
+}