Skip to content

Commit f86ee3e

Browse files
committed
Add preparser to cleanup the sql file before parser ingestion
1 parent dbdc5ba commit f86ee3e

3 files changed

Lines changed: 123 additions & 0 deletions

File tree

internal/parser.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ func normalizeSql(sql string) string {
5555
}
5656

5757
func ParseSchema(sql string) (*core.SQLSchema, error) {
58+
sql = normalize(sql)
5859
stmts, err := parser.Parse(sql)
5960
if err != nil {
6061
return nil, fmt.Errorf("schema parsing error: %w", err)

internal/preparser.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package internal
2+
3+
import (
4+
"regexp"
5+
"slices"
6+
"strings"
7+
)
8+
9+
// The parser used has some limitations (it is based on the CockroachDB syntax).
// The preparser normalizes the input to avoid a set of known parse errors.
11+
12+
func normalize(sql string) string {
13+
alter := []func(string) string{removeTrigger, removeCustomTypes, replaceSubType}
14+
for _, f := range alter {
15+
sql = f(sql)
16+
}
17+
return sql
18+
}
19+
20+
// triggerRegexp matches a whole CREATE TRIGGER statement, from the CREATE
// keyword up to the first "END;" that follows it. Keywords are matched
// case-insensitively and may be separated by any run of whitespace.
//
// Limitation: the body is matched lazily, so a trigger whose body itself
// contains "END;" (for example a statement finishing with CASE ... END;) is
// only removed up to that inner END.
var triggerRegexp = regexp.MustCompile(`(?i)\bCREATE\s+TRIGGER\b[\s\S]*?\bEND\s*;`)

// removeTrigger returns sql with every CREATE TRIGGER ... END; statement
// removed. Input that contains no such statement is returned unchanged.
func removeTrigger(sql string) string {
	matches := triggerRegexp.FindAllStringIndex(sql, -1)
	if len(matches) == 0 {
		return sql
	}

	// Cut from the last match to the first so that the byte offsets of the
	// matches still to be processed stay valid.
	slices.Reverse(matches)
	for _, m := range matches {
		sql = sql[:m[0]] + sql[m[1]:]
	}

	return sql
}
36+
37+
// customTypeRegexp matches a whole CREATE TYPE statement up to its
// terminating semicolon. Keywords are matched case-insensitively and may be
// separated by any run of whitespace. Single-quoted literals are consumed as
// a unit, so a ';' inside a label such as 'a;b' does not end the statement;
// if a quote is unbalanced the match falls back to the first ';'.
var customTypeRegexp = regexp.MustCompile(`(?i)\bCREATE\s+TYPE\b(?:'[^']*'|[^;])*?;`)

// removeCustomTypes returns sql with every CREATE TYPE ...; statement removed.
// Input that contains no such statement is returned unchanged.
func removeCustomTypes(sql string) string {
	return customTypeRegexp.ReplaceAllLiteralString(sql, "")
}
53+
54+
// replaceSubType rewrites every occurrence of the column type
// "BLOB SUB_TYPE TEXT" to "bytea". The match is exact: any other letter case
// or spacing is left untouched.
func replaceSubType(sql string) string {
	const (
		unsupported = "BLOB SUB_TYPE TEXT"
		replacement = "bytea"
	)

	return strings.ReplaceAll(sql, unsupported, replacement)
}

internal/preparser_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package internal
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestTrigger(t *testing.T) {
10+
schema, err := ParseSchema(`
11+
CREATE TABLE actor (
12+
actor_id numeric NOT NULL ,
13+
first_name VARCHAR(45) NOT NULL,
14+
last_name VARCHAR(45) NOT NULL,
15+
last_update TIMESTAMP NOT NULL,
16+
PRIMARY KEY (actor_id)
17+
);
18+
19+
CREATE INDEX idx_actor_last_name ON actor(last_name);
20+
21+
CREATE TRIGGER actor_trigger_ai AFTER INSERT ON actor
22+
BEGIN
23+
UPDATE actor SET last_update = DATETIME('NOW') WHERE rowid = new.rowid;
24+
END;
25+
`)
26+
assert.NoError(t, err)
27+
28+
assert.Equal(t, 4, len(schema.Tables["actor"].Columns))
29+
}
30+
31+
func TestEnums(t *testing.T) {
32+
schema, err := ParseSchema(`
33+
CREATE TABLE actor (
34+
actor_id numeric NOT NULL ,
35+
first_name VARCHAR(45) NOT NULL,
36+
last_name VARCHAR(45) NOT NULL,
37+
last_update TIMESTAMP NOT NULL,
38+
PRIMARY KEY (actor_id)
39+
);
40+
41+
CREATE INDEX idx_actor_last_name ON actor(last_name);
42+
43+
CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
44+
`)
45+
assert.NoError(t, err)
46+
47+
assert.Equal(t, 4, len(schema.Tables["actor"].Columns))
48+
}
49+
50+
func TestBlob(t *testing.T) {
51+
schema, err := ParseSchema(`
52+
CREATE TABLE actor (
53+
actor_id numeric NOT NULL ,
54+
description BLOB SUB_TYPE TEXT DEFAULT NULL,
55+
first_name VARCHAR(45) NOT NULL,
56+
last_name VARCHAR(45) NOT NULL,
57+
last_update TIMESTAMP NOT NULL,
58+
PRIMARY KEY (actor_id)
59+
);
60+
61+
CREATE INDEX idx_actor_last_name ON actor(last_name);
62+
`)
63+
assert.NoError(t, err)
64+
65+
assert.Equal(t, 5, len(schema.Tables["actor"].Columns))
66+
}

0 commit comments

Comments
 (0)