Skip to content

Commit 39fe3af

Browse files
committed
first sketch of Splitting user records with delims
1 parent e8897b8 commit 39fe3af

2 files changed

Lines changed: 225 additions & 0 deletions
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
let
2+
// sugar for an xray function
3+
_ = (source as any) =>
    let
        // Serialize the value to JSON bytes, then decode those bytes back into text.
        jsonBytes = Json.FromValue( source, TextEncoding.Utf8 ),
        jsonText = Text.FromBinary( jsonBytes, TextEncoding.Utf8 ),
        // Insert a CR/LF after every comma so each entry starts on its own line.
        // Note: commas inside string values are affected as well.
        multiline = Text.Replace( jsonText, ",", ",#(cr,lf)" )
    in
        multiline,
8+
9+
/* question:
10+
11+
99 -Mike Surname 2022 - 2023
12+
99 - Antonis test 2022 - 2023.
13+
14+
How can I keep only the name and the date period. The preferable result should be "Mike Surname 2022 - 2023" , "Antonis test 2022 - 2023".
15+
*/
16+
17+
// Embedded sample data: the Base64 text is decoded to binary, inflated with Deflate,
// parsed as JSON, and turned into a table with one nullable text column named "Text".
Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WsrRU0PXNzE5VCC4tykvMTVUwMjAyUtAFUcZKsToQBQqOeSX5eZnFCiWpxSVYVOi5p+YXpacqBCQW4Neflp+vkJRYpFBcUpqWpgAEKKpjAQ==", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [Text = _t]),
18+
// Declare the "Text" column as type text.
#"Changed Type" = Table.TransformColumnTypes(Source,{{"Text", type text}}),
19+
20+
21+
// ProcessRows = ( source as table ) => [
22+
// schema_info = Record.Type,
23+
// Col_Info = Table.AddColumn(
24+
// source, "Split.Dash3",
25+
// (row) as record =>
26+
// SplitBy_Dash3( row[RawText] ),
27+
// schema_info
28+
// ),
29+
// Result = SplitBy_Dash3( Col_Info ),
30+
// Result_Text = _( Result ),
31+
// return = Result_Text
32+
// ],
33+
34+
// Shared implementation behind SplitBy_Dash3 and SplitBy_MixedDash3.
//
// Splits `string` once at each entry of `delimiters`, in order, and insists on
// getting exactly one more segment than there are delimiters.
//   string     : the text to split
//   delimiters : list of delimiter texts, consumed left to right
// Returns the list of segments (untrimmed).
// Raises an error whose message names the expected segment count and the
// offending text when the split yields any other number of segments.
SplitBy_EachDelimiter = (string as text, delimiters as list) as list =>
    let
        ExpectedLength = List.Count( delimiters ) + 1,
        SplitFunc = Splitter.SplitTextByEachDelimiter( delimiters, QuoteStyle.None ),
        Segments = SplitFunc( string ),
        SegmentCount = List.Count( Segments )
    in
        if SegmentCount = ExpectedLength
        then Segments
        else error [
            Message.Parameters = { ExpectedLength, string },
            // The stray trailing comma that used to end this message has been removed.
            Message.Format = "InvalidResultException: SplitByEachDelimiter did not return exactly #{0} segments #(cr,lf) String: '#{1}'"
        ],

// Splits on the first "-" and then the next "-"; errors unless that yields 3 segments.
SplitBy_Dash3 = (string as text) as any =>
    SplitBy_EachDelimiter( string, { "-", "-" } ),

// Same contract as SplitBy_Dash3, but the first delimiter is "." instead of "-".
SplitBy_MixedDash3 = (string as text) as any =>
    SplitBy_EachDelimiter( string, { ".", "-" } ),
64+
65+
66+
67+
// Scratch record for inspecting intermediate values while sketching the query.
// In the visible query, only its Rows field is read afterwards.
Summary = [
68+
// Was = ProcessRows( Rows ),
69+
// the typed source table
Rows = #"Changed Type",
70+
// first row, using optional item access (`?`)
OneRow = Rows{0}?,
71+
// that row's Text field, using optional field access (`?`)
OneText = OneRow[Text]?,
72+
Try1_Dash3_t = OneText,
73+
// the JSON-dump helper `_` applied to a literal record
z__ = _( [ name = "bob", id = 999 ]),
74+
// the dash splitter applied to the sample text
Try1_Dash3 = SplitBy_Dash3( OneText ),
75+
// the JSON-dump helper `_` applied to the sample text
Try1_Dash3_ = _( OneText )
76+
],
77+
// Re-expose the table as its own step for the column steps that follow.
Rows = Summary[Rows],
78+
79+
80+
// Step1 = the raw segments ("crumbs") of each line.
// The ".", "-" split is attempted first; if it raises, the "-", "-" split is used
// instead. If that one raises too (the text is invalid), its error propagates.
Col_Step1 = Table.AddColumn(
    Rows,
    "Step1",
    each
        try SplitBy_MixedDash3( [Text] )
        otherwise SplitBy_Dash3( [Text] ),
    type { text }
),
88+
89+
// Step2 = one record per line with the fields Id, Name, YearStart and YearEnd,
// built from the three segments stored in Step1.
// Raises an error for a row if any of the three segments is missing.
Col_LineAsRecord = Table.AddColumn(
    Col_Step1, "Step2",
    (row) =>
        let
            // The splitters cut exactly at the delimiter, so the segments still
            // carry the blanks that surrounded it (e.g. "99 ", " 2023").
            Crumbs = List.Transform( row[Step1], Text.Trim ),
            NameAndYear = Crumbs{1},
            // The index argument of Text.BeforeDelimiter / Text.AfterDelimiter is a
            // number or { index, RelativePosition }. Passing Occurrence.Last there is
            // read as the plain index 1 (second space from the start), so the last
            // space has to be requested like this:
            LastSpace = { 0, RelativePosition.FromEnd }
        in
            [
                Id = Crumbs{0},
                Name = Text.BeforeDelimiter( NameAndYear, " ", LastSpace ),
                YearStart = Text.AfterDelimiter( NameAndYear, " ", LastSpace ),
                // The sample lines may end with a full stop ("... 2022 - 2023.").
                YearEnd = Text.TrimEnd( Crumbs{2}, "." )
            ],
    Record.Type
),
110+
// Step2 record of the first row; errors if the table has no first row.
// Not referenced by the steps below or by the query result.
111+
Step2 = Col_LineAsRecord{0}[Step2],
112+
// Alias for the table that carries the Step2 record column.
Custom1 = Col_LineAsRecord,
113+
// Spread the Step2 record into four columns, keeping the field names as column names.
#"Expanded Step2" = Table.ExpandRecordColumn(Custom1, "Step2", {"Id", "Name", "YearStart", "YearEnd"}, {"Id", "Name", "YearStart", "YearEnd"}),
114+
// Type Id as Int64 and Name as text; YearStart and YearEnd are not given a type here.
#"Changed Type1" = Table.TransformColumnTypes(#"Expanded Step2",{{"Id", Int64.Type}, {"Name", type text}})
115+
in
116+
#"Changed Type1"
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
let
2+
// sugar for an xray function
3+
_ = (source as any) =>
    let
        // Serialize the value to JSON bytes, then decode those bytes back into text.
        jsonBytes = Json.FromValue( source, TextEncoding.Utf8 ),
        jsonText = Text.FromBinary( jsonBytes, TextEncoding.Utf8 ),
        // Insert a CR/LF after every comma so each entry starts on its own line.
        // Note: commas inside string values are affected as well.
        multiline = Text.Replace( jsonText, ",", ",#(cr,lf)" )
    in
        multiline,
8+
9+
/* question:
10+
11+
99 -Mike Surname 2022 - 2023
12+
99 - Antonis test 2022 - 2023.
13+
14+
How can I keep only the name and the date period. The preferable result should be "Mike Surname 2022 - 2023" , "Antonis test 2022 - 2023".
15+
*/
16+
17+
// Embedded sample data: the Base64 text is decoded to binary, inflated with Deflate,
// parsed as JSON, and turned into a table with one nullable text column named "Text".
Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WsrRU0PXNzE5VCC4tykvMTVUwMjAyUtAFUcZKsToQBQqOeSX5eZnFCiWpxSVYVOi5p+YXpacqBCQWoMrGAgA=", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [Text = _t]),
18+
// Declare the "Text" column as type text.
#"Changed Type" = Table.TransformColumnTypes(Source,{{"Text", type text}}),
19+
20+
21+
// ProcessRows = ( source as table ) => [
22+
// schema_info = Record.Type,
23+
// Col_Info = Table.AddColumn(
24+
// source, "Split.Dash3",
25+
// (row) as record =>
26+
// SplitBy_Dash3( row[RawText] ),
27+
// schema_info
28+
// ),
29+
// Result = SplitBy_Dash3( Col_Info ),
30+
// Result_Text = _( Result ),
31+
// return = Result_Text
32+
// ],
33+
34+
// Shared implementation behind SplitBy_Dash3 and SplitBy_MixedDash3.
//
// Splits `string` once at each entry of `delimiters`, in order, and insists on
// getting exactly one more segment than there are delimiters.
//   string     : the text to split
//   delimiters : list of delimiter texts, consumed left to right
// Returns the list of segments (untrimmed).
// Raises an error whose message names the expected segment count and the
// offending text when the split yields any other number of segments.
SplitBy_EachDelimiter = (string as text, delimiters as list) as list =>
    let
        ExpectedLength = List.Count( delimiters ) + 1,
        SplitFunc = Splitter.SplitTextByEachDelimiter( delimiters, QuoteStyle.None ),
        Segments = SplitFunc( string ),
        SegmentCount = List.Count( Segments )
    in
        if SegmentCount = ExpectedLength
        then Segments
        else error [
            Message.Parameters = { ExpectedLength, string },
            // The stray trailing comma that used to end this message has been removed.
            Message.Format = "InvalidResultException: SplitByEachDelimiter did not return exactly #{0} segments #(cr,lf) String: '#{1}'"
        ],

// Splits on the first "-" and then the next "-"; errors unless that yields 3 segments.
SplitBy_Dash3 = (string as text) as any =>
    SplitBy_EachDelimiter( string, { "-", "-" } ),

// Same contract as SplitBy_Dash3, but the first delimiter is "." instead of "-".
SplitBy_MixedDash3 = (string as text) as any =>
    SplitBy_EachDelimiter( string, { ".", "-" } ),
64+
65+
66+
67+
// Scratch record for inspecting intermediate values while sketching the query.
// In the visible query, only its Rows field is read afterwards.
Summary = [
68+
// Was = ProcessRows( Rows ),
69+
// the typed source table
Rows = #"Changed Type",
70+
// first row, using optional item access (`?`)
OneRow = Rows{0}?,
71+
// that row's Text field, using optional field access (`?`)
OneText = OneRow[Text]?,
72+
Try1_Dash3_t = OneText,
73+
// the JSON-dump helper `_` applied to a literal record
z__ = _( [ name = "bob", id = 999 ]),
74+
// the dash splitter applied to the sample text
Try1_Dash3 = SplitBy_Dash3( OneText ),
75+
// the JSON-dump helper `_` applied to the sample text
Try1_Dash3_ = _( OneText )
76+
],
77+
// Re-expose the table as its own step for the column steps that follow.
Rows = Summary[Rows],
78+
79+
80+
// Step1 = the raw segments ("crumbs") of each line.
// The ".", "-" split is attempted first; if it raises, the "-", "-" split is used
// instead. If that one raises too (the text is invalid), its error propagates.
Col_Step1 = Table.AddColumn(
    Rows,
    "Step1",
    each
        try SplitBy_MixedDash3( [Text] )
        otherwise SplitBy_Dash3( [Text] ),
    type { text }
),
88+
89+
// Step2 = a work-in-progress record per line that exposes every intermediate
// value (Crumbs, Id, Name, Name2, NameWithoutYear, DateStart) for inspection.
// Raises an error for a row when a referenced segment is missing.
Col_LineAsRecord = Table.AddColumn(
    Col_Step1, "Step2",
    (row) => [
        // The splitters cut exactly at the delimiter, so the segments still carry
        // the blanks that surrounded it (e.g. "99 ", " 2023"); trim them up front.
        Crumbs = List.Transform( row[Step1], Text.Trim ),
        SplitBy_TwoWhitespace = Splitter.SplitTextByWhitespace(
            QuoteStyle.None
        ),
        Id = Crumbs{0},
        Name = Crumbs{1},
        Name2 = SplitBy_TwoWhitespace( Name ),
        // CrumbsWhite = SplitBy_TwoWhitespace( row[Name] ),

        // The index argument of Text.BeforeDelimiter / Text.AfterDelimiter is a
        // number or { index, RelativePosition }. Passing Occurrence.Last there is
        // read as the plain index 1 (second space from the start), so the last
        // space has to be requested with { 0, RelativePosition.FromEnd }.
        NameWithoutYear = Text.BeforeDelimiter( Name, " ", { 0, RelativePosition.FromEnd } ),
        DateStart = Text.AfterDelimiter( Name, " ", { 0, RelativePosition.FromEnd } )
    ],
    Any.Type
),
106+
// Query result: the Step2 record of the first row.
// Errors if the table has no first row.
107+
Step2 = Col_LineAsRecord{0}[Step2]
108+
in
109+
Step2

0 commit comments

Comments
 (0)