Skip to content

Commit e8897b8

Browse files
committed
new: Split and Filter by Longest UO
1 parent a6bfc56 commit e8897b8

5 files changed

Lines changed: 121 additions & 0 deletions
31.9 KB
Binary file not shown.
108 KB
Loading
60.9 KB
Loading
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
original question: <https://www.reddit.com/r/PowerBI/comments/1brmxet/split_lines_in_excel_and_delimiters_in_power_query/>
2+
3+
The problem causing duplicates is from splitting and expanding in multiple steps.
4+
5+
There's a different function you can call named `Table.TransformRows`
6+
That lets you modify multiple columns in one pass. Here's the full code:
7+
8+
- [Final results screenshot.png](https://raw.githubusercontent.com/ninmonkey/ninMonkQuery-examples/main/forumQuestions/img/2024-03_SplitExcelNewlines.png)
9+
- [SplitExcelNewlines.pq](https://github.com/ninmonkey/ninMonkQuery-examples/blob/5e0b338b6d211c4e712b0819588f83c90f014256/forumQuestions/pq/2024-03_SplitExcelNewlines.pq#L19-L56)
10+
- [SplitExcelNewlines.pbix](https://github.com/ninmonkey/ninMonkQuery-examples/blob/5e0b338b6d211c4e712b0819588f83c90f014256/forumQuestions/2024-03_SplitExcelNewlines.pbix)
11+
12+
### Importing multiple date formats
13+
14+
As a bonus: I wrote a function that imports both date formats in your example
15+
16+
try Something catch (e) => // ... handler function
17+
18+
It's better than `try otherwise` because you can optionally return any value, including the original error record — for example, `null meta [ info = e ]`.
19+
20+
### The Main Code
21+
22+
- Split each column by a newline
23+
- Then generate a list of N records, using the position and expanding the `PartId`
24+
25+
// ExpandRows: split the multi-line cell values of one source row into a
// list of N records (one per line), so a single later expand step produces
// clean rows — avoiding the duplicate explosion caused by splitting and
// expanding each column in separate steps.
ExpandRows = (row as record) as any => [
    // Newline delimiter used inside the cells.
    // NOTE(review): some apps emit CRLF — that would be "#(cr,lf)", not "#(cf,lf)".
    Delim = "#(lf)",

    invoiceList = Text.Split( row[Invoice], Delim ),
    qtyList     = Text.Split( row[Qty], Delim ),
    datesList   = Text.Split( row[Expected], Delim ),

    // The columns may split into lists of different lengths; emit enough
    // records to cover the longest one.
    totalRecords = List.Max({
        List.Count( invoiceList ),
        List.Count( qtyList ),
        List.Count( datesList )
    }),

    // Build one record per line index. The optional item access "{ num }?"
    // returns null instead of raising an error when a list is shorter
    // than the longest list.
    return = List.Transform(
        { 0 .. ( totalRecords - 1 ) },
        (num) => [
            // was the undefined name `partId` — the part id comes from the current row
            Part       = row[Part],
            Invoice    = invoiceList{ num }?,
            Qty        = qtyList{ num }?,
            Expected   = datesList{ num }?,
            LineNumber = num
        ]
    )
][return]
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// DevicesOU_MultiRow
// Parses "device: OU" lines, then keeps one row per device with its longest OU.
let
    RawData = Table.FromRows(
        Json.Document(
            Binary.Decompress(
                Binary.FromText("i45Wckkty0xONbRSqKis0ssvSo+JgYgUK8Xq4JGNiQkoyk8hRk1MjH9aGpCLpNYIU60CWFoBwjPG6xosshiuMcFrAhbZmJiQ1OISoCtiAQ==", BinaryEncoding.Base64),
                Compression.Deflate ) ),
        let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [Text = _t] ),
    Typed = Table.TransformColumnTypes( RawData, {{"Text", type text}} ),

    // Carve each "device: OU" line into a record in a single pass
    WithRecords = Table.AddColumn(
        Typed,
        "SplitToRecords",
        (currentRow) => [
            Segments   = Text.Split( currentRow[Text], ": " ),
            DeviceName = Segments{0},
            OU         = Segments{1}
        ],
        Record.Type
    ),

    // sort a list of strings so the longest comes first
    List.SortByLongest = (source as list) as list =>
        List.Sort( source, (item) => -Text.Length( item ) ),

    // pick the single longest string out of a list
    List.SelectLongest = (source as list) as any =>
        List.First( List.SortByLongest( source ) ),

    Expanded  = Table.ExpandRecordColumn( WithRecords, "SplitToRecords", {"DeviceName", "OU"}, {"Device", "OU"} ),
    Retyped   = Table.TransformColumnTypes( Expanded, {{"Device", type text}, {"OU", type text}} ),
    Trimmed   = Table.TransformColumns( Retyped, { {"Device", Text.Trim, type text}, {"OU", Text.Trim, type text} } ),
    Projected = Table.SelectColumns( Trimmed, {"Device", "OU"} ),

    // one output row per device; the aggregator receives the group's
    // sub-table, so groupTable[OU] is that group's list of OU strings
    Grouped = Table.Group(
        Projected,
        {"Device"},
        {
            {
                "OU_Longest",
                (groupTable) => List.SelectLongest( groupTable[OU] ),
                Text.Type
            }
        }
    )
in
    Grouped
40+
41+
// DevicesOU_SingleLine
// Same parse as the multi-row variant, but every split happens inside
// one record so the whole transformation is a single "step".
let
    RawData = Table.FromRows(
        Json.Document(
            Binary.Decompress(
                Binary.FromText("i45Wckkty0xONbRSqKis0ssvSo+JgYgUW6MLxMQEFOWn4BCOifFPSwNylWJ1YGYaYZqpgCRtTLSVSJpMiNIUklpcArQqFgA=", BinaryEncoding.Base64),
                Compression.Deflate ) ),
        let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [Text = _t] ),
    Typed = Table.TransformColumnTypes( RawData, {{"Text", type text}} ),

    // deliberately verbose: every intermediate value is named so each
    // stage of the split can be inspected on its own
    SplitToRecords = Table.AddColumn(
        Typed,
        "SplitToRecords",
        (currentRow) => [
            line          = currentRow[Text],
            DeviceName    = Text.BeforeDelimiter( line, ": ", 0 ),
            RestOfLine    = Text.AfterDelimiter( line, ": ", 0 ),
            OU_list       = Text.Split( RestOfLine, ";" ),
            sortByLongest = List.Sort( OU_list, each -Text.Length(_) ),
            OU_Longest    = List.First( sortByLongest, null )
        ],
        Record.Type
    ),

    // debugging hook: view the first row's record by itself
    InspectOneRow = SplitToRecords{0}[SplitToRecords],

    // keep just the two fields we care about
    #"Expanded SplitToRecords" = Table.ExpandRecordColumn(
        SplitToRecords,
        "SplitToRecords",
        {"DeviceName", "OU_Longest"},
        {"DeviceName", "OU_Longest"} ),

    Final = Table.TransformColumnTypes(
        #"Expanded SplitToRecords",
        { {"DeviceName", type text}, {"OU_Longest", type text} } )
in
    Final

0 commit comments

Comments
 (0)