Skip to content

Commit 7f69306

Browse files
authored
Sync proto structure and message names for data preparation. (#1811)
* Sync proto structure and message names for data preparation. Moved data preparation into another package.
* Fix broken test.
1 parent 2531b12 commit 7f69306

3 files changed

Lines changed: 61 additions & 49 deletions

File tree

core/actions/data_preparation.ts

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ import { Resolvable } from "df/core/common";
44
import * as Path from "df/core/path";
55
import { Session } from "df/core/session";
66
import {
7-
actionConfigToCompiledGraphTarget,
87
addDependenciesToActionDependencyTargets,
98
configTargetToCompiledGraphTarget,
109
nativeRequire,
@@ -36,7 +35,7 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
3635
config.filename = resolveActionsConfigFilename(config.filename, configPath);
3736
const dataPreparationAsJson = nativeRequire(config.filename).asJson;
3837
const dataPreparationDefinition = parseDataPreparationDefinitionJson(dataPreparationAsJson);
39-
this.proto.dataPreparationContents = dataform.DataPreparationDefinition.encode(dataPreparationDefinition).finish();
38+
this.proto.dataPreparationContents = dataform.dataprep.DataPreparation.encode(dataPreparationDefinition).finish();
4039

4140
// Find targets
4241
const targets = getTargets(dataPreparationDefinition);
@@ -110,11 +109,11 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
110109

111110
function parseDataPreparationDefinitionJson(dataPreparationAsJson: {
112111
[key: string]: unknown;
113-
}): dataform.DataPreparationDefinition {
112+
}): dataform.dataprep.DataPreparation {
114113
try {
115-
return dataform.DataPreparationDefinition.create(
114+
return dataform.dataprep.DataPreparation.create(
116115
verifyObjectMatchesProto(
117-
dataform.DataPreparationDefinition,
116+
dataform.dataprep.DataPreparation,
118117
dataPreparationAsJson as {
119118
[key: string]: any;
120119
},
@@ -129,7 +128,7 @@ function parseDataPreparationDefinitionJson(dataPreparationAsJson: {
129128
}
130129
}
131130

132-
function getTargets(definition: dataform.DataPreparationDefinition): dataform.Target[] {
131+
function getTargets(definition: dataform.dataprep.DataPreparation): dataform.Target[] {
133132
const targets: dataform.Target[] = [];
134133

135134
definition.nodes.forEach(node => {

core/main_test.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -920,13 +920,13 @@ nodes:
920920
// Generate Base64 encoded representation of the YAML.
921921
const dataPreparationAsObject = loadYaml(dataPreparationYaml);
922922
const dataPreparationDefinition = verifyObjectMatchesProto(
923-
dataform.DataPreparationDefinition,
923+
dataform.dataprep.DataPreparation,
924924
dataPreparationAsObject as {
925925
[key: string]: any;
926926
}
927927
);
928928
const base64encodedContents = encode64(
929-
dataform.DataPreparationDefinition,
929+
dataform.dataprep.DataPreparation,
930930
dataPreparationDefinition
931931
);
932932

protos/data_preparation.proto

Lines changed: 54 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,57 +1,69 @@
11
syntax = "proto3";
22

3-
package dataform;
3+
package dataform.dataprep;
44

5-
option java_package = "com.dataform.protos";
5+
option java_package = "com.dataform.dataprep.protos";
66
option java_multiple_files = true;
77

8-
option go_package = "github.com/dataform-co/dataform/protos/dataform";
8+
option go_package = "github.com/dataform-co/dataform/protos/dataform/dataprep";
99

10-
message DataPreparationDefinition {
10+
message DataPreparation {
1111
repeated DataPreparationNode nodes = 1;
1212
DataPreparationGenerated generated = 2;
13+
DataPreparationConfiguration configuration = 3;
1314
}
1415

1516
message DataPreparationNode {
1617
string id = 1;
17-
DataPreparationNodeSource source = 2;
18-
repeated DataPreparationNodeStep steps = 3;
19-
DataPreparationNodeGenerated generated = 4;
20-
// Destination BigQuery table(s) are defined within the data preparation
21-
optional DataPreparationNodeDestination destination = 5;
18+
Source source = 2;
19+
repeated Step steps = 3;
20+
NodeGenerated generated = 4;
21+
Destination destination = 5;
2222
}
2323

24-
message DataPreparationNodeSource {
24+
message DataPreparationConfiguration {
25+
TableReference error_table = 1;
26+
}
27+
28+
message Source {
2529
oneof source {
2630
string node_id = 1;
2731
TableReference table = 2;
2832
Join join = 3;
2933
}
3034
}
3135

32-
message DataPreparationNodeDestination {
36+
message Destination {
3337
oneof destination {
3438
TableReference table = 1;
3539
}
3640
}
3741

38-
message DataPreparationNodeStep {
42+
message Step {
3943
string id = 1;
4044
string description = 2;
45+
4146
oneof definition {
4247
ColumnStep column_step = 3;
4348
FilterStep filter_step = 4;
4449
}
45-
DataPreparationNodeStepGenerated generated = 5;
50+
StepGenerated generated = 5;
4651
}
4752

4853
message ColumnStep {
4954
string column_name = 1;
5055
Expression expression = 2;
5156
}
5257

58+
enum FilterType {
59+
FILTER_TYPE_UNSPECIFIED = 0;
60+
ROW_FILTER_KEEP_ROWS = 1;
61+
VALIDATION = 2;
62+
}
63+
5364
message FilterStep {
5465
Expression expression = 1;
66+
optional FilterType filter_type = 2;
5567
}
5668

5769
message Expression {
@@ -98,75 +110,76 @@ message TableReference {
98110
}
99111

100112
message DataPreparationGenerated {
101-
repeated DataPreparationValidationError validation_errors = 1;
113+
repeated ValidationError validation_errors = 1;
102114
optional string location = 2;
103115
}
104116

105-
message DataPreparationNodeGenerated {
106-
repeated DataPreparationSection sections = 1;
117+
message NodeGenerated {
118+
repeated Section sections = 1;
107119
repeated string sources = 2;
108-
repeated DataPreparationValidationError validation_errors = 3;
109-
optional DataPreparationSchema output_schema = 4;
110-
DataPreparationNodeSourceGenerated source_generated = 5;
111-
optional DataPreparationNodeDestinationGenerated destination_generated = 6;
120+
repeated ValidationError validation_errors = 3;
121+
Schema output_schema = 4;
122+
SourceGenerated source_generated = 5;
123+
DestinationGenerated destination_generated = 6;
112124
}
113125

114-
message DataPreparationSection {
115-
DataPreparationSectionType type = 1;
126+
message Section {
127+
SectionType type = 1;
116128
string label = 2;
117129
}
118130

119-
enum DataPreparationSectionType {
131+
enum SectionType {
120132
SECTION_TYPE_UNSPECIFIED = 0;
121133
SECTION_UNPARSEABLE = 1;
122134
SECTION_SOURCE_TABLE = 2;
123135
SECTION_SQL = 3;
124136
SECTION_DESTINATION_TABLE = 4;
137+
SECTION_JOIN = 5;
125138
}
126139

127-
message DataPreparationNodeSourceGenerated {
128-
optional DataPreparationNodeSourceSourceSchema source_schema = 4;
140+
message SourceGenerated {
141+
SourceSchema source_schema = 4;
129142
}
130143

131-
message DataPreparationNodeSourceSourceSchema {
144+
message SourceSchema {
132145
oneof source_schema {
133-
DataPreparationSchema node_schema = 1;
134-
DataPreparationSchema table_schema = 2;
146+
Schema node_schema = 1;
147+
Schema table_schema = 2;
135148
JoinSchema join_schema = 3;
136149
}
137150
}
138151

139152
message JoinSchema {
140-
DataPreparationSchema left_schema = 1;
141-
DataPreparationSchema right_schema = 2;
153+
Schema left_schema = 1;
154+
Schema right_schema = 2;
142155
}
143156

144-
message DataPreparationNodeDestinationGenerated {
145-
optional DataPreparationSchema schema = 1;
157+
message DestinationGenerated {
158+
Schema schema = 1;
146159
}
147160

148-
message DataPreparationNodeStepGenerated {
161+
message StepGenerated {
149162
repeated string source_columns = 1;
150-
repeated DataPreparationValidationError validation_errors = 2;
163+
repeated ValidationError validation_errors = 2;
151164
}
152165

153-
message DataPreparationSchema {
154-
repeated DataPreparationSchemaField field = 1;
166+
message Schema {
167+
repeated Field field = 1;
155168
}
156169

157-
message DataPreparationSchemaField {
170+
message Field {
158171
string name = 1;
159172
optional string type = 2;
160173
optional string mode = 3;
161-
repeated DataPreparationSchemaField fields = 4;
174+
repeated Field fields = 4;
162175
}
163176

164-
message DataPreparationValidationError {
165-
DataPreparationValidationErrorLevel level = 1;
177+
message ValidationError {
178+
ValidationErrorLevel level = 1;
166179
string description = 2;
167180
}
168181

169-
enum DataPreparationValidationErrorLevel {
182+
enum ValidationErrorLevel {
170183
LEVEL_UNSPECIFIED = 0;
171184
LEVEL_WARN = 1;
172185
LEVEL_ERROR = 2;

0 commit comments

Comments
 (0)