Skip to content

Commit 86254eb

Browse files
authored
Update Data Preparation processor to remove proto dependency. (#1839)
* Update Data Preparation processor to remove proto dependency. The data preparation in the compiled graph now contains the resolved YAML after compiling.
* Fixed compilation issue.
1 parent bb36a53 commit 86254eb

3 files changed

Lines changed: 99 additions & 120 deletions

File tree

core/actions/data_preparation.ts

Lines changed: 94 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { dump as dumpYaml } from "js-yaml";
2+
13
import { verifyObjectMatchesProto, VerifyProtoErrorBehaviour } from "df/common/protos";
24
import { ActionBuilder } from "df/core/actions";
35
import { Resolvable } from "df/core/common";
@@ -34,10 +36,12 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
3436

3537
config.filename = resolveActionsConfigFilename(config.filename, configPath);
3638
const dataPreparationAsJson = nativeRequire(config.filename).asJson;
37-
const dataPreparationDefinition = parseDataPreparationDefinitionJson(dataPreparationAsJson);
39+
const dataPreparationDefinition = this.parseDataPreparationDefinitionJson(dataPreparationAsJson);
3840

3941
// Find targets
40-
const targets = getTargets(dataPreparationDefinition);
42+
const targets = this.getTargets(dataPreparationAsJson as {
43+
[key: string]: any;
44+
});
4145
this.proto.targets = targets.map(target =>
4246
this.applySessionToTarget(target, session.projectConfig, config.filename, true)
4347
);
@@ -46,8 +50,8 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
4650
);
4751

4852
// Resolve all table references with compilation overrides and encode resolved proto instance
49-
const resolvedDefinition = applySessionToDataPreparationContents(this, dataPreparationDefinition);
50-
this.proto.dataPreparationContents = dataform.dataprep.DataPreparation.encode(resolvedDefinition).finish();
53+
const resolvedDefinition = this.applySessionToDataPreparationContents(dataPreparationAsJson);
54+
this.proto.dataPreparationYaml = dumpYaml(resolvedDefinition)
5155

5256
// Set the unique target key as the first target defined.
5357
// TODO: Remove once multiple targets are supported.
@@ -108,107 +112,112 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
108112
VerifyProtoErrorBehaviour.SUGGEST_REPORTING_TO_DATAFORM_TEAM
109113
);
110114
}
111-
}
112115

113-
function parseDataPreparationDefinitionJson(dataPreparationAsJson: {
114-
[key: string]: unknown;
115-
}): dataform.dataprep.DataPreparation {
116-
try {
117-
return dataform.dataprep.DataPreparation.create(
118-
verifyObjectMatchesProto(
119-
dataform.dataprep.DataPreparation,
120-
dataPreparationAsJson as {
121-
[key: string]: any;
122-
},
123-
VerifyProtoErrorBehaviour.SHOW_DOCS_LINK
124-
)
125-
);
126-
} catch (e) {
127-
if (e instanceof ReferenceError) {
128-
throw ReferenceError(`Data Preparation parsing error: ${e.message}`);
116+
private applySessionToDataPreparationContents(
117+
definition: {[key: string]: any}
118+
): {[key: string]: any} {
119+
// Resolve error table, if set
120+
// @ts-ignore
121+
const errorTable = definition.configuration?.errorTable;
122+
if (errorTable) {
123+
definition.configuration.errorTable =
124+
this.applySessionToTableReference(errorTable as {[key: string]: string});
129125
}
130-
throw e;
131-
}
132-
}
133-
134-
function applySessionToDataPreparationContents(
135-
actionBuilder: ActionBuilder<dataform.DataPreparation>,
136-
definition: dataform.dataprep.DataPreparation
137-
): dataform.dataprep.DataPreparation {
138-
const resolvedDataPreparation = dataform.dataprep.DataPreparation.create(definition);
139-
140-
// Resolve error table, if set
141-
const errorTable = definition.configuration?.errorTable;
142-
if (errorTable) {
143-
resolvedDataPreparation.configuration.errorTable =
144-
applySessionToTableReference(actionBuilder, errorTable);
145-
}
146126

147-
// Loop through all nodes and resolve the compilation overrides for
148-
// all source and destination tables.
149-
definition.nodes.forEach((node, index) => {
127+
// Loop through all nodes and resolve the compilation overrides for
128+
// all source and destination tables.
129+
if (definition.nodes) {
130+
(definition.nodes as Array<{ [key: string]: any }>).forEach((node, index) => {
150131

151132
// Resolve source tables, if set.
152-
const sourceTable = node.source.table;
133+
const sourceTable = node.source?.table;
153134
if (sourceTable) {
154-
resolvedDataPreparation.nodes[index].source.table =
155-
applySessionToTableReference(actionBuilder, sourceTable);
135+
definition.nodes[index].source.table =
136+
this.applySessionToTableReference(sourceTable as {[key: string]: string});
156137
}
157138

158139
// Resolve destination tables, if set.
159140
const destinationTable = node.destination?.table;
160141
if (destinationTable) {
161-
resolvedDataPreparation.nodes[index].destination.table =
162-
applySessionToTableReference(actionBuilder, destinationTable);
142+
definition.nodes[index].destination.table =
143+
this.applySessionToTableReference(destinationTable as {[key: string]: string});
163144
}
164-
}
145+
});
146+
}
165147

166-
);
148+
return definition;
149+
}
167150

168-
return resolvedDataPreparation;
169-
}
151+
private applySessionToTableReference(
152+
tableReference: {[key: string]: string}
153+
): object {
154+
const target: dataform.ITarget = {
155+
database: tableReference.project,
156+
schema: tableReference.dataset,
157+
name: tableReference.table
158+
}
159+
const resolvedTarget =
160+
this.applySessionToTarget(
161+
dataform.Target.create(target),
162+
this.session.projectConfig)
163+
// Convert resolved target into a Data Preparation Table Reference
164+
let resolvedTableReference : {[key: string]: string} = {
165+
table: resolvedTarget.name,
166+
}
170167

171-
function applySessionToTableReference(
172-
actionBuilder: ActionBuilder<dataform.DataPreparation>,
173-
tableReference: dataform.dataprep.ITableReference
174-
): dataform.dataprep.ITableReference {
175-
const target: dataform.ITarget = {
176-
database: tableReference.project,
177-
schema: tableReference.dataset,
178-
name: tableReference.table
179-
}
180-
const resolvedTarget =
181-
actionBuilder.applySessionToTarget(
182-
dataform.Target.create(target),
183-
actionBuilder.session.projectConfig)
184-
// Convert resolved target into a Data Preparation Table Reference
185-
const resolvedTableReference = dataform.dataprep.TableReference.create({
186-
table: resolvedTarget.name
187-
});
188-
if (resolvedTarget.database) {
189-
resolvedTableReference.project = resolvedTarget.database;
168+
// Ensure project and dataset field are added in order
169+
if (resolvedTarget.schema) {
170+
resolvedTableReference = { dataset: resolvedTarget.schema, ...resolvedTableReference }
171+
}
172+
if (resolvedTarget.database) {
173+
resolvedTableReference = { project: resolvedTarget.database, ...resolvedTableReference }
174+
}
175+
return resolvedTableReference;
190176
}
191-
if (resolvedTarget.schema) {
192-
resolvedTableReference.dataset = resolvedTarget.schema;
177+
178+
private parseDataPreparationDefinitionJson(dataPreparationAsJson: {
179+
[key: string]: unknown;
180+
}): dataform.dataprep.DataPreparation {
181+
try {
182+
return dataform.dataprep.DataPreparation.create(
183+
verifyObjectMatchesProto(
184+
dataform.dataprep.DataPreparation,
185+
dataPreparationAsJson as {
186+
[key: string]: any;
187+
},
188+
VerifyProtoErrorBehaviour.SHOW_DOCS_LINK
189+
)
190+
);
191+
} catch (e) {
192+
if (e instanceof ReferenceError) {
193+
throw ReferenceError(`Data Preparation parsing error: ${e.message}`);
194+
}
195+
throw e;
196+
}
193197
}
194-
return resolvedTableReference;
195-
}
196198

197199

198-
function getTargets(definition: dataform.dataprep.DataPreparation): dataform.Target[] {
199-
const targets: dataform.Target[] = [];
200+
private getTargets(definition: {
201+
[key: string]: any;
202+
}): dataform.Target[] {
203+
const targets: dataform.Target[] = [];
204+
205+
if (definition.nodes) {
206+
(definition.nodes as Array<{ [key: string]: any }>).forEach(node => {
207+
const table = node.destination?.table;
208+
if (table) {
209+
const compiledGraphTarget: dataform.ITarget = {
210+
database: table.project,
211+
schema: table.dataset,
212+
name: table.table
213+
};
214+
targets.push(dataform.Target.create(compiledGraphTarget));
200215

201-
definition.nodes.forEach(node => {
202-
const table = node.destination?.table;
203-
if (table) {
204-
const compiledGraphTarget: dataform.ITarget = {
205-
database: table.project,
206-
schema: table.dataset,
207-
name: table.table
208-
};
209-
targets.push(dataform.Target.create(compiledGraphTarget));
216+
}
217+
});
210218
}
211-
});
212219

213-
return targets;
220+
return targets;
221+
}
214222
}
223+

core/main_test.ts

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -917,20 +917,6 @@ nodes:
917917
dataPreparationYaml
918918
);
919919

920-
// Generate Base64 encoded representation of the YAML.
921-
const dataPreparationAsObject = loadYaml(dataPreparationYaml);
922-
const dataPreparationDefinition = verifyObjectMatchesProto(
923-
dataform.dataprep.DataPreparation,
924-
dataPreparationAsObject as {
925-
[key: string]: any;
926-
},
927-
VerifyProtoErrorBehaviour.DEFAULT
928-
);
929-
const base64encodedContents = encode64(
930-
dataform.dataprep.DataPreparation,
931-
dataPreparationDefinition
932-
);
933-
934920
const result = runMainInVm(coreExecutionRequestFromPath(projectDir));
935921

936922
expect(result.compile.compiledGraph.graphErrors.compilationErrors).deep.equals([]);
@@ -962,8 +948,7 @@ nodes:
962948
}
963949
],
964950
fileName: "definitions/data_preparation.yaml",
965-
// Base64 encoded representation of the data preparation definition proto.
966-
dataPreparationContents: base64encodedContents
951+
dataPreparationYaml: dumpYaml(loadYaml(dataPreparationYaml))
967952
}
968953
])
969954
);
@@ -1080,20 +1065,6 @@ nodes:
10801065
mode: NULLABLE
10811066
`
10821067

1083-
// Generate Base64 encoded representation of the YAML.
1084-
const dataPreparationAsObject = loadYaml(resolvedYaml);
1085-
const dataPreparationDefinition = verifyObjectMatchesProto(
1086-
dataform.dataprep.DataPreparation,
1087-
dataPreparationAsObject as {
1088-
[key: string]: any;
1089-
},
1090-
VerifyProtoErrorBehaviour.DEFAULT
1091-
);
1092-
const base64encodedContents = encode64(
1093-
dataform.dataprep.DataPreparation,
1094-
dataPreparationDefinition
1095-
);
1096-
10971068
const result = runMainInVm(coreExecutionRequestFromPath(projectDir));
10981069

10991070
expect(result.compile.compiledGraph.graphErrors.compilationErrors).deep.equals([]);
@@ -1125,8 +1096,7 @@ nodes:
11251096
}
11261097
],
11271098
fileName: "definitions/data_preparation.yaml",
1128-
// Base64 encoded representation of the data preparation definition proto.
1129-
dataPreparationContents: base64encodedContents
1099+
dataPreparationYaml: dumpYaml(loadYaml(resolvedYaml))
11301100
}
11311101
])
11321102
);

protos/core.proto

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,10 @@ message DataPreparation {
267267

268268
bool disabled = 6;
269269

270-
// Binary encoded contents of the Data preparation proto
271-
bytes data_preparation_contents = 10;
270+
// YAML file containing the data preparation YAML with resolved dependencies.
271+
string data_preparation_yaml = 11;
272272

273-
reserved 7;
273+
reserved 7, 10;
274274
}
275275

276276
message CompiledGraph {

0 commit comments

Comments
 (0)