Skip to content

Commit 674b1e6

Browse files
authored
Added support for compilation overrides in Data Preparations (#1818)
* Added support for compilation overrides in Data Preparations
* Added explicit test for unknown properties.
* Fixed tests order
* Fixed whitespace
* Removed logic to ignore schema mismatch
* Add error table to resolution.
* Add error table to resolution.
* Addressed PR Comments
1 parent bd7d6fb commit 674b1e6

2 files changed

Lines changed: 260 additions & 29 deletions

File tree

core/actions/data_preparation.ts

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
3535
config.filename = resolveActionsConfigFilename(config.filename, configPath);
3636
const dataPreparationAsJson = nativeRequire(config.filename).asJson;
3737
const dataPreparationDefinition = parseDataPreparationDefinitionJson(dataPreparationAsJson);
38-
this.proto.dataPreparationContents = dataform.dataprep.DataPreparation.encode(dataPreparationDefinition).finish();
3938

4039
// Find targets
4140
const targets = getTargets(dataPreparationDefinition);
@@ -46,6 +45,10 @@ export class DataPreparation extends ActionBuilder<dataform.DataPreparation> {
4645
this.applySessionToTarget(target, session.canonicalProjectConfig)
4746
);
4847

48+
// Resolve all table references with compilation overrides and encode resolved proto instance
49+
const resolvedDefinition = applySessionToDataPreparationContents(this, dataPreparationDefinition);
50+
this.proto.dataPreparationContents = dataform.dataprep.DataPreparation.encode(resolvedDefinition).finish();
51+
4952
// Set the unique target key as the first target defined.
5053
// TODO: Remove once multiple targets are supported.
5154
this.proto.target = this.proto.targets[0];
@@ -128,6 +131,70 @@ function parseDataPreparationDefinitionJson(dataPreparationAsJson: {
128131
}
129132
}
130133

134+
/**
 * Builds a copy of a Data Preparation definition in which the error table and
 * every node's source and destination table have been passed through
 * `applySessionToTableReference`, so that the session's compilation overrides
 * are reflected in the encoded contents.
 *
 * The `definition` argument is left untouched: every nested message that needs
 * a resolved table reference is copied before it is changed.
 *
 * @param actionBuilder Action builder whose session is used for the resolution.
 * @param definition Parsed Data Preparation definition.
 * @returns A new Data Preparation message with resolved table references.
 */
function applySessionToDataPreparationContents(
  actionBuilder: ActionBuilder<dataform.DataPreparation>,
  definition: dataform.dataprep.DataPreparation
): dataform.dataprep.DataPreparation {
  // `create` only copies the top-level fields, so `configuration` and `nodes`
  // would still be shared with `definition`. They are replaced with copies
  // below instead of being written through.
  const resolvedDataPreparation = dataform.dataprep.DataPreparation.create(definition);

  // Resolve error table, if set.
  const errorTable = definition.configuration?.errorTable;
  if (errorTable) {
    resolvedDataPreparation.configuration = {
      ...definition.configuration,
      errorTable: applySessionToTableReference(actionBuilder, errorTable)
    };
  }

  // Resolve the compilation overrides for all source and destination tables.
  resolvedDataPreparation.nodes = definition.nodes.map(node => {
    const resolvedNode: typeof node = { ...node };

    // Resolve source table, if set. A node may read from another node instead
    // of a table, and may carry no source at all.
    const sourceTable = node.source?.table;
    if (sourceTable) {
      resolvedNode.source = {
        ...node.source,
        table: applySessionToTableReference(actionBuilder, sourceTable)
      };
    }

    // Resolve destination table, if set.
    const destinationTable = node.destination?.table;
    if (destinationTable) {
      resolvedNode.destination = {
        ...node.destination,
        table: applySessionToTableReference(actionBuilder, destinationTable)
      };
    }

    return resolvedNode;
  });

  return resolvedDataPreparation;
}
170+
171+
/**
 * Resolves a single Data Preparation table reference against the session.
 *
 * The reference is mapped onto a Dataform target (project -> database,
 * dataset -> schema, table -> name), handed to
 * `actionBuilder.applySessionToTarget` together with the session's project
 * config, and the outcome is mapped back onto a new table reference.
 *
 * @param actionBuilder Action builder providing the session and the resolution.
 * @param tableReference Table reference as written in the definition.
 * @returns A new table reference built from the resolved target.
 */
function applySessionToTableReference(
  actionBuilder: ActionBuilder<dataform.DataPreparation>,
  tableReference: dataform.dataprep.ITableReference
): dataform.dataprep.ITableReference {
  const { database, schema, name } = actionBuilder.applySessionToTarget(
    dataform.Target.create({
      database: tableReference.project,
      schema: tableReference.dataset,
      name: tableReference.table
    }),
    actionBuilder.session.projectConfig
  );

  // Convert the resolved target back into a Data Preparation table reference.
  // Project and dataset are only set when the resolved target carries them.
  const resolvedReference = dataform.dataprep.TableReference.create({ table: name });
  if (database) {
    resolvedReference.project = database;
  }
  if (schema) {
    resolvedReference.dataset = schema;
  }
  return resolvedReference;
}
196+
197+
131198
function getTargets(definition: dataform.dataprep.DataPreparation): dataform.Target[] {
132199
const targets: dataform.Target[] = [];
133200

core/main_test.ts

Lines changed: 192 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { dump as dumpYaml, load as loadYaml } from "js-yaml";
55
import * as path from "path";
66
import { CompilerFunction, NodeVM } from "vm2";
77

8-
import { decode64, encode64, verifyObjectMatchesProto } from "df/common/protos";
8+
import { decode64, encode64, verifyObjectMatchesProto, VerifyProtoErrorBehaviour } from "df/common/protos";
99
import { compile } from "df/core/compilers";
1010
import { version } from "df/core/version";
1111
import { dataform } from "df/protos/ts";
@@ -913,7 +913,7 @@ nodes:
913913
`
914914

915915
fs.writeFileSync(
916-
path.join(projectDir, "definitions/data_preparation.yaml"),
916+
path.join(projectDir, "definitions/data_preparation.yaml"),
917917
dataPreparationYaml
918918
);
919919

@@ -923,7 +923,8 @@ nodes:
923923
dataform.dataprep.DataPreparation,
924924
dataPreparationAsObject as {
925925
[key: string]: any;
926-
}
926+
},
927+
VerifyProtoErrorBehaviour.DEFAULT
927928
);
928929
const base64encodedContents = encode64(
929930
dataform.dataprep.DataPreparation,
@@ -934,37 +935,200 @@ nodes:
934935

935936
expect(result.compile.compiledGraph.graphErrors.compilationErrors).deep.equals([]);
936937
expect(asPlainObject(result.compile.compiledGraph.dataPreparations)).deep.equals(
937-
asPlainObject([
938-
{
939-
target: {
940-
database: "prj",
941-
schema: "ds",
942-
name: "dest"
943-
},
944-
canonicalTarget: {
945-
database: "prj",
946-
schema: "ds",
947-
name: "dest"
948-
},
949-
targets: [
950-
{
938+
asPlainObject([
939+
{
940+
target: {
951941
database: "prj",
952942
schema: "ds",
953943
name: "dest"
954-
}
955-
],
956-
canonicalTargets: [
957-
{
944+
},
945+
canonicalTarget: {
958946
database: "prj",
959947
schema: "ds",
960948
name: "dest"
961-
}
962-
],
963-
fileName: "definitions/data_preparation.yaml",
964-
// Base64 encoded representation of the data preparation definition proto.
965-
dataPreparationContents: base64encodedContents
966-
}
967-
])
949+
},
950+
targets: [
951+
{
952+
database: "prj",
953+
schema: "ds",
954+
name: "dest"
955+
}
956+
],
957+
canonicalTargets: [
958+
{
959+
database: "prj",
960+
schema: "ds",
961+
name: "dest"
962+
}
963+
],
964+
fileName: "definitions/data_preparation.yaml",
965+
// Base64 encoded representation of the data preparation definition proto.
966+
dataPreparationContents: base64encodedContents
967+
}
968+
])
969+
);
970+
});
971+
972+
test(`data preparations resolves compilation overrides before encoding`, () => {
973+
const projectDir = createSimpleDataPreparationProject();
974+
const dataPreparationYaml = `
975+
configuration:
976+
errorTable:
977+
table: error
978+
nodes:
979+
- id: node1
980+
source:
981+
table:
982+
table: src
983+
generated:
984+
sourceGenerated:
985+
sourceSchema:
986+
tableSchema:
987+
field:
988+
- name: a
989+
type: STRING
990+
mode: NULLABLE
991+
outputSchema:
992+
field:
993+
- name: a
994+
type: INT64
995+
mode: NULLABLE
996+
- id: node2
997+
source:
998+
nodeId: node1
999+
destination:
1000+
table:
1001+
table: dest
1002+
generated:
1003+
sourceGenerated:
1004+
sourceSchema:
1005+
nodeSchema:
1006+
field:
1007+
- name: a
1008+
type: STRING
1009+
mode: NULLABLE
1010+
outputSchema:
1011+
field:
1012+
- name: a
1013+
type: INT64
1014+
mode: NULLABLE
1015+
destinationGenerated:
1016+
schema:
1017+
field:
1018+
- name: a
1019+
type: STRING
1020+
mode: NULLABLE
1021+
`
1022+
1023+
fs.writeFileSync(
1024+
path.join(projectDir, "definitions/data_preparation.yaml"),
1025+
dataPreparationYaml
1026+
);
1027+
1028+
const resolvedYaml = `
1029+
configuration:
1030+
errorTable:
1031+
project: defaultProject
1032+
dataset: defaultDataset
1033+
table: error
1034+
nodes:
1035+
- id: node1
1036+
source:
1037+
table:
1038+
project: defaultProject
1039+
dataset: defaultDataset
1040+
table: src
1041+
generated:
1042+
sourceGenerated:
1043+
sourceSchema:
1044+
tableSchema:
1045+
field:
1046+
- name: a
1047+
type: STRING
1048+
mode: NULLABLE
1049+
outputSchema:
1050+
field:
1051+
- name: a
1052+
type: INT64
1053+
mode: NULLABLE
1054+
- id: node2
1055+
source:
1056+
nodeId: node1
1057+
destination:
1058+
table:
1059+
project: defaultProject
1060+
dataset: defaultDataset
1061+
table: dest
1062+
generated:
1063+
sourceGenerated:
1064+
sourceSchema:
1065+
nodeSchema:
1066+
field:
1067+
- name: a
1068+
type: STRING
1069+
mode: NULLABLE
1070+
outputSchema:
1071+
field:
1072+
- name: a
1073+
type: INT64
1074+
mode: NULLABLE
1075+
destinationGenerated:
1076+
schema:
1077+
field:
1078+
- name: a
1079+
type: STRING
1080+
mode: NULLABLE
1081+
`
1082+
1083+
// Generate Base64 encoded representation of the YAML.
1084+
const dataPreparationAsObject = loadYaml(resolvedYaml);
1085+
const dataPreparationDefinition = verifyObjectMatchesProto(
1086+
dataform.dataprep.DataPreparation,
1087+
dataPreparationAsObject as {
1088+
[key: string]: any;
1089+
},
1090+
VerifyProtoErrorBehaviour.DEFAULT
1091+
);
1092+
const base64encodedContents = encode64(
1093+
dataform.dataprep.DataPreparation,
1094+
dataPreparationDefinition
1095+
);
1096+
1097+
const result = runMainInVm(coreExecutionRequestFromPath(projectDir));
1098+
1099+
expect(result.compile.compiledGraph.graphErrors.compilationErrors).deep.equals([]);
1100+
expect(asPlainObject(result.compile.compiledGraph.dataPreparations)).deep.equals(
1101+
asPlainObject([
1102+
{
1103+
target: {
1104+
database: "defaultProject",
1105+
schema: "defaultDataset",
1106+
name: "dest"
1107+
},
1108+
canonicalTarget: {
1109+
database: "defaultProject",
1110+
schema: "defaultDataset",
1111+
name: "dest"
1112+
},
1113+
targets: [
1114+
{
1115+
database: "defaultProject",
1116+
schema: "defaultDataset",
1117+
name: "dest"
1118+
}
1119+
],
1120+
canonicalTargets: [
1121+
{
1122+
database: "defaultProject",
1123+
schema: "defaultDataset",
1124+
name: "dest"
1125+
}
1126+
],
1127+
fileName: "definitions/data_preparation.yaml",
1128+
// Base64 encoded representation of the data preparation definition proto.
1129+
dataPreparationContents: base64encodedContents
1130+
}
1131+
])
9681132
);
9691133
});
9701134
});

0 commit comments

Comments
 (0)