Skip to content

Commit 6635abc

Browse files
authored
Added proto definitions for Data Preparations (#1788)
* Added proto definitions for Data Preparations * Moved Data preparation protos into a separate file
1 parent 97123fa commit 6635abc

4 files changed

Lines changed: 223 additions & 1 deletion

File tree

protos/BUILD

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
load("@rules_proto//proto:defs.bzl", "proto_library")
2-
load("//tools:ts_proto_library.bzl", "ts_proto_library")
32
load("//testing:build_test.bzl", "build_test")
3+
load("//tools:ts_proto_library.bzl", "ts_proto_library")
44

55
package(default_visibility = ["//visibility:public"])
66

@@ -9,6 +9,7 @@ proto_library(
99
srcs = [
1010
"configs.proto",
1111
"core.proto",
12+
"data_preparation.proto",
1213
"evaluation.proto",
1314
"execution.proto",
1415
"profiles.proto",

protos/configs.proto

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,32 @@ message ActionConfig {
515515
// TODO(ekrekr): add a notebook runtime field definition.
516516
}
517517

518+
message DataPreparationConfig {
519+
// The name of the data preparation.
520+
string name = 1;
521+
522+
// Targets of actions that this action is dependent on.
523+
repeated Target dependency_targets = 2;
524+
525+
// Path to the source file that the contents of the action is loaded from.
526+
string filename = 3;
527+
528+
// A list of user-defined tags with which the action should be labeled.
529+
repeated string tags = 4;
530+
531+
// If set to true, this action will not be executed. However, the action can
532+
// still be depended upon. Useful for temporarily turning off broken
533+
// actions.
534+
bool disabled = 7;
535+
536+
// Description of the data preparation.
537+
string description = 8;
538+
539+
// When set to true, assertions dependent upon any dependency will
540+
// be added as dependencies to this action.
541+
bool depend_on_dependency_assertions = 9;
542+
}
543+
518544
oneof action {
519545
TableConfig table = 1;
520546
ViewConfig view = 2;
@@ -523,6 +549,7 @@ message ActionConfig {
523549
OperationConfig operation = 5;
524550
DeclarationConfig declaration = 6;
525551
NotebookConfig notebook = 7;
552+
DataPreparationConfig data_preparation = 8;
526553
}
527554
}
528555

protos/core.proto

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
syntax = "proto3";
2+
import "data_preparation.proto";
23

34
package dataform;
45

@@ -247,6 +248,24 @@ message NotebookRuntimeOptions {
247248
}
248249
}
249250

251+
// Data Preparation Related entries
252+
message DataPreparation {
253+
// Data preparations can have more than 1 output
254+
repeated Target targets = 1;
255+
256+
repeated Target canonical_targets = 2;
257+
258+
repeated string tags = 3;
259+
260+
repeated Target dependency_targets = 4;
261+
262+
string file_name = 5;
263+
264+
bool disabled = 6;
265+
266+
DataPreparationDefinition data_preparation = 7;
267+
}
268+
250269
message CompiledGraph {
251270
ProjectConfig project_config = 4;
252271

@@ -258,6 +277,7 @@ message CompiledGraph {
258277
// it is not used at runtime.
259278
repeated Test tests = 8;
260279
repeated Notebook notebooks = 12;
280+
repeated DataPreparation data_preparations = 13;
261281

262282
GraphErrors graph_errors = 7;
263283

protos/data_preparation.proto

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
syntax = "proto3";
2+
3+
package dataform;
4+
5+
option java_package = "com.dataform.protos";
6+
option java_multiple_files = true;
7+
8+
option go_package = "github.com/dataform-co/dataform/protos/dataform";
9+
10+
message DataPreparationDefinition {
11+
repeated DataPreparationNode nodes = 1;
12+
DataPreparationGenerated generated = 2;
13+
}
14+
15+
message DataPreparationNode {
16+
string id = 1;
17+
DataPreparationNodeSource source = 2;
18+
repeated DataPreparationNodeStep steps = 3;
19+
DataPreparationNodeGenerated generated = 4;
20+
// Destination BigQuery table(s) are defined within the data preparation
21+
optional DataPreparationNodeDestination destination = 5;
22+
}
23+
24+
message DataPreparationNodeSource {
25+
oneof source {
26+
string node_id = 1;
27+
TableReference table = 2;
28+
Join join = 3;
29+
}
30+
}
31+
32+
message DataPreparationNodeDestination {
33+
oneof destination {
34+
TableReference table = 1;
35+
}
36+
}
37+
38+
message DataPreparationNodeStep {
39+
string id = 1;
40+
string description = 2;
41+
oneof definition {
42+
ColumnStep column_step = 3;
43+
FilterStep filter_step = 4;
44+
}
45+
DataPreparationNodeStepGenerated generated = 5;
46+
}
47+
48+
message ColumnStep {
49+
string column_name = 1;
50+
Expression expression = 2;
51+
}
52+
53+
message FilterStep {
54+
Expression expression = 1;
55+
}
56+
57+
message Expression {
58+
oneof expression {
59+
string sql = 1;
60+
}
61+
}
62+
63+
message Join {
64+
string left_node_id = 1;
65+
string right_node_id = 2;
66+
JoinType join_type = 3;
67+
JoinCondition join_condition = 4;
68+
}
69+
70+
enum JoinType {
71+
JOIN_TYPE_UNSPECIFIED = 0;
72+
JOIN_TYPE_INNER = 1;
73+
JOIN_TYPE_FULL_OUTER = 2;
74+
JOIN_TYPE_LEFT = 3;
75+
JOIN_TYPE_RIGHT = 4;
76+
}
77+
78+
message JoinCondition {
79+
oneof condition {
80+
Expression expression = 1;
81+
JoinKeys keys = 2;
82+
}
83+
}
84+
85+
message JoinKeys {
86+
repeated JoinKey keys = 1;
87+
}
88+
89+
message JoinKey {
90+
string left_column = 1;
91+
string right_column = 2;
92+
}
93+
94+
message TableReference {
95+
string project = 1;
96+
string dataset = 2;
97+
string table = 3;
98+
}
99+
100+
message DataPreparationGenerated {
101+
repeated DataPreparationValidationError validation_errors = 1;
102+
optional string location = 2;
103+
}
104+
105+
message DataPreparationNodeGenerated {
106+
repeated DataPreparationSection sections = 1;
107+
repeated string sources = 2;
108+
repeated DataPreparationValidationError validation_errors = 3;
109+
optional DataPreparationSchema output_schema = 4;
110+
DataPreparationNodeSourceGenerated source_generated = 5;
111+
optional DataPreparationNodeDestinationGenerated destination_generated = 6;
112+
}
113+
114+
message DataPreparationSection {
115+
DataPreparationSectionType type = 1;
116+
string label = 2;
117+
}
118+
119+
enum DataPreparationSectionType {
120+
SECTION_TYPE_UNSPECIFIED = 0;
121+
SECTION_UNPARSEABLE = 1;
122+
SECTION_SOURCE_TABLE = 2;
123+
SECTION_SQL = 3;
124+
SECTION_DESTINATION_TABLE = 4;
125+
}
126+
127+
message DataPreparationNodeSourceGenerated {
128+
optional DataPreparationNodeSourceSourceSchema source_schema = 4;
129+
}
130+
131+
message DataPreparationNodeSourceSourceSchema {
132+
oneof source_schema {
133+
DataPreparationSchema node_schema = 1;
134+
DataPreparationSchema table_schema = 2;
135+
JoinSchema join_schema = 3;
136+
}
137+
}
138+
139+
message JoinSchema {
140+
DataPreparationSchema left_schema = 1;
141+
DataPreparationSchema right_schema = 2;
142+
}
143+
144+
message DataPreparationNodeDestinationGenerated {
145+
optional DataPreparationSchema schema = 1;
146+
}
147+
148+
message DataPreparationNodeStepGenerated {
149+
repeated string source_columns = 1;
150+
repeated DataPreparationValidationError validation_errors = 2;
151+
}
152+
153+
message DataPreparationSchema {
154+
repeated DataPreparationSchemaField field = 1;
155+
}
156+
157+
message DataPreparationSchemaField {
158+
string name = 1;
159+
optional string type = 2;
160+
optional string mode = 3;
161+
repeated DataPreparationSchemaField fields = 4;
162+
}
163+
164+
message DataPreparationValidationError {
165+
DataPreparationValidationErrorLevel level = 1;
166+
string description = 2;
167+
}
168+
169+
enum DataPreparationValidationErrorLevel {
170+
LEVEL_UNSPECIFIED = 0;
171+
LEVEL_WARN = 1;
172+
LEVEL_ERROR = 2;
173+
LEVEL_FATAL = 3;
174+
}

0 commit comments

Comments
 (0)