-
Notifications
You must be signed in to change notification settings - Fork 380
Expand file tree
/
Copy pathdefinition.py
More file actions
2499 lines (2067 loc) · 94.3 KB
/
definition.py
File metadata and controls
2499 lines (2067 loc) · 94.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import annotations
import sys
import typing as t
from collections import defaultdict
from datetime import datetime, timedelta
from enum import IntEnum
import logging
from functools import cached_property, lru_cache
from pathlib import Path
from pydantic import Field
from sqlglot import exp
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlmesh.core.config.common import (
TableNamingConvention,
VirtualEnvironmentMode,
EnvironmentSuffixTarget,
)
from sqlmesh.core import constants as c
from sqlmesh.core.audit import StandaloneAudit
from sqlmesh.core.macros import call_macro
from sqlmesh.core.model import Model, ModelKindMixin, ModelKindName, ViewKind, CustomKind
from sqlmesh.core.model.definition import _Model
from sqlmesh.core.node import IntervalUnit, NodeType
from sqlmesh.utils import sanitize_name, unique
from sqlmesh.utils.dag import DAG
from sqlmesh.utils.date import (
TimeLike,
is_date,
make_inclusive,
make_exclusive,
make_inclusive_end,
now,
now_timestamp,
time_like_to_str,
to_date,
to_datetime,
to_ds,
to_timestamp,
to_ts,
validate_date_range,
yesterday,
)
from sqlmesh.utils.errors import SQLMeshError, SignalEvalError
from sqlmesh.utils.metaprogramming import (
format_evaluated_code_exception,
Executable,
)
from sqlmesh.utils.hashing import hash_data, md5
from sqlmesh.utils.pydantic import PydanticModel, field_validator
if t.TYPE_CHECKING:
    from sqlglot.dialects.dialect import DialectType
    from sqlmesh.core.environment import EnvironmentNamingInfo
    from sqlmesh.core.context import ExecutionContext

# A half-open [start, end) pair of epoch timestamps (see Snapshot.add_interval).
Interval = t.Tuple[int, int]
Intervals = t.List[Interval]

# Discriminated union of the node kinds a snapshot can encapsulate.
Node = t.Annotated[t.Union[Model, StandaloneAudit], Field(discriminator="source_type")]

logger = logging.getLogger(__name__)
class SnapshotChangeCategory(IntEnum):
    """
    Values are ordered by decreasing severity and that ordering is required.

    BREAKING: The change requires that snapshot modified and downstream dependencies be rebuilt
    NON_BREAKING: The change requires that only the snapshot modified be rebuilt
    FORWARD_ONLY: The change requires no rebuilding
    INDIRECT_BREAKING: The change was caused indirectly and is breaking.
    INDIRECT_NON_BREAKING: The change was caused indirectly by a non-breaking change.
    METADATA: The change was caused by a metadata update.
    """

    BREAKING = 1
    NON_BREAKING = 2
    # FORWARD_ONLY category is deprecated and is kept for backwards compatibility.
    FORWARD_ONLY = 3
    INDIRECT_BREAKING = 4
    INDIRECT_NON_BREAKING = 5
    METADATA = 6

    # Enum members are singletons, so identity checks are equivalent to equality.
    @property
    def is_breaking(self) -> bool:
        return self is SnapshotChangeCategory.BREAKING

    @property
    def is_non_breaking(self) -> bool:
        return self is SnapshotChangeCategory.NON_BREAKING

    @property
    def is_forward_only(self) -> bool:
        return self is SnapshotChangeCategory.FORWARD_ONLY

    @property
    def is_metadata(self) -> bool:
        return self is SnapshotChangeCategory.METADATA

    @property
    def is_indirect_breaking(self) -> bool:
        return self is SnapshotChangeCategory.INDIRECT_BREAKING

    @property
    def is_indirect_non_breaking(self) -> bool:
        return self is SnapshotChangeCategory.INDIRECT_NON_BREAKING

    def __repr__(self) -> str:
        return self.name
class SnapshotFingerprint(PydanticModel, frozen=True):
    """Content hashes describing a snapshot's definition and its parents' definitions."""

    data_hash: str
    metadata_hash: str
    parent_data_hash: str = "0"
    parent_metadata_hash: str = "0"

    def to_version(self) -> str:
        # Only the data-affecting hashes participate in the version.
        return hash_data([self.data_hash, self.parent_data_hash])

    def to_identifier(self) -> str:
        # The identifier folds in every hash, so any change produces a new identifier.
        components = [
            self.data_hash,
            self.metadata_hash,
            self.parent_data_hash,
            self.parent_metadata_hash,
        ]
        return hash_data(components)

    def __str__(self) -> str:
        return (
            f"SnapshotFingerprint<{self.to_identifier()}, data: {self.data_hash}, "
            f"meta: {self.metadata_hash}, pdata: {self.parent_data_hash}, "
            f"pmeta: {self.parent_metadata_hash}>"
        )
class SnapshotId(PydanticModel, frozen=True):
    """Uniquely identifies a snapshot by its node name and fingerprint-derived identifier."""

    name: str
    identifier: str

    @property
    def snapshot_id(self) -> SnapshotId:
        """Helper method to return self."""
        return self

    def __eq__(self, other: t.Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return self.name == other.name and self.identifier == other.identifier

    def __hash__(self) -> int:
        return hash((self.__class__, self.name, self.identifier))

    def __lt__(self, other: SnapshotId) -> bool:
        # Ordering considers the name only.
        return self.name < other.name

    def __str__(self) -> str:
        return f"SnapshotId<{self.name}: {self.identifier}>"
class SnapshotIdBatch(PydanticModel, frozen=True):
    """Identifies a specific evaluation batch of a snapshot."""

    snapshot_id: SnapshotId
    # Ordinal of the batch; base/ordering semantics are defined by callers.
    batch_id: int
class SnapshotNameVersion(PydanticModel, frozen=True):
    """A (name, version) pair identifying the physical version of a snapshot."""

    name: str
    version: str

    @property
    def name_version(self) -> SnapshotNameVersion:
        """Helper method to return self."""
        return self
class SnapshotIntervals(PydanticModel):
    """Records the processed time intervals (prod, dev and pending restatement) for a
    snapshot, together with physical-table last-altered timestamps.
    """

    name: str
    identifier: t.Optional[str]
    version: str
    dev_version: t.Optional[str]
    intervals: Intervals = []
    dev_intervals: Intervals = []
    pending_restatement_intervals: Intervals = []
    last_altered_ts: t.Optional[int] = None
    dev_last_altered_ts: t.Optional[int] = None

    @property
    def snapshot_id(self) -> t.Optional[SnapshotId]:
        # The identifier may be absent, in which case no snapshot id can be derived.
        if self.identifier:
            return SnapshotId(name=self.name, identifier=self.identifier)
        return None

    @property
    def name_version(self) -> SnapshotNameVersion:
        return SnapshotNameVersion(name=self.name, version=self.version)

    def add_interval(self, start: int, end: int) -> None:
        self._add_interval(start, end, "intervals")

    def add_dev_interval(self, start: int, end: int) -> None:
        self._add_interval(start, end, "dev_intervals")

    def add_pending_restatement_interval(self, start: int, end: int) -> None:
        self._add_interval(start, end, "pending_restatement_intervals")

    def update_last_altered_ts(self, last_altered_ts: t.Optional[int]) -> None:
        self._update_last_altered_ts(last_altered_ts, "last_altered_ts")

    def update_dev_last_altered_ts(self, last_altered_ts: t.Optional[int]) -> None:
        self._update_last_altered_ts(last_altered_ts, "dev_last_altered_ts")

    def remove_interval(self, start: int, end: int) -> None:
        self._remove_interval(start, end, "intervals")

    def remove_dev_interval(self, start: int, end: int) -> None:
        self._remove_interval(start, end, "dev_intervals")

    def remove_pending_restatement_interval(self, start: int, end: int) -> None:
        self._remove_interval(start, end, "pending_restatement_intervals")

    def is_empty(self) -> bool:
        # Empty means no intervals of any kind are recorded.
        return not (
            self.intervals or self.dev_intervals or self.pending_restatement_intervals
        )

    def _add_interval(self, start: int, end: int, interval_attr: str) -> None:
        # Merge the new interval into the named interval list in place.
        current = getattr(self, interval_attr)
        setattr(self, interval_attr, merge_intervals([*current, (start, end)]))

    def _update_last_altered_ts(
        self, last_altered_ts: t.Optional[int], last_altered_attr: str
    ) -> None:
        # Falsy timestamps (None or 0) are ignored; the stored value only moves forward.
        if not last_altered_ts:
            return
        previous = getattr(self, last_altered_attr) or 0
        setattr(self, last_altered_attr, max(previous, last_altered_ts))

    def _remove_interval(self, start: int, end: int, interval_attr: str) -> None:
        current = getattr(self, interval_attr)
        setattr(self, interval_attr, remove_interval(current, start, end))
class SnapshotDataVersion(PydanticModel, frozen=True):
    """Data-version details of a snapshot: fingerprint, version, and the physical
    naming information needed to locate its tables.
    """

    fingerprint: SnapshotFingerprint
    version: str
    dev_version_: t.Optional[str] = Field(default=None, alias="dev_version")
    change_category: t.Optional[SnapshotChangeCategory] = None
    physical_schema_: t.Optional[str] = Field(default=None, alias="physical_schema")
    dev_table_suffix: str
    table_naming_convention: TableNamingConvention = Field(default=TableNamingConvention.default)
    virtual_environment_mode: VirtualEnvironmentMode = Field(default=VirtualEnvironmentMode.default)

    def snapshot_id(self, name: str) -> SnapshotId:
        return SnapshotId(name=name, identifier=self.fingerprint.to_identifier())

    @property
    def dev_version(self) -> str:
        # Falls back to the fingerprint-derived version when no explicit dev version is stored.
        return self.dev_version_ or self.fingerprint.to_version()

    @property
    def physical_schema(self) -> str:
        # The physical schema here is optional to maintain backwards compatibility with
        # records stored by previous versions of SQLMesh.
        return self.physical_schema_ or c.SQLMESH

    @property
    def data_version(self) -> SnapshotDataVersion:
        """Helper method to return self."""
        return self

    @property
    def is_new_version(self) -> bool:
        """Returns whether or not this version is new and requires a backfill."""
        return self.fingerprint.to_version() == self.version
class QualifiedViewName(PydanticModel, frozen=True):
    """A (catalog, schema, table) triple describing where a model's view lives,
    with helpers that rewrite each part for a target environment.
    """

    catalog: t.Optional[str] = None
    schema_name: t.Optional[str] = None
    table: str

    def for_environment(
        self, environment_naming_info: EnvironmentNamingInfo, dialect: DialectType = None
    ) -> str:
        """Renders the environment-specific view name as a string."""
        return exp.table_name(self.table_for_environment(environment_naming_info, dialect=dialect))

    def table_for_environment(
        self, environment_naming_info: EnvironmentNamingInfo, dialect: DialectType = None
    ) -> exp.Table:
        """Builds an exp.Table whose parts are adjusted for the given environment."""
        return exp.table_(
            self.table_name_for_environment(environment_naming_info, dialect=dialect),
            db=self.schema_for_environment(environment_naming_info, dialect=dialect),
            catalog=self.catalog_for_environment(environment_naming_info, dialect=dialect),
        )

    def catalog_for_environment(
        self, environment_naming_info: EnvironmentNamingInfo, dialect: DialectType = None
    ) -> t.Optional[str]:
        # A dev environment with a catalog suffix target takes precedence over
        # an explicit catalog override.
        override: t.Optional[str] = None
        if environment_naming_info.is_dev and environment_naming_info.suffix_target.is_catalog:
            override = f"{self.catalog}__{environment_naming_info.name}"
        elif environment_naming_info.catalog_name_override:
            override = environment_naming_info.catalog_name_override

        if not override:
            return self.catalog
        if environment_naming_info.normalize_name:
            return normalize_identifiers(override, dialect=dialect).name
        return override

    def schema_for_environment(
        self, environment_naming_info: EnvironmentNamingInfo, dialect: DialectType = None
    ) -> str:
        normalize = environment_naming_info.normalize_name

        def _maybe_normalize(part: str) -> str:
            return normalize_identifiers(part, dialect=dialect).name if normalize else part

        schema = _maybe_normalize(self.schema_name or c.DEFAULT_SCHEMA)
        if environment_naming_info.is_dev and environment_naming_info.suffix_target.is_schema:
            # Dev environments targeting the schema get an `__<env>` suffix.
            schema = f"{schema}__{_maybe_normalize(environment_naming_info.name)}"
        return schema

    def table_name_for_environment(
        self, environment_naming_info: EnvironmentNamingInfo, dialect: DialectType = None
    ) -> str:
        if not (environment_naming_info.is_dev and environment_naming_info.suffix_target.is_table):
            return self.table
        suffix = environment_naming_info.name
        if environment_naming_info.normalize_name:
            suffix = normalize_identifiers(suffix, dialect=dialect).name
        return f"{self.table}__{suffix}"
class SnapshotInfoMixin(ModelKindMixin):
    """Mixin shared by `Snapshot` and `SnapshotTableInfo` that derives identity,
    version fallbacks, change-category predicates and physical table names from
    the attributes declared below, which concrete subclasses must provide.
    """

    name: str
    dev_version_: t.Optional[str]
    change_category: t.Optional[SnapshotChangeCategory]
    fingerprint: SnapshotFingerprint
    previous_versions: t.Tuple[SnapshotDataVersion, ...]
    # Added to support Migration # 34 (default catalog)
    # This can be removed from this model once Pydantic 1 support is dropped (must remain in `Snapshot` though)
    base_table_name_override: t.Optional[str]
    dev_table_suffix: str
    table_naming_convention: TableNamingConvention
    forward_only: bool

    @cached_property
    def identifier(self) -> str:
        # Derived from the fingerprint, so it changes whenever any of its hashes change.
        return self.fingerprint.to_identifier()

    @cached_property
    def snapshot_id(self) -> SnapshotId:
        return SnapshotId(name=self.name, identifier=self.identifier)

    @property
    def qualified_view_name(self) -> QualifiedViewName:
        # Prefer the fully qualified table when available; fall back to the raw name.
        view_name = exp.to_table(self.fully_qualified_table or self.name)
        return QualifiedViewName(
            catalog=view_name.catalog or None,
            schema_name=view_name.db or None,
            table=view_name.name,
        )

    @property
    def previous_version(self) -> t.Optional[SnapshotDataVersion]:
        """Helper method to get the previous data version."""
        if self.previous_versions:
            return self.previous_versions[-1]
        return None

    @property
    def dev_version(self) -> str:
        # Falls back to the fingerprint-derived version when no explicit dev version is set.
        return self.dev_version_ or self.fingerprint.to_version()

    # The following properties must be implemented by concrete subclasses.
    @property
    def physical_schema(self) -> str:
        raise NotImplementedError

    @property
    def data_version(self) -> SnapshotDataVersion:
        raise NotImplementedError

    @property
    def is_new_version(self) -> bool:
        raise NotImplementedError

    @cached_property
    def fully_qualified_table(self) -> t.Optional[exp.Table]:
        raise NotImplementedError

    @property
    def virtual_environment_mode(self) -> VirtualEnvironmentMode:
        raise NotImplementedError

    @property
    def is_forward_only(self) -> bool:
        # Either explicitly flagged, or categorized with the deprecated FORWARD_ONLY category.
        return self.forward_only or self.change_category == SnapshotChangeCategory.FORWARD_ONLY

    @property
    def is_metadata(self) -> bool:
        return self.change_category == SnapshotChangeCategory.METADATA

    @property
    def is_indirect_non_breaking(self) -> bool:
        return self.change_category == SnapshotChangeCategory.INDIRECT_NON_BREAKING

    @property
    def is_no_rebuild(self) -> bool:
        """Returns true if this snapshot doesn't require a rebuild in production."""
        return self.forward_only or self.change_category in (
            SnapshotChangeCategory.FORWARD_ONLY,  # Backwards compatibility
            SnapshotChangeCategory.METADATA,
            SnapshotChangeCategory.INDIRECT_NON_BREAKING,
        )

    @property
    def is_no_preview(self) -> bool:
        """Returns true if this snapshot doesn't require a preview in development."""
        return self.forward_only and self.change_category in (
            SnapshotChangeCategory.METADATA,
            SnapshotChangeCategory.INDIRECT_NON_BREAKING,
        )

    @property
    def all_versions(self) -> t.Tuple[SnapshotDataVersion, ...]:
        """Returns previous versions with the current version trimmed to DATA_VERSION_LIMIT."""
        return (*self.previous_versions, self.data_version)[-c.DATA_VERSION_LIMIT :]

    def display_name(
        self,
        environment_naming_info: EnvironmentNamingInfo,
        default_catalog: t.Optional[str],
        dialect: DialectType = None,
    ) -> str:
        """
        Returns the model name as a qualified view name.
        This is just used for presenting information back to the user and `qualified_view_name` should be used
        when wanting a view name in all other cases.
        """
        return display_name(self, environment_naming_info, default_catalog, dialect=dialect)

    def data_hash_matches(self, other: t.Optional[SnapshotInfoMixin | SnapshotDataVersion]) -> bool:
        # Only the data hash is compared; metadata differences are ignored.
        return other is not None and self.fingerprint.data_hash == other.fingerprint.data_hash

    def _table_name(self, version: str, is_deployable: bool) -> str:
        """Full table name pointing to the materialized location of the snapshot.

        Args:
            version: The snapshot version.
            is_deployable: Indicates whether to return the table name for deployment to production.
        """
        if self.is_external:
            return self.name
        if is_deployable and self.virtual_environment_mode.is_dev_only:
            # Use the model name as is if the target is deployable and the virtual environment mode is set to dev-only
            return self.name
        is_dev_table = not is_deployable
        if is_dev_table:
            version = self.dev_version
        if self.fully_qualified_table is None:
            raise SQLMeshError(
                f"Tried to get a table name for a snapshot that does not have a table. {self.name}"
            )
        # We want to exclude the catalog from the name but still include catalog when determining the fqn
        # for the table.
        if self.base_table_name_override:
            base_table_name = self.base_table_name_override
        else:
            fqt = self.fully_qualified_table.copy()
            fqt.set("catalog", None)
            base_table_name = fqt.sql()
        return table_name(
            self.physical_schema,
            base_table_name,
            version,
            catalog=self.fully_qualified_table.catalog,
            suffix=self.dev_table_suffix if is_dev_table else None,
            naming_convention=self.table_naming_convention,
        )

    @property
    def node_type(self) -> NodeType:
        raise NotImplementedError

    @property
    def is_model(self) -> bool:
        return self.node_type == NodeType.MODEL

    @property
    def is_audit(self) -> bool:
        return self.node_type == NodeType.AUDIT
class SnapshotTableInfo(PydanticModel, SnapshotInfoMixin, frozen=True):
    """A frozen, table-focused projection of a snapshot that carries no node payload."""

    name: str
    fingerprint: SnapshotFingerprint
    version: str
    dev_version_: t.Optional[str] = Field(default=None, alias="dev_version")
    physical_schema_: str = Field(alias="physical_schema")
    parents: t.Tuple[SnapshotId, ...]
    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
    change_category: t.Optional[SnapshotChangeCategory] = None
    kind_name: t.Optional[ModelKindName] = None
    node_type_: NodeType = Field(default=NodeType.MODEL, alias="node_type")
    # Added to support Migration # 34 (default catalog)
    # This can be removed from this model once Pydantic 1 support is dropped (must remain in `Snapshot` though)
    base_table_name_override: t.Optional[str] = None
    custom_materialization: t.Optional[str] = None
    dev_table_suffix: str
    model_gateway: t.Optional[str] = None
    forward_only: bool = False
    table_naming_convention: TableNamingConvention = TableNamingConvention.default
    virtual_environment_mode_: VirtualEnvironmentMode = Field(
        default=VirtualEnvironmentMode.default, alias="virtual_environment_mode"
    )

    def __lt__(self, other: SnapshotTableInfo) -> bool:
        # Ordering is by name only.
        return self.name < other.name

    def __eq__(self, other: t.Any) -> bool:
        # Equality is fingerprint-based; the name participates only in the hash below.
        return isinstance(other, SnapshotTableInfo) and self.fingerprint == other.fingerprint

    def __hash__(self) -> int:
        return hash((self.__class__, self.name, self.fingerprint))

    def table_name(self, is_deployable: bool = True) -> str:
        """Full table name pointing to the materialized location of the snapshot.

        Args:
            is_deployable: Indicates whether to return the table name for deployment to production.
        """
        return self._table_name(self.version, is_deployable)

    @property
    def physical_schema(self) -> str:
        return self.physical_schema_

    @cached_property
    def fully_qualified_table(self) -> exp.Table:
        return exp.to_table(self.name)

    @property
    def table_info(self) -> SnapshotTableInfo:
        """Helper method to return self."""
        return self

    @property
    def virtual_environment_mode(self) -> VirtualEnvironmentMode:
        return self.virtual_environment_mode_

    @property
    def data_version(self) -> SnapshotDataVersion:
        # Projects this table info into its corresponding data-version record.
        return SnapshotDataVersion(
            fingerprint=self.fingerprint,
            version=self.version,
            dev_version=self.dev_version,
            change_category=self.change_category,
            physical_schema=self.physical_schema,
            dev_table_suffix=self.dev_table_suffix,
            table_naming_convention=self.table_naming_convention,
            virtual_environment_mode=self.virtual_environment_mode,
        )

    @property
    def is_new_version(self) -> bool:
        """Returns whether or not this version is new and requires a backfill."""
        return self.fingerprint.to_version() == self.version

    @property
    def model_kind_name(self) -> t.Optional[ModelKindName]:
        return self.kind_name

    @property
    def node_type(self) -> NodeType:
        return self.node_type_

    @property
    def name_version(self) -> SnapshotNameVersion:
        """Returns the name and version of the snapshot."""
        return SnapshotNameVersion(name=self.name, version=self.version)

    @property
    def id_and_version(self) -> SnapshotIdAndVersion:
        # A further-stripped projection used for bulk reads of the snapshots table.
        return SnapshotIdAndVersion(
            name=self.name,
            kind_name=self.kind_name,
            identifier=self.identifier,
            version=self.version,
            dev_version=self.dev_version,
            fingerprint=self.fingerprint,
        )
class SnapshotIdAndVersion(PydanticModel, ModelKindMixin):
    """A stripped down version of a snapshot that is used in situations where we want to fetch the main fields of the snapshots table
    without the overhead of parsing the full snapshot payload and fetching intervals.
    """

    name: str
    version: str
    kind_name_: t.Optional[ModelKindName] = Field(default=None, alias="kind_name")
    dev_version_: t.Optional[str] = Field(alias="dev_version")
    identifier: str
    # May arrive as a raw JSON string; parsed lazily in the `fingerprint` property.
    fingerprint_: t.Union[str, SnapshotFingerprint] = Field(alias="fingerprint")

    @property
    def snapshot_id(self) -> SnapshotId:
        return SnapshotId(name=self.name, identifier=self.identifier)

    @property
    def id_and_version(self) -> SnapshotIdAndVersion:
        """Helper method to return self."""
        return self

    @property
    def name_version(self) -> SnapshotNameVersion:
        return SnapshotNameVersion(name=self.name, version=self.version)

    @property
    def fingerprint(self) -> SnapshotFingerprint:
        value = self.fingerprint_
        if isinstance(value, str):
            # Parse once and cache the parsed object back onto the field.
            self.fingerprint_ = value = SnapshotFingerprint.parse_raw(value)
        return value

    @property
    def dev_version(self) -> str:
        # Falls back to the fingerprint-derived version when no explicit dev version is stored.
        return self.dev_version_ or self.fingerprint.to_version()

    @property
    def model_kind_name(self) -> t.Optional[ModelKindName]:
        return self.kind_name_

    def display_name(
        self,
        environment_naming_info: EnvironmentNamingInfo,
        default_catalog: t.Optional[str],
        dialect: DialectType = None,
    ) -> str:
        """Renders this snapshot's model name for the given environment."""
        return model_display_name(
            self.name, environment_naming_info, default_catalog, dialect=dialect
        )
class Snapshot(PydanticModel, SnapshotInfoMixin):
    """A snapshot represents a node at a certain point in time.

    Snapshots are used to encapsulate everything needed to evaluate a node.
    They are standalone objects that hold all state and dynamic content necessary
    to render a node's query including things like macros. Snapshots also store intervals
    (timestamp ranges for what data we've processed).

    Nodes can be dynamically rendered due to macros. Rendering a node to its full extent
    requires storing variables and macro definitions. We store all of the macro definitions and
    global variable references in `python_env` in raw text to avoid pickling. The helper methods
    to achieve this are defined in utils.metaprogramming.

    Args:
        name: The snapshot name which is the same as the node name and should be unique per node.
        fingerprint: A unique hash of the node definition so that nodes can be reused across environments.
        node: Node object that the snapshot encapsulates.
        parents: The list of parent snapshots (upstream dependencies).
        intervals: List of [start, end) intervals showing which time ranges a snapshot has data for.
        dev_intervals: List of [start, end) intervals showing development intervals (forward-only).
        created_ts: Epoch millis timestamp when a snapshot was first created.
        updated_ts: Epoch millis timestamp when a snapshot was last updated.
        ttl: The time-to-live of a snapshot determines when it should be deleted after it's no longer referenced
            in any environment.
        previous: The snapshot data version that this snapshot was based on. If this snapshot is new, then previous will be None.
        version: User specified version for a snapshot that is used for physical storage.
            By default, the version is the fingerprint, but not all changes to nodes require a backfill.
            If a user passes a previous version, that will be used instead and no backfill will be required.
        change_category: User specified change category indicating which nodes require backfill from node changes made in this snapshot.
        unpaused_ts: The timestamp which indicates when this snapshot was unpaused. Unpaused means that
            this snapshot is evaluated on a recurring basis. None indicates that this snapshot is paused.
        effective_from: The timestamp which indicates when this snapshot should be considered effective.
            Applicable for forward-only snapshots only.
        migrated: Whether or not this snapshot has been created as a result of migration.
        unrestorable: Whether or not this snapshot can be used to revert its model to a previous version.
        next_auto_restatement_ts: The timestamp which indicates when is the next time this snapshot should be restated.
        table_naming_convention: Convention to follow when generating the physical table name
    """

    name: str
    fingerprint: SnapshotFingerprint
    physical_schema_: t.Optional[str] = Field(default=None, alias="physical_schema")
    node: Node
    parents: t.Tuple[SnapshotId, ...]
    intervals: Intervals = []
    dev_intervals: Intervals = []
    pending_restatement_intervals: Intervals = []
    created_ts: int
    updated_ts: int
    ttl: str
    previous_versions: t.Tuple[SnapshotDataVersion, ...] = ()
    version: t.Optional[str] = None
    dev_version_: t.Optional[str] = Field(default=None, alias="dev_version")
    change_category: t.Optional[SnapshotChangeCategory] = None
    unpaused_ts: t.Optional[int] = None
    effective_from: t.Optional[TimeLike] = None
    migrated: bool = False
    unrestorable: bool = False
    # Added to support Migration # 34 (default catalog)
    base_table_name_override: t.Optional[str] = None
    next_auto_restatement_ts: t.Optional[int] = None
    dev_table_suffix: str = "dev"
    table_naming_convention: TableNamingConvention = TableNamingConvention.default
    forward_only: bool = False
    # Physical table last modified timestamp, not to be confused with the "updated_ts" field
    # which is for the snapshot record itself
    last_altered_ts: t.Optional[int] = None
    dev_last_altered_ts: t.Optional[int] = None
@field_validator("ttl")
@classmethod
def _time_delta_must_be_positive(cls, v: str) -> str:
    """Validates that the TTL expression resolves to a time in the future."""
    current_time = now()
    # Relative expressions like "in 7 days" are resolved against `current_time`.
    if to_datetime(v, current_time) < current_time:
        raise ValueError(
            "Must be positive. Use the 'in' keyword to denote a positive time interval. For example, 'in 7 days'."
        )
    return v
@staticmethod
def hydrate_with_intervals_by_version(
    snapshots: t.Iterable[Snapshot],
    intervals: t.Iterable[SnapshotIntervals],
) -> t.List[Snapshot]:
    """Hydrates target snapshots with given intervals.

    This will match snapshots with intervals by name and version rather than identifiers.

    Args:
        snapshots: Target snapshots.
        intervals: Target snapshot intervals.

    Returns:
        List of target snapshots with hydrated intervals.
    """
    # Group interval records by (name, version) for constant-time lookup below.
    grouped: t.DefaultDict[t.Tuple[str, str], t.List[SnapshotIntervals]] = defaultdict(list)
    for snapshot_intervals in intervals:
        grouped[(snapshot_intervals.name, snapshot_intervals.version)].append(snapshot_intervals)

    hydrated = []
    for snapshot in snapshots:
        key = (snapshot.name, snapshot.version_get_or_generate())
        for snapshot_intervals in grouped.get(key, []):
            snapshot.merge_intervals(snapshot_intervals)
        hydrated.append(snapshot)
    return hydrated
@classmethod
def from_node(
    cls,
    node: Node,
    *,
    nodes: t.Dict[str, Node],
    ttl: str = c.DEFAULT_SNAPSHOT_TTL,
    version: t.Optional[str] = None,
    cache: t.Optional[t.Dict[str, SnapshotFingerprint]] = None,
    table_naming_convention: TableNamingConvention = TableNamingConvention.default,
) -> Snapshot:
    """Creates a new snapshot for a node.

    Args:
        Node: Node to snapshot.
        nodes: Dictionary of all nodes in the graph to make the fingerprint dependent on parent changes.
            If no dictionary is passed in the fingerprint will not be dependent on a node's parents.
        ttl: A TTL to determine how long orphaned (snapshots that are not promoted anywhere) should live.
        version: The version that a snapshot is associated with. Usually set during the planning phase.
        cache: Cache of node name to fingerprints.
        table_naming_convention: Convention to follow when generating the physical table name

    Returns:
        The newly created snapshot.
    """
    created_ts = now_timestamp()
    return cls(
        name=node.fqn,
        fingerprint=fingerprint_from_node(
            node,
            nodes=nodes,
            cache=cache,
        ),
        node=node,
        # Parent identifiers are derived with the same fingerprint cache as the node itself.
        parents=tuple(
            SnapshotId(
                name=parent_node.fqn,
                identifier=fingerprint_from_node(
                    parent_node,
                    nodes=nodes,
                    cache=cache,
                ).to_identifier(),
            )
            for parent_node in _parents_from_node(node, nodes).values()
        ),
        intervals=[],
        dev_intervals=[],
        # A brand-new snapshot starts with identical created/updated timestamps.
        created_ts=created_ts,
        updated_ts=created_ts,
        ttl=ttl,
        version=version,
        table_naming_convention=table_naming_convention,
    )
def __eq__(self, other: t.Any) -> bool:
    # Equality is fingerprint-based; the name participates only in the hash below.
    return isinstance(other, Snapshot) and self.fingerprint == other.fingerprint

def __hash__(self) -> int:
    return hash((self.__class__, self.name, self.fingerprint))

def __lt__(self, other: Snapshot) -> bool:
    # Ordering is by name only.
    return self.name < other.name
def add_interval(self, start: TimeLike, end: TimeLike, is_dev: bool = False) -> None:
    """Add a newly processed time interval to the snapshot.

    The actual stored intervals are [start_ts, end_ts) or start epoch timestamp inclusive and end epoch
    timestamp exclusive. This allows merging of ranges to be easier.

    Args:
        start: The start date/time of the interval (inclusive)
        end: The end date/time of the interval. If end is a date, then it is considered inclusive.
            If it is a datetime object, then it is exclusive.
        is_dev: Indicates whether the given interval is being added while in development mode.
    """
    if to_timestamp(start) > to_timestamp(end):
        raise ValueError(
            f"Attempted to add an Invalid interval ({start}, {end}) to snapshot {self.snapshot_id}"
        )

    start_ts, end_ts = self.inclusive_exclusive(start, end, strict=False, expand=False)
    if start_ts >= end_ts:
        # Skipping partial interval.
        return

    target = self.dev_intervals if is_dev else self.intervals
    target.append((start_ts, end_ts))
    # With fewer than two entries there is nothing to merge.
    if len(target) >= 2:
        merged = merge_intervals(target)
        if is_dev:
            self.dev_intervals = merged
        else:
            self.intervals = merged
def remove_interval(self, interval: Interval) -> None:
    """Remove an interval from both the prod and dev interval lists.

    Args:
        interval: The [start, end) interval to remove.
    """
    start, end = interval
    self.intervals = remove_interval(self.intervals, start, end)
    self.dev_intervals = remove_interval(self.dev_intervals, start, end)
def get_removal_interval(
    self,
    start: TimeLike,
    end: TimeLike,
    execution_time: t.Optional[TimeLike] = None,
    *,
    strict: bool = True,
    is_preview: bool = False,
) -> Interval:
    """Compute the interval that should be cleared from this snapshot.

    Args:
        start: The start date/time of the interval to remove.
        end: The end date/time of the interval to remove.
        execution_time: The time the interval is being removed.
        strict: Whether to fail when the inclusive start is the same as the
            exclusive end.
        is_preview: Whether the interval needs to be removed for a preview of
            forward-only changes. When previewing, we are not actually
            restating a model, but removing an interval to trigger a run.

    Returns:
        The [start, end) pair to remove.
    """
    if self.depends_on_past:
        # Models that depend on their own past state must be cleared up to the
        # execution time rather than just the requested end.
        end = execution_time or now_timestamp()

    removal_interval = self.inclusive_exclusive(start, end, strict)
    if is_preview or not self.full_history_restatement_only or not self.intervals:
        return removal_interval

    # Kinds that only support full-history restatement get the removal widened
    # to start at the first known interval.
    expanded_interval = self.inclusive_exclusive(self.intervals[0][0], end, strict)
    requested_start, requested_end = removal_interval
    expanded_start, expanded_end = expanded_interval

    # Only warn when the requested removal interval was a subset of the actual
    # model intervals and was automatically expanded; if it was the same or
    # wider, there is nothing to call out.
    was_expanded = requested_start > expanded_start or requested_end < expanded_end
    if was_expanded and self.is_incremental:
        from sqlmesh.core.console import get_console

        get_console().log_warning(
            f"Model '{self.model.name}' is '{self.model_kind_name}' which does not support partial restatement.\n"
            f"Expanding the requested restatement intervals from [{to_ts(requested_start)} - {to_ts(requested_end)}] "
            f"to [{to_ts(expanded_start)} - {to_ts(expanded_end)}] in order to fully restate the model."
        )

    return expanded_interval
@property
def allow_partials(self) -> bool:
    """Whether the underlying model tolerates partial intervals."""
    if not self.is_model:
        return False
    return self.model.allow_partials
def inclusive_exclusive(
    self,
    start: TimeLike,
    end: TimeLike,
    strict: bool = True,
    allow_partial: t.Optional[bool] = None,
    expand: bool = True,
) -> Interval:
    """Convert an inclusive (start, end) pair into a half-open [start, end) pair.

    Args:
        start: The start date/time of the interval (inclusive).
        end: The end date/time of the interval (inclusive).
        strict: Whether to fail when the inclusive start is the same as the
            exclusive end.
        allow_partial: Whether the interval can be partial or not; defaults to
            this snapshot's own ``allow_partials`` setting when not provided.
        expand: Whether or not partial intervals are expanded outwards.

    Returns:
        A [start, end) pair.
    """
    if allow_partial is None:
        allow_partial = self.allow_partials
    return inclusive_exclusive(
        start,
        end,
        self.node.interval_unit,
        strict=strict,
        allow_partial=allow_partial,
        expand=expand,
    )
def merge_intervals(self, other: t.Union[Snapshot, SnapshotIntervals]) -> None:
    """Inherit processed intervals from the target snapshot.

    Args:
        other: The target snapshot (or its intervals record) to inherit
            intervals from.
    """
    effective_from_ts = self.normalized_effective_from_ts or 0
    clamp_to_effective_from = (
        effective_from_ts > 0 and self.identifier != other.identifier
    )

    for start, end in other.intervals:
        if clamp_to_effective_from:
            # If effective_from is set, intervals that come after it must be
            # produced by the current snapshot rather than inherited.
            if start < effective_from_ts:
                end = min(end, effective_from_ts)
            if end > effective_from_ts:
                continue
        self.add_interval(start, end)

    if other.last_altered_ts:
        self.last_altered_ts = max(self.last_altered_ts or 0, other.last_altered_ts)

    if self.dev_version == other.dev_version:
        # Matching dev versions mean both snapshots point at the same dev
        # table, so dev intervals can be merged as well.
        for start, end in other.dev_intervals:
            self.add_interval(start, end, is_dev=True)
        if other.dev_last_altered_ts:
            self.dev_last_altered_ts = max(
                self.dev_last_altered_ts or 0, other.dev_last_altered_ts
            )

    self.pending_restatement_intervals = merge_intervals(
        [*self.pending_restatement_intervals, *other.pending_restatement_intervals]
    )
@property
def evaluatable(self) -> bool:
"""Whether or not a snapshot should be evaluated and have intervals."""