11import collections
2- from typing import Any , Optional , List , Dict , Tuple
2+ from enum import Enum
3+ from typing import Any , Optional , List , Dict , Tuple , Type
34
45from runtype import dataclass
56from data_diff .diff_tables import DiffResultWrapper
7+ from data_diff .sqeleton .abcs .database_types import (
8+ JSON ,
9+ Boolean ,
10+ ColType ,
11+ Array ,
12+ ColType_UUID ,
13+ Date ,
14+ FractionalType ,
15+ NumericType ,
16+ Struct ,
17+ TemporalType ,
18+ ColType_Alphanum ,
19+ String_Alphanum ,
20+ )
621
722
823def jsonify_error (table1 : List [str ], table2 : List [str ], dbt_model : str , error : str ) -> "FailedDiff" :
@@ -15,11 +30,16 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
1530 ).json ()
1631
1732
# A dataset's columns as (name, type, ColType) triples; the first two elements
# are strings and the third is the column's ColType — see `_jsonify_columns_diff`.
Columns = List[Tuple[str, str, ColType]]
34+
35+
1836def jsonify (
1937 diff : DiffResultWrapper ,
2038 dbt_model : str ,
39+ dataset1_columns : Columns ,
40+ dataset2_columns : Columns ,
41+ columns_diff : Dict [str , List [str ]],
2142 with_summary : bool = False ,
22- with_columns : Optional [Dict [str , List [str ]]] = None ,
2343) -> "JsonDiff" :
2444 """
2545 Converts the diff result into a JSON-serializable format.
@@ -53,16 +73,13 @@ def jsonify(
5373 if with_summary :
5474 summary = _jsonify_diff_summary (diff .get_stats_dict (is_dbt = True ))
5575
56- columns = None
57- if with_columns :
58- columns = _jsonify_columns_diff (with_columns , list (key_columns ))
76+ columns = _jsonify_columns_diff (dataset1_columns , dataset2_columns , columns_diff , list (key_columns ))
5977
6078 is_different = bool (
6179 t1_exclusive_rows
6280 or t2_exclusive_rows
6381 or diff_rows
64- or with_columns
65- and (with_columns ["added" ] or with_columns ["removed" ] or with_columns ["changed" ])
82+ or (columns_diff ["added" ] or columns_diff ["removed" ] or columns_diff ["changed" ])
6683 )
6784 return JsonDiff (
6885 status = "success" ,
@@ -138,8 +155,44 @@ class ExclusiveColumns:
138155 dataset2 : List [str ]
139156
140157
class ColumnKind(Enum):
    """Coarse category of a column, independent of the database's own type names.

    The member *values* are the strings stored in the ``kind`` field of a
    serialized `Column`.
    """

    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"
    BOOL = "boolean"
    # Fallback returned by `_map_kind` when no more specific kind applies.
    UNSUPPORTED = "unsupported"
167+
168+
# Ordered lookup table from database column types to JSON column kinds.
#
# `_map_kind` walks this list from the top and returns the kind paired with
# the FIRST type the column is an instance of, so the order is significant:
# an earlier entry wins over any later entry that would also match.
# `ColType` is listed last and maps to UNSUPPORTED as the catch-all.
# NOTE(review): this presumes more specific types are listed before their
# base classes (e.g. Date before TemporalType) — confirm against the ColType
# class hierarchy before reordering or adding entries.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    (ColType, ColumnKind.UNSUPPORTED),
]
183+
184+
@dataclass
class Column:
    """JSON-serializable description of one column of a compared dataset."""

    # Column name.
    name: str
    # Column type as a string.
    # NOTE(review): presumably the database's raw type name — confirm with the caller.
    type: str
    # Value of a `ColumnKind` member, e.g. "integer" or "string".
    kind: str
190+
191+
141192@dataclass
142193class JsonColumnsSummary :
194+ dataset1 : List [Column ]
195+ dataset2 : List [Column ]
143196 primaryKey : List [str ]
144197 exclusive : ExclusiveColumns
145198 typeChanged : List [str ]
@@ -179,7 +232,7 @@ class JsonDiff:
179232 summary : Optional [JsonDiffSummary ]
180233 columns : Optional [JsonColumnsSummary ]
181234
182- version : str = "1.0 .0"
235+ version : str = "1.1 .0"
183236
184237
185238def _group_rows (
@@ -262,12 +315,27 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
262315 )
263316
264317
265- def _jsonify_columns_diff (columns_diff : Dict [str , List [str ]], key_columns : List [str ]) -> JsonColumnsSummary :
def _to_json_columns(columns: Columns) -> List[Column]:
    """Convert ``(name, type, ColType)`` triples into serializable `Column` entries."""
    return [
        Column(name=name, type=type_, kind=_map_kind(col_type).value)
        for name, type_, col_type in columns
    ]


def _jsonify_columns_diff(
    dataset1_columns: Columns, dataset2_columns: Columns, columns_diff: Dict[str, List[str]], key_columns: List[str]
) -> JsonColumnsSummary:
    """Build the columns section of the JSON diff.

    Args:
        dataset1_columns: ``(name, type, ColType)`` triples for the first dataset.
        dataset2_columns: ``(name, type, ColType)`` triples for the second dataset.
        columns_diff: Column-name lists keyed by ``"added"``, ``"removed"`` and
            ``"changed"``; a missing key is treated as an empty list.
        key_columns: Names reported as the primary key.

    Returns:
        A `JsonColumnsSummary` listing both datasets' columns, the primary
        key, the columns exclusive to each dataset and the columns whose type
        changed.
    """
    return JsonColumnsSummary(
        dataset1=_to_json_columns(dataset1_columns),
        dataset2=_to_json_columns(dataset2_columns),
        primaryKey=key_columns,
        exclusive=ExclusiveColumns(
            # "added" columns are reported as exclusive to dataset2,
            # "removed" columns as exclusive to dataset1.
            dataset2=list(columns_diff.get("added", [])),
            dataset1=list(columns_diff.get("removed", [])),
        ),
        typeChanged=list(columns_diff.get("changed", [])),
    )
335+
336+
def _map_kind(kind: ColType) -> ColumnKind:
    """Return the JSON column kind for a database column type instance.

    `KIND_MAPPING` is scanned in order and the kind paired with the first
    type that ``kind`` is an instance of is returned. If no entry matches,
    `ColumnKind.UNSUPPORTED` is returned.
    """
    for raw_kind, json_kind in KIND_MAPPING:
        if isinstance(kind, raw_kind):
            return json_kind
    return ColumnKind.UNSUPPORTED
0 commit comments