Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit da1a0af

Browse files
committed
enhance cloud event metadata
1 parent cfd941f commit da1a0af

5 files changed

Lines changed: 41 additions & 18 deletions

File tree

data_diff/cloud/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from .datafold_api import DatafoldAPI, TCloudApiDataDiff
1+
from .datafold_api import DatafoldAPI, TCloudApiDataDiff, TCloudApiOrgMeta
22
from .data_source import get_or_create_data_source

data_diff/cloud/datafold_api.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ class TCloudApiDataDiff(pydantic.BaseModel):
105105
filter2: Optional[str] = None
106106

107107

108+
class TCloudApiOrgMeta(pydantic.BaseModel):
109+
org_id: int
110+
org_name: str
111+
user_id: int
112+
113+
108114
class TSummaryResultPrimaryKeyStats(pydantic.BaseModel):
109115
total_rows: Tuple[int, int]
110116
nulls: Tuple[int, int]
@@ -267,3 +273,10 @@ def check_data_source_test_results(self, job_id: int) -> List[TCloudApiDataSourc
267273
)
268274
for item in rv.json()["results"]
269275
]
276+
277+
def get_org_meta(self) -> TCloudApiOrgMeta:
278+
response = self.make_get_request(f"api/v1/organization/meta")
279+
response_json = response.json()
280+
return TCloudApiOrgMeta(
281+
org_id=response_json["org_id"], org_name=response_json["org_name"], user_id=response_json["user_id"]
282+
)

data_diff/dbt.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import keyring
1313

14-
from .cloud import DatafoldAPI, TCloudApiDataDiff, get_or_create_data_source
14+
from .cloud import DatafoldAPI, TCloudApiDataDiff, TCloudApiOrgMeta, get_or_create_data_source
1515
from .dbt_parser import DbtParser, PROJECT_FILE
1616

1717

@@ -79,6 +79,7 @@ def dbt_diff(
7979
# exit so the user can set the key
8080
if not api:
8181
return
82+
org_meta = api.get_org_meta()
8283

8384
if datasource_id is None:
8485
rich.print("[red]Data source ID not found in dbt_project.yml")
@@ -113,7 +114,7 @@ def dbt_diff(
113114

114115
if diff_vars.primary_keys:
115116
if is_cloud:
116-
diff_thread = run_as_daemon(_cloud_diff, diff_vars, datasource_id, api)
117+
diff_thread = run_as_daemon(_cloud_diff, diff_vars, datasource_id, api, org_meta)
117118
diff_threads.append(diff_thread)
118119
else:
119120
_local_diff(diff_vars)
@@ -263,7 +264,7 @@ def _initialize_api() -> Optional[DatafoldAPI]:
263264
return DatafoldAPI(api_key=api_key, host=datafold_host)
264265

265266

266-
def _cloud_diff(diff_vars: DiffVars, datasource_id: int, api: DatafoldAPI) -> None:
267+
def _cloud_diff(diff_vars: DiffVars, datasource_id: int, api: DatafoldAPI, org_meta: TCloudApiOrgMeta) -> None:
267268
diff_output_str = _diff_output_base(".".join(diff_vars.dev_path), ".".join(diff_vars.prod_path))
268269
payload = TCloudApiDataDiff(
269270
data_source1_id=datasource_id,
@@ -337,6 +338,9 @@ def _cloud_diff(diff_vars: DiffVars, datasource_id: int, api: DatafoldAPI) -> No
337338
error=err_message,
338339
diff_id=diff_id,
339340
is_cloud=True,
341+
org_id=org_meta.org_id,
342+
org_name=org_meta.org_name,
343+
user_id=org_meta.user_id
340344
)
341345
send_event_json(event_json)
342346

data_diff/tracking.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ def create_end_event_json(
116116
error: Optional[str],
117117
diff_id: Optional[int] = None,
118118
is_cloud: bool = False,
119+
org_id: Optional[int] = None,
120+
org_name: Optional[str] = None,
121+
user_id: Optional[int] = None,
119122
):
120123
return {
121124
"event": "os_diff_run_end",
@@ -138,6 +141,9 @@ def create_end_event_json(
138141
"dbt_user_id": dbt_user_id,
139142
"dbt_version": dbt_version,
140143
"dbt_project_id": dbt_project_id,
144+
"org_id": org_id,
145+
"org_name": org_name,
146+
"user_id": user_id,
141147
},
142148
}
143149

tests/test_dbt.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from pathlib import Path
44
import yaml
5+
from data_diff.cloud.datafold_api import TCloudApiOrgMeta
56
from data_diff.diff_tables import Algorithm
67
from .test_cli import run_datadiff_cli
78

@@ -482,6 +483,7 @@ def test_local_diff_no_diffs(self, mock_diff_tables):
482483
@patch("data_diff.dbt.os.environ")
483484
@patch("data_diff.dbt.DatafoldAPI")
484485
def test_cloud_diff(self, mock_api, mock_os_environ, mock_print):
486+
org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1)
485487
expected_api_key = "an_api_key"
486488
mock_api.create_data_diff.return_value = {"id": 123}
487489
mock_os_environ.get.return_value = expected_api_key
@@ -493,7 +495,7 @@ def test_cloud_diff(self, mock_api, mock_os_environ, mock_print):
493495
threads = None
494496
where = "a_string"
495497
diff_vars = DiffVars(dev_qualified_list, prod_qualified_list, expected_primary_keys, connection, threads, where)
496-
_cloud_diff(diff_vars, expected_datasource_id, api=mock_api)
498+
_cloud_diff(diff_vars, expected_datasource_id, org_meta=org_meta, api=mock_api)
497499

498500
mock_api.create_data_diff.assert_called_once()
499501
self.assertEqual(mock_print.call_count, 2)
@@ -516,17 +518,17 @@ def test_cloud_diff(self, mock_api, mock_os_environ, mock_print):
516518
def test_diff_is_cloud(
517519
self, mock_print, mock_dbt_parser, mock_cloud_diff, mock_local_diff, mock_get_diff_vars, mock_initialize_api
518520
):
521+
org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1)
519522
mock_dbt_parser_inst = Mock()
520523
mock_model = Mock()
521524
expected_dbt_vars_dict = {
522525
"prod_database": "prod_db",
523526
"prod_schema": "prod_schema",
524527
"datasource_id": 1,
525528
}
526-
host = "a_host"
527-
api_key = "a_api_key"
528-
api = DatafoldAPI(api_key=api_key, host=host)
529-
mock_initialize_api.return_value = api
529+
mock_api = Mock()
530+
mock_initialize_api.return_value = mock_api
531+
mock_api.get_org_meta.return_value = org_meta
530532
connection = None
531533
threads = None
532534
where = "a_string"
@@ -541,7 +543,7 @@ def test_diff_is_cloud(
541543
mock_dbt_parser_inst.set_connection.assert_not_called()
542544

543545
mock_initialize_api.assert_called_once()
544-
mock_cloud_diff.assert_called_once_with(expected_diff_vars, 1, api)
546+
mock_cloud_diff.assert_called_once_with(expected_diff_vars, 1, mock_api, org_meta)
545547
mock_local_diff.assert_not_called()
546548
mock_print.assert_called_once()
547549

@@ -555,16 +557,16 @@ def test_diff_is_cloud(
555557
def test_diff_is_cloud_no_ds_id(
556558
self, _, mock_print, mock_dbt_parser, mock_cloud_diff, mock_local_diff, mock_get_diff_vars, mock_initialize_api
557559
):
560+
org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1)
558561
mock_dbt_parser_inst = Mock()
559562
mock_model = Mock()
560563
expected_dbt_vars_dict = {
561564
"prod_database": "prod_db",
562565
"prod_schema": "prod_schema",
563566
}
564-
host = "a_host"
565-
api_key = "a_api_key"
566-
api = DatafoldAPI(api_key=api_key, host=host)
567-
mock_initialize_api.return_value = api
567+
mock_api = Mock()
568+
mock_initialize_api.return_value = mock_api
569+
mock_api.get_org_meta.return_value = org_meta
568570
connection = None
569571
threads = None
570572
where = "a_string"
@@ -723,10 +725,8 @@ def test_diff_is_cloud_no_pks(
723725
"prod_schema": "prod_schema",
724726
"datasource_id": 1,
725727
}
726-
host = "a_host"
727-
api_key = "a_api_key"
728-
api = DatafoldAPI(api_key=api_key, host=host)
729-
mock_initialize_api.return_value = api
728+
mock_api = Mock()
729+
mock_initialize_api.return_value = mock_api
730730

731731
mock_dbt_parser_inst.get_models.return_value = [mock_model]
732732
mock_dbt_parser_inst.get_datadiff_variables.return_value = expected_dbt_vars_dict

0 commit comments

Comments
 (0)