diff --git a/scrunch/datasets.py b/scrunch/datasets.py index 8ab11750..f4d889ed 100644 --- a/scrunch/datasets.py +++ b/scrunch/datasets.py @@ -696,6 +696,17 @@ class DatasetVariablesMixin(Mapping): Handles dataset variable iteration in a dict-like way """ + create_material_variables = True + """New variables created by methods of this class are usually defined by + an expression which is evaluated on the server. If create_material_variables + is True (the default), such an expression is evaluated eagerly, and its + output at that moment is stored as a "material" variable: part of the schema. + Changes to any of the input variables will not update the material variable. + If False, the expression itself is stored instead and re-evaluated on each + read; the resulting "derived" variable is an artifact rather than part of + the schema, and will change if any of its inputs change. + """ + def __getitem__(self, item): """ Returns a Variable() instance, `item` can be either a variable alias, @@ -744,11 +755,13 @@ def order(self): self._order = DatasetVariablesOrder(self._catalog, order) return self._order - def _var_create_reload_return(self, payload): + def _var_create_reload_return(self, entity_body): """ helper function for POSTing to variables, reload the catalog of variables and return newly created var """ + entity_body.setdefault("derived", not self.create_material_variables) + payload = shoji_entity_wrapper(entity_body) new_var = self.resource.variables.create(payload) # needed to update the variables collection self._reload_variables() @@ -1166,13 +1179,13 @@ def create_fill_values(self, variables, name, alias, description=''): {"map": fill_map} ] } - payload = shoji_entity_wrapper({ + body = { "alias": alias, "name": name, "description": description, "derivation": fill_expr - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) def create_single_response(self, categories, name, alias, description='', missing=True, notes=''): @@ -1223,13 +1236,13 @@ def create_single_response(self, categories, name, alias, description='', expr = dict(function='case', args=args + more_args) - payload = shoji_entity_wrapper(dict( + body = dict( alias=alias, name=name, - expr=expr, + derivation=expr, description=description, - notes=notes)) - return self._var_create_reload_return(payload) + notes=notes) + return self._var_create_reload_return(body) def rollup(self, variable_alias, name, alias, resolution, description='', notes=''): @@ -1249,27 +1262,17 @@ def rollup(self, variable_alias, name, alias, resolution, description='', expr = { 'function': 'rollup', 'args': [ - { - 'variable': self[variable_alias].url - }, - { - 'value': resolution - } + {'variable': self[variable_alias].url}, + {'value': resolution} ] } - - payload = shoji_entity_wrapper(dict( + body = dict( alias=alias, name=name, - expr=expr, + derivation=expr, description=description, - notes=notes)) - - new_var = self.resource.variables.create(payload) - # needed to update the variables collection - self._reload_variables() - # return the variable instance - return self[new_var['body']['alias']] + notes=notes) + return self._var_create_reload_return(body) def derive_multiple_response(self, categories, subvariables, name, alias, description='', notes='', uniform_basis=False): @@ -1320,7 +1323,7 @@ def derive_multiple_response(self, categories, subvariables, name, alias, categories=categories ) - payload = shoji_entity_wrapper({ + body = { 'name': name, 'alias': alias, 'description': description, @@ -1336,8 +1339,8 @@ def derive_multiple_response(self, categories, subvariables, name, alias, ] }] } - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) def create_multiple_response(self, responses, name, alias, description='', notes=''): @@ -1359,7 +1362,7 @@ def create_multiple_response(self, responses, name, alias, description='', alias='%s_%d' % (alias, resp['id']) ) - payload = shoji_entity_wrapper({ + body = { 'name': name, 'alias': alias, 'description': description, @@ -1374,8 +1377,8 @@ def create_multiple_response(self, responses, name, alias, description='', ] }] } - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) def variable_aliases(self, include_subvariables=False): existing_aliases = set() @@ -1445,14 +1448,14 @@ def bind_categorical_array(self, name, alias, subvariables, description='', "subreferences": subreferences } } - payload = shoji_entity_wrapper({ + body = { 'name': name, 'alias': alias, 'description': description, 'notes': notes, 'derivation': expression - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) def create_numeric(self, alias, name, derivation, description='', notes=''): """ @@ -1464,14 +1467,14 @@ def create_numeric(self, alias, name, derivation, description='', notes=''): if not hasattr(self.resource, 'variables'): self.resource.refresh() - payload = shoji_entity_wrapper(dict( + body = dict( alias=alias, name=name, derivation=expr, description=description, notes=notes - )) - return self._var_create_reload_return(payload) + ) + return self._var_create_reload_return(body) def create_categorical(self, categories, alias, name, multiple, description='', notes='', missing_case=None, uniform_basis=False): @@ -1690,33 +1693,33 @@ def create_variable(self, var_type, name, alias=None, description='', values = [1,4,5,2,1,3,1] """ self._validate_vartypes(var_type, resolution, subvariables, categories) - payload = { + body = { 'type': var_type, 'name': name, 'description': description, } if alias: - payload['alias'] = alias + body['alias'] = alias if resolution: - payload['resolution'] = resolution + body['resolution'] = resolution if var_type == 'multiple_response' and categories is None: - payload['categories'] = [ + body['categories'] = [ {'name': 'Not selected', 'id': NOT_SELECTED_ID, 'numeric_value': 2, 'missing': False}, {'name': 'Selected', 'id': SELECTED_ID, 'numeric_value': 1, 'missing': False, 'selected': True}, ] if categories: - payload['categories'] = categories + body['categories'] = categories if subvariables: - payload['subreferences'] = [] + body['subreferences'] = [] for item in subvariables: subrefs = {'name': item['name']} if item.get('alias'): subrefs['alias'] = item['alias'] - payload['subreferences'].append(subrefs) + body['subreferences'].append(subrefs) if values: - payload['values'] = values + body['values'] = values - self._var_create_reload_return(shoji_entity_wrapper(payload)) + self._var_create_reload_return(body) def copy_variable(self, variable, name, alias, derived=None, subvariable_codes=None): """ @@ -1760,17 +1763,14 @@ def subrefs(_variable, _alias): derivation = abs_url(variable.resource.body['derivation'], variable.resource.self) derivation.pop('references', None) - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'derivation': derivation}) + body = {'name': name, 'alias': alias, 'derivation': derivation} if variable.type == _MR_TYPE: # We are re-executing a multiple_response derivation. # We need to update the complex `array` function expression # to contain the new suffixed aliases. Given that the map is # unordered, we have to iterated and find a name match. - _ob = payload['body']['derivation']['args'][0]['args'][0] + _ob = body['derivation']['args'][0]['args'][0] subvars = _ob['map'] subreferences = subrefs(variable, alias) for subref in subreferences: @@ -1782,15 +1782,9 @@ def subrefs(_variable, _alias): else: derivation = { 'function': 'copy_variable', - 'args': [{ - 'variable': variable_resource.self - }] + 'args': [{'variable': variable_resource.self}] } - payload = shoji_entity_wrapper({ - 'name': name, - 'alias': alias, - 'derivation': derivation - }) + body = {'name': name, 'alias': alias, 'derivation': derivation} if "subvariables" in variable_resource.body: api_subreferences = variable_resource.body["subreferences"] @@ -1814,9 +1808,9 @@ def subrefs(_variable, _alias): derivation["references"] = {"subreferences": subreferences} if derived is False or derived: - payload['body']['derived'] = derived + body['derived'] = derived - return self._var_create_reload_return(payload) + return self._var_create_reload_return(body) def combine_categories(self, variable, map, categories, missing=None, default=None, name='', alias='', description=''): @@ -1857,14 +1851,14 @@ def combine_categorical(self, variable, map, categories=None, missing=None, # TODO: Implement `default` parameter in Crunch API combinations = combinations_from_map( map, categories or {}, missing or []) - payload = shoji_entity_wrapper({ + body = { 'name': name, 'alias': alias, 'description': description, 'derivation': combine_categories_expr( variable.resource.self, combinations) - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) def combine_multiple_response(self, variable, map, categories=None, default=None, name='', alias='', description=''): @@ -1889,14 +1883,14 @@ def combine_multiple_response(self, variable, map, categories=None, default=None # TODO: Implement `default` parameter in Crunch API responses = responses_from_map(variable, map, categories or {}, alias, parent_alias) - payload = shoji_entity_wrapper({ + body = { 'name': name, 'alias': alias, 'description': description, 'derivation': combine_responses_expr( variable.resource.self, responses) - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) def cast_summary(self, variable, cast_type): """ @@ -2778,7 +2772,7 @@ def derive_weight(self, targets, alias, name, description=''): 'targets': list(map(list, val.items())) }) - payload = shoji_entity_wrapper({ + body = { 'name': name, 'alias': alias, 'description': description, @@ -2786,8 +2780,8 @@ def derive_weight(self, targets, alias, name, description=''): 'function': 'rake', 'args': _targets } - }) - return self._var_create_reload_return(payload) + } + return self._var_create_reload_return(body) @property def is_view(self): @@ -3041,7 +3035,7 @@ def edit_categorical(self, categories, rules): more_args = process_expr(more_args, self.dataset) # epression value building expr = dict(function='case', args=args + more_args) - payload = shoji_entity_wrapper(dict(expr=expr)) + payload = shoji_entity_wrapper(dict(derivation=expr)) # patch the variable with the new payload resp = self.resource.patch(payload) self._reload_variables() diff --git a/scrunch/tests/test_datasets.py b/scrunch/tests/test_datasets.py index 63782aa4..46b7102f 100644 --- a/scrunch/tests/test_datasets.py +++ b/scrunch/tests/test_datasets.py @@ -357,6 +357,7 @@ def test_create_numeric(self, mocked_process): 'body': { 'alias': 'monthly_rent', 'name': 'Monthly rent', + "derived": False, 'derivation': { 'function': '/', 'args': [ @@ -398,7 +399,8 @@ def test_rollup(self, mocked_process): 'body': { 'alias': 'new_rolledup_var', 'name': 'new_rolledup_var', - 'expr': { + "derived": False, + 'derivation': { 'function': 'rollup', 'args': [ {'variable': 'https://test.crunch.io/api/datasets/123456/variables/001/'}, @@ -1483,7 +1485,7 @@ def getitem(key): assert not var_tuple.entity.edit.called # check we call `edit` for derived variables - body['derived'] = True + body["derived"] = True var.integrate() var_tuple.entity.edit.assert_called_once_with(derived=False) @@ -2150,6 +2152,7 @@ def test_recode_w_fill(self): "element": "shoji:entity", "body": { "alias": "filled", + "derived": False, "derivation": fill_expr, "name": "Filled var", "description": "" @@ -2208,6 +2211,7 @@ def test_else_code(self): "element": "shoji:entity", "body": { "alias": "filled", + "derived": False, "derivation": fill_expr, "name": "Filled var", "description": "" @@ -2266,6 +2270,7 @@ def test_else_var(self): "element": "shoji:entity", "body": { "alias": "filled", + "derived": False, "derivation": fill_expr, "name": "Filled var", "description": "" @@ -2332,7 +2337,8 @@ def test_recode_single_categorical(self): 'notes': '', 'alias': 'cat', 'name': 'My cat', - 'expr': { + "derived": False, + 'derivation': { 'function': 'case', 'args': [{ 'column': [1, 2, 3, -1], @@ -2452,6 +2458,7 @@ def test_recode_multiple_response(self): 'description': '', 'notes': '', 'name': 'my mr', + "derived": False, 'derivation': { 'function': 'array', 'args': [{ @@ -2625,7 +2632,8 @@ def test_create_categorical_else_case(self): "body": { "alias": "agerange", "name": "Age Range", - "expr": { + "derived": False, + "derivation": { "function": "case", "args": [ { @@ -2777,12 +2785,13 @@ def test_create_2_multiple_response_else_case(self): } } ds.resource.variables.create.assert_called_with({ - "element": "shoji:entity", + "element": "shoji:entity", "body": { "name": "Age range multi", "alias": "agerange_multi", "description": "", "notes": "", + "derived": False, "derivation": { "function": "array", "args": [ @@ -2970,6 +2979,7 @@ def test_create_3_multiple_response_else_case(self): "description": "", "notes": "", "uniform_basis": False, + "derived": False, "derivation": { "function": "array", "args": [ @@ -3351,6 +3361,7 @@ def test_create_categorical_missing_case(self): "description": "", "notes": "", "uniform_basis": False, + "derived": False, "derivation": { "function": "array", "args": [ @@ -3595,6 +3606,7 @@ def test_derive_multiple_response(self): 'description': '', 'notes': '', 'uniform_basis': False, + "derived": False, 'derivation': { 'function': 'array', 'args': [{ @@ -3691,7 +3703,7 @@ def test_base_variable(self): var_res.entity.body = {'type': 'numeric'} def getitem(key): - if key == 'derived': + if key == "derived": return False var_res.__getitem__.side_effect = getitem var_res.entity.self = '/variable/url/' @@ -3703,6 +3715,7 @@ def getitem(key): 'body': { 'alias': 'copy', 'name': 'copy', + "derived": False, 'derivation': { 'function': 'copy_variable', 'args': [{'variable': '/variable/url/'}] @@ -3715,7 +3728,7 @@ def test_derived_variable(self): var_res = mock.MagicMock() def getitem(key): - if key == 'derived': + if key == "derived": return True var_res.__getitem__.side_effect = getitem var_res.entity.body = { @@ -3745,6 +3758,7 @@ def getitem(key): 'body': { 'alias': 'copy', 'name': 'copy', + "derived": False, 'derivation': { 'function': 'array', 'args': [{ @@ -6694,7 +6708,7 @@ class TestHeadingSubtotals(TestDatasetBase): 'categories': TEST_CATEGORIES(), 'is_subvar': False, 'view': {}, - 'derived': False, + "derived": False, }, } @@ -6899,14 +6913,11 @@ def test_bind_categorical_array_without_codes(self): "references": {"subreferences": subreferences} } assert args == [{ - "element": "shoji:entity", - "body": { - "name": "My Array", - "alias": "my_array", - "notes": "", - "description": "", - "derivation": expression, - } + "name": "My Array", + "alias": "my_array", + "notes": "", + "description": "", + "derivation": expression, }] def test_bind_categorical_array_with_codes(self): @@ -6935,14 +6946,11 @@ def test_bind_categorical_array_with_codes(self): "references": {"subreferences": subreferences} } assert args == [{ - "element": "shoji:entity", - "body": { - "name": "My Array", - "alias": "my_array", - "notes": "", - "description": "", - "derivation": expression, - } + "name": "My Array", + "alias": "my_array", + "notes": "", + "description": "", + "derivation": expression, }] def test_copy_variable_no_codes(self): @@ -6998,12 +7006,9 @@ def test_copy_variable_no_codes(self): "references": {"subreferences": subreferences} } assert args == [{ - "element": "shoji:entity", - "body": { - "name": "copied", - "alias": "copied", - "derivation": expression, - } + "name": "copied", + "alias": "copied", + "derivation": expression, }] def test_copy_variable_with_codes(self): @@ -7061,11 +7066,7 @@ def test_copy_variable_with_codes(self): "references": {"subreferences": subreferences} } assert args == [{ - "element": "shoji:entity", - "body": { - "name": "copied", - "alias": "copied", - "derivation": expression, - } + "name": "copied", + "alias": "copied", + "derivation": expression, }] - diff --git a/scrunch/tests/test_recodes.py b/scrunch/tests/test_recodes.py index e1189d99..acf3ce92 100644 --- a/scrunch/tests/test_recodes.py +++ b/scrunch/tests/test_recodes.py @@ -55,6 +55,7 @@ def mr_in(mr_self, mr_alias, groups, parent_subvariables): 'element': 'shoji:entity', 'body': { 'alias': 'alias', + "derived": False, 'derivation': { 'function': 'combine_categories', 'args': [ @@ -76,6 +77,7 @@ def mr_in(mr_self, mr_alias, groups, parent_subvariables): 'name': 'name', 'description': '', 'alias': 'alias', + "derived": False, 'derivation': { 'function': 'combine_responses', 'args': [ @@ -351,7 +353,8 @@ def test_recode_categoricals(self, get_dataset_mock): 'alias': 'sexuality2', 'description': '', 'notes': '', - 'expr': { + "derived": False, + 'derivation': { 'function': 'case', 'args': [{ 'column': [1, 2, -1], @@ -491,6 +494,7 @@ def test_recode_multiple_responses(self, get_dataset_mock): 'description': '', 'notes': '', 'alias': 'Q1_recoded', + "derived": False, 'derivation': { 'function': 'array', 'args': [{