diff --git a/taiga/projects/history/migrations/0015_anonymize_cancelled_users_history.py b/taiga/projects/history/migrations/0015_anonymize_cancelled_users_history.py
new file mode 100644
--- /dev/null
+++ b/taiga/projects/history/migrations/0015_anonymize_cancelled_users_history.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright (c) 2021-present Kaleidos INC
+
+"""
+Data migration to anonymize history entries for users that have already
+been cancelled. This is a retroactive fix for GDPR compliance (issue #96).
+"""
+
+import json
+
+from django.db import migrations
+
+
+# Blanks the author of the stored comment versions written by one user.
+# Both parameters are JSON documents matched with jsonb containment (@>),
+# so only an exact user id matches: the first one is compared with each
+# version, the second one selects the rows holding such a version.
+# WITH ORDINALITY + ORDER BY keeps the versions in their stored order.
+ANONYMIZE_COMMENT_VERSIONS_SQL = """
+    UPDATE history_historyentry
+    SET comment_versions = (
+        SELECT jsonb_agg(
+            CASE
+                WHEN elem @> %s::jsonb
+                THEN jsonb_set(elem, '{user}', '{"id": null}'::jsonb)
+                ELSE elem
+            END
+            ORDER BY idx
+        )
+        FROM jsonb_array_elements(comment_versions)
+             WITH ORDINALITY AS versions(elem, idx)
+    )
+    WHERE comment_versions @> %s::jsonb
+"""
+
+
+def anonymize_cancelled_users_history(apps, schema_editor):
+    """
+    Anonymize the history entries of every user that is already cancelled.
+
+    For each user with a cancellation date, the 'user' and
+    'delete_comment_user' JSON fields pointing to them get the name
+    'Deleted user' (the pk is kept), the cached diff of those entries is
+    cleared and their id is blanked in the stored comment versions.
+    """
+    User = apps.get_model("users", "User")
+    HistoryEntry = apps.get_model("history", "HistoryEntry")
+    connection = schema_editor.connection
+
+    # Only the primary keys are used, so do not load the whole user rows.
+    cancelled_user_pks = User.objects.filter(
+        date_cancelled__isnull=False
+    ).values_list("pk", flat=True)
+
+    for user_pk in cancelled_user_pks:
+        anon_user = {"pk": user_pk, "name": "Deleted user"}
+
+        # Anonymize 'user' field
+        HistoryEntry.objects.filter(
+            user__pk=user_pk
+        ).update(
+            user=anon_user,
+            values_diff_cache=None
+        )
+
+        # Anonymize 'delete_comment_user' field
+        HistoryEntry.objects.filter(
+            delete_comment_user__pk=user_pk
+        ).update(
+            delete_comment_user=anon_user
+        )
+
+        # Anonymize user references inside 'comment_versions' JSONB arrays
+        version_author = {"user": {"id": user_pk}}
+        with connection.cursor() as cursor:
+            cursor.execute(ANONYMIZE_COMMENT_VERSIONS_SQL, [
+                json.dumps(version_author),
+                json.dumps([version_author]),
+            ])
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("history", "0014_json_to_jsonb"),
+        ("users", "0033_auto_20211110_1526"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            anonymize_cancelled_users_history,
+            migrations.RunPython.noop,  # Not reversible
+        ),
+    ]
diff --git a/taiga/users/models.py b/taiga/users/models.py
--- a/taiga/users/models.py
+++ b/taiga/users/models.py
@@ -289,12 +289,71 @@ def cancel(self):
         self.save()
         self.auth_data.all().delete()
 
+        # Anonymize history entries to prevent re-identification (GDPR)
+        self._anonymize_history_entries()
+
         # Blocking all owned projects
         self.owned_projects.update(blocked_code=BLOCKED_BY_OWNER_LEAVING)
 
         # Remove all memberships
         self.memberships.all().delete()
 
+    def _anonymize_history_entries(self):
+        """
+        Remove identifying information from history entries to comply with
+        GDPR requirements. Replaces the user's real name with 'Deleted user'
+        in the 'user' and 'delete_comment_user' JSON fields, preserving the
+        PK for internal permission checks, and blanks the user id stored in
+        the previous versions of the comments written by this user.
+        """
+        import json
+
+        from django.db import connection
+
+        from taiga.projects.history.models import HistoryEntry
+
+        anon_user = {"pk": self.pk, "name": "Deleted user"}
+
+        # Anonymize 'user' field and clear values_diff_cache (may contain
+        # cached user names)
+        HistoryEntry.objects.filter(
+            user__pk=self.pk
+        ).update(
+            user=anon_user,
+            values_diff_cache=None
+        )
+
+        # Anonymize 'delete_comment_user' field
+        HistoryEntry.objects.filter(
+            delete_comment_user__pk=self.pk
+        ).update(
+            delete_comment_user=anon_user
+        )
+
+        # Anonymize user references inside 'comment_versions' JSONB arrays.
+        # Django ORM cannot do in-place JSONB array element updates, so we
+        # use raw SQL. Rows and versions are matched with jsonb containment
+        # (@>) so only this exact user id matches, and WITH ORDINALITY +
+        # ORDER BY keeps the versions in their stored order.
+        version_author = {"user": {"id": self.pk}}
+        with connection.cursor() as cursor:
+            cursor.execute("""
+                UPDATE history_historyentry
+                SET comment_versions = (
+                    SELECT jsonb_agg(
+                        CASE
+                            WHEN elem @> %s::jsonb
+                            THEN jsonb_set(elem, '{user}', '{"id": null}'::jsonb)
+                            ELSE elem
+                        END
+                        ORDER BY idx
+                    )
+                    FROM jsonb_array_elements(comment_versions)
+                         WITH ORDINALITY AS versions(elem, idx)
+                )
+                WHERE comment_versions @> %s::jsonb
+            """, [json.dumps(version_author), json.dumps([version_author])])
+
 
 class Role(models.Model):
     name = models.CharField(max_length=200, null=False, blank=False,
diff --git a/tests/integration/test_users.py b/tests/integration/test_users.py
--- a/tests/integration/test_users.py
+++ b/tests/integration/test_users.py
@@ -334,9 +334,134 @@ def test_deleted_user_can_not_use_its_token(client):
     assert response.status_code == 401, response.data
 
 
+def test_delete_self_user_anonymizes_history_entries(client):
+    """
+    When a user cancels their account, all identifying information in
+    history entries should be anonymized (GDPR compliance, issue #96).
+    """
+    from taiga.projects.history.models import HistoryEntry
+
+    user = f.UserFactory.create(full_name="Real Username")
+    project = f.ProjectFactory.create(owner=user)
+    original_user_pk = user.pk
+
+    issue = f.create_issue(owner=user, project=project)
+    issue_key = "issues.issue:{}".format(issue.pk)
+
+    # Create history entries with the user's real name
+    entry1 = HistoryEntry.objects.create(
+        user={"pk": user.pk, "name": "Real Username"},
+        project=project,
+        type=1,
+        key=issue_key,
+        diff={},
+        values={},
+        comment="A test comment",
+        values_diff_cache={"some": "cached_data"},
+    )
+    entry2 = HistoryEntry.objects.create(
+        user={"pk": user.pk, "name": "Real Username"},
+        project=project,
+        type=1,
+        key=issue_key,
+        diff={},
+        values={},
+        delete_comment_user={"pk": user.pk, "name": "Real Username"},
+    )
+
+    # Sanity check: entries have original user name
+    assert entry1.user["name"] == "Real Username"
+    assert entry2.delete_comment_user["name"] == "Real Username"
+
+    # Cancel the user
+    user.cancel()
+
+    # Verify history entries are anonymized
+    entry1.refresh_from_db()
+    entry2.refresh_from_db()
+
+    assert entry1.user["pk"] == original_user_pk
+    assert entry1.user["name"] == "Deleted user"
+    assert entry1.values_diff_cache is None
+
+    assert entry2.user["pk"] == original_user_pk
+    assert entry2.user["name"] == "Deleted user"
+    assert entry2.delete_comment_user["pk"] == original_user_pk
+    assert entry2.delete_comment_user["name"] == "Deleted user"
+
+
+def test_delete_self_user_does_not_anonymize_other_users_history(client):
+    """
+    Cancelling one user should not affect history entries owned by other users.
+    """
+    from taiga.projects.history.models import HistoryEntry
+
+    user_to_delete = f.UserFactory.create(full_name="User To Delete")
+    other_user = f.UserFactory.create(full_name="Other User")
+    project = f.ProjectFactory.create(owner=user_to_delete)
+
+    issue = f.create_issue(owner=other_user, project=project)
+    issue_key = "issues.issue:{}".format(issue.pk)
+
+    entry = HistoryEntry.objects.create(
+        user={"pk": other_user.pk, "name": "Other User"},
+        project=project,
+        type=1,
+        key=issue_key,
+        diff={},
+        values={},
+    )
+
+    user_to_delete.cancel()
+
+    entry.refresh_from_db()
+    assert entry.user["name"] == "Other User"
+
+
+def test_delete_self_user_anonymizes_comment_versions(client):
+    """
+    Cancelling a user should blank their id in the stored versions of
+    edited comments, keeping the order of the versions and leaving the
+    versions written by other users untouched.
+    """
+    from taiga.projects.history.models import HistoryEntry
+
+    user_to_delete = f.UserFactory.create(full_name="User To Delete")
+    other_user = f.UserFactory.create(full_name="Other User")
+    project = f.ProjectFactory.create(owner=user_to_delete)
+
+    issue = f.create_issue(owner=other_user, project=project)
+    issue_key = "issues.issue:{}".format(issue.pk)
+
+    entry = HistoryEntry.objects.create(
+        user={"pk": other_user.pk, "name": "Other User"},
+        project=project,
+        type=1,
+        key=issue_key,
+        diff={},
+        values={},
+        comment="Current comment",
+        comment_versions=[
+            {"comment": "First version", "user": {"id": user_to_delete.pk}},
+            {"comment": "Second version", "user": {"id": other_user.pk}},
+            {"comment": "Third version", "user": {"id": user_to_delete.pk}},
+        ],
+    )
+
+    user_to_delete.cancel()
+
+    entry.refresh_from_db()
+    assert entry.user["name"] == "Other User"
+    assert entry.comment_versions == [
+        {"comment": "First version", "user": {"id": None}},
+        {"comment": "Second version", "user": {"id": other_user.pk}},
+        {"comment": "Third version", "user": {"id": None}},
+    ]
+
+
 ##############################
 ## Cancel account
 ##############################
 
 def test_cancel_self_user_with_valid_token(client):
     user = f.UserFactory.create()