# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# sherlock_project/sherlock.py
#   get_response() gains an `except UnicodeError` handler placed after the existing
#   `except requests.exceptions.RequestException` handler. It sets error_context to
#   "Encoding Error" and exception_text to str(err).
#
# tests/test_unicode.py (new file)
#   Regression tests for issue #2730. Each test gives get_response() a Future that was
#   completed with set_exception() (UnicodeDecodeError in one test, UnicodeEncodeError in
#   the other) and asserts that the response is None, that error_context is
#   "Encoding Error", and that the codec name appears in exception_text.
#
# tests/test_ux.py
#   test_remove_nsfw now targets 'Xvideos' instead of 'Pornhub'; the parametrized lists for
#   test_nsfw_explicit_selection change from ['Pornhub'] / ['Pornhub', 'Xvideos'] to
#   ['Xvideos'] / ['Xvideos', 'Erome'].
#
# NOTE(review): the line breaks and indentation inside the hunks below appear to have been
# collapsed to single spaces. The hunk headers still state the original line counts, so the
# patch presumably will not apply until the original layout is restored -- confirm against
# the upstream commit rather than re-indenting by hand.
diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index f78d4b8cac..ab7d993778 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network): except requests.exceptions.RequestException as err: error_context = "Unknown Error" exception_text = str(err) + except UnicodeError as err: + error_context = "Encoding Error" + exception_text = str(err) return response, error_context, exception_text diff --git a/tests/test_unicode.py b/tests/test_unicode.py new file mode 100644 index 0000000000..fa6e3a3038 --- /dev/null +++ b/tests/test_unicode.py @@ -0,0 +1,47 @@ +"""Tests for handling usernames with special/unicode characters.""" + +from concurrent.futures import Future + +from sherlock_project.sherlock import get_response + + +def _make_future_with_exception(exc): + """Create a Future that raises the given exception.""" + future = Future() + future.set_exception(exc) + return future + + +def test_get_response_handles_unicode_decode_error(): + """Regression test for issue #2730. + + Usernames with special characters (e.g. 'Émile') can trigger a + UnicodeDecodeError inside the requests library during redirect + handling. This must not crash the program. 
+ """ + future = _make_future_with_exception( + UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte") + ) + response, error_context, exception_text = get_response( + request_future=future, + error_type=["status_code"], + social_network="TestSite", + ) + assert response is None + assert error_context == "Encoding Error" + assert "utf-8" in exception_text + + +def test_get_response_handles_unicode_encode_error(): + """UnicodeEncodeError should also be caught (subclass of UnicodeError).""" + future = _make_future_with_exception( + UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)") + ) + response, error_context, exception_text = get_response( + request_future=future, + error_type=["status_code"], + social_network="TestSite", + ) + assert response is None + assert error_context == "Encoding Error" + assert "ascii" in exception_text diff --git a/tests/test_ux.py b/tests/test_ux.py index 3c62463b50..1feaf88a19 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -4,7 +4,7 @@ from sherlock_interactives import InteractivesSubprocessError def test_remove_nsfw(sites_obj): - nsfw_target: str = 'Pornhub' + nsfw_target: str = 'Xvideos' assert nsfw_target in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites() assert nsfw_target not in {site.name: site.information for site in sites_obj} @@ -12,8 +12,8 @@ def test_remove_nsfw(sites_obj): # Parametrized sites should *not* include Motherless, which is acting as the control @pytest.mark.parametrize('nsfwsites', [ - ['Pornhub'], - ['Pornhub', 'Xvideos'], + ['Xvideos'], + ['Xvideos', 'Erome'], ]) def test_nsfw_explicit_selection(sites_obj, nsfwsites): for site in nsfwsites: