diff --git a/devel/site-list.py b/devel/site-list.py index e0fd21d53f..e594d04d0f 100644 --- a/devel/site-list.py +++ b/devel/site-list.py @@ -14,7 +14,7 @@ # Removes schema-specific keywords for proper processing social_networks = data.copy() -social_networks.pop('$schema', None) +social_networks.pop("$schema", None) # Sort the social networks in alphanumeric order social_networks = sorted(social_networks.items()) diff --git a/devel/summarize_site_validation.py b/devel/summarize_site_validation.py index 89d39750e1..4d91d59c25 100644 --- a/devel/summarize_site_validation.py +++ b/devel/summarize_site_validation.py @@ -6,10 +6,11 @@ import sys from pathlib import Path + def summarize_junit_xml(xml_path: Path) -> str: tree = ET.parse(xml_path) root = tree.getroot() - suite = root.find('testsuite') + suite = root.find("testsuite") pass_message: str = ":heavy_check_mark:   Pass" fail_message: str = ":x:   Fail" @@ -22,42 +23,53 @@ def summarize_junit_xml(xml_path: Path) -> str: summary_lines.append("| Target | F+ Check | F- Check |") summary_lines.append("|---|---|---|") - failures = int(suite.get('failures', 0)) + failures = int(suite.get("failures", 0)) errors_detected: bool = False results: dict[str, dict[str, str]] = {} - for testcase in suite.findall('testcase'): - test_name = testcase.get('name').split('[')[0] - site_name = testcase.get('name').split('[')[1].rstrip(']') - failure = testcase.find('failure') - error = testcase.find('error') + for testcase in suite.findall("testcase"): + test_name = testcase.get("name").split("[")[0] + site_name = testcase.get("name").split("[")[1].rstrip("]") + failure = testcase.find("failure") + error = testcase.find("error") if site_name not in results: results[site_name] = {} if test_name == "test_false_neg": - results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message + results[site_name]["F- Check"] = ( + pass_message if failure is None and error is None else fail_message + ) elif test_name == "test_false_pos": - results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message + results[site_name]["F+ Check"] = ( + pass_message if failure is None and error is None else fail_message + ) if error is not None: errors_detected = True for result in results: - summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |") + summary_lines.append( + f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |" + ) if failures > 0: - summary_lines.append("\n___\n" + - "\nFailures were detected on at least one updated target. Commits containing accuracy failures" + - " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).") + summary_lines.append( + "\n___\n" + + "\nFailures were detected on at least one updated target. Commits containing accuracy failures" + + " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences)." + ) if errors_detected: - summary_lines.append("\n___\n" + - "\n**Errors were detected during validation. Please review the workflow logs.**") + summary_lines.append( + "\n___\n" + + "\n**Errors were detected during validation. Please review the workflow logs.**" + ) return "\n".join(summary_lines) + if __name__ == "__main__": if len(sys.argv) != 2: print("Usage: summarize_site_validation.py ") diff --git a/pyproject.toml b/pyproject.toml index 2ebad06406..8192594a58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,3 +66,7 @@ defusedxml = "^0.7.1" [tool.poetry.scripts] sherlock = 'sherlock_project.sherlock:main' + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" diff --git a/sherlock_project/__init__.py b/sherlock_project/__init__.py index ad6c9e308f..89a7b28df8 100644 --- a/sherlock_project/__init__.py +++ b/sherlock_project/__init__.py @@ -1,4 +1,4 @@ -""" Sherlock Module +"""Sherlock Module This module contains the main logic to search for usernames at social networks. @@ -15,16 +15,21 @@ def get_version() -> str: try: return pkg_version("sherlock_project") except PackageNotFoundError: - pyproject_path: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml" + pyproject_path: pathlib.Path = ( + pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml" + ) with pyproject_path.open("rb") as f: pyproject_data = tomli.load(f) return pyproject_data["tool"]["poetry"]["version"] + # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package import_error_test_var = None -__shortname__ = "Sherlock" -__longname__ = "Sherlock: Find Usernames Across Social Networks" -__version__ = get_version() +__shortname__ = "Sherlock" +__longname__ = "Sherlock: Find Usernames Across Social Networks" +__version__ = get_version() -forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" +forge_api_latest_release = ( + "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" +) diff --git a/sherlock_project/__main__.py b/sherlock_project/__main__.py index a252de0fc1..47b65a76d7 100644 --- a/sherlock_project/__main__.py +++ b/sherlock_project/__main__.py @@ -15,8 +15,11 @@ python_version = sys.version.split()[0] if sys.version_info < (3, 9): - print(f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock.") + print( + f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock." + ) sys.exit(1) from sherlock_project import sherlock + sherlock.main() diff --git a/sherlock_project/notify.py b/sherlock_project/notify.py index f6c785d63f..a00d3b9259 100644 --- a/sherlock_project/notify.py +++ b/sherlock_project/notify.py @@ -3,6 +3,7 @@ This module defines the objects for notifying the caller about the results of queries. """ + from sherlock_project.result import QueryStatus from colorama import Fore, Style import webbrowser @@ -155,13 +156,21 @@ def start(self, message): title = "Checking username" - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + f"] {title}" + - Fore.WHITE + f" {message}" + - Fore.GREEN + " on:") + print( + Style.BRIGHT + + Fore.GREEN + + "[" + + Fore.YELLOW + + "*" + + Fore.GREEN + + f"] {title}" + + Fore.WHITE + + f" {message}" + + Fore.GREEN + + " on:" + ) # An empty line between first line and the result(more clear output) - print('\r') + print("\r") return @@ -201,52 +210,92 @@ def update(self, result): # Output to the terminal is desired. if result.status == QueryStatus.CLAIMED: self.countResults() - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.GREEN + "+" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + - f" {self.result.site_name}: " + - Style.RESET_ALL + - f"{self.result.site_url_user}") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.GREEN + + "+" + + Fore.WHITE + + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}: " + + Style.RESET_ALL + + f"{self.result.site_url_user}" + ) if self.browse: webbrowser.open(self.result.site_url_user, 2) elif result.status == QueryStatus.AVAILABLE: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + " Not Found!") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.YELLOW + + " Not Found!" + ) elif result.status == QueryStatus.UNKNOWN: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + f" {self.result.context}" + - Fore.YELLOW + " ") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.RED + + f" {self.result.context}" + + Fore.YELLOW + + " " + ) elif result.status == QueryStatus.ILLEGAL: if self.print_all: msg = "Illegal Username Format For This Site!" - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + f" {msg}") - + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.YELLOW + + f" {msg}" + ) + elif result.status == QueryStatus.WAF: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + " Blocked by bot detection" + - Fore.YELLOW + " (proxy may help)") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.RED + + " Blocked by bot detection" + + Fore.YELLOW + + " (proxy may help)" + ) else: # It should be impossible to ever get here... @@ -267,12 +316,20 @@ def finish(self, message="The processing has been finished."): """ NumberOfResults = self.countResults() - 1 - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Search completed with" + - Fore.WHITE + f" {NumberOfResults} " + - Fore.GREEN + "results" + Style.RESET_ALL - ) + print( + Style.BRIGHT + + Fore.GREEN + + "[" + + Fore.YELLOW + + "*" + + Fore.GREEN + + "] Search completed with" + + Fore.WHITE + + f" {NumberOfResults} " + + Fore.GREEN + + "results" + + Style.RESET_ALL + ) def __str__(self): """Convert Object To String. diff --git a/sherlock_project/result.py b/sherlock_project/result.py index c4d68b1c88..4e5b6c92f8 100644 --- a/sherlock_project/result.py +++ b/sherlock_project/result.py @@ -2,6 +2,7 @@ This module defines various objects for recording the results of queries. """ + from enum import Enum @@ -10,11 +11,12 @@ class QueryStatus(Enum): Describes status of query about a given username. """ - CLAIMED = "Claimed" # Username Detected - AVAILABLE = "Available" # Username Not Detected - UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username - ILLEGAL = "Illegal" # Username Not Allowable For This Site - WAF = "WAF" # Request blocked by WAF (i.e. Cloudflare) + + CLAIMED = "Claimed" # Username Detected + AVAILABLE = "Available" # Username Not Detected + UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username + ILLEGAL = "Illegal" # Username Not Allowable For This Site + WAF = "WAF" # Request blocked by WAF (i.e. Cloudflare) def __str__(self): """Convert Object To String. @@ -27,13 +29,16 @@ def __str__(self): """ return self.value -class QueryResult(): + +class QueryResult: """Query Result Object. Describes result of query about a given username. """ - def __init__(self, username, site_name, site_url_user, status, - query_time=None, context=None): + + def __init__( + self, username, site_name, site_url_user, status, query_time=None, context=None + ): """Create Query Result Object. Contains information about a specific method of detecting usernames on @@ -62,12 +67,12 @@ def __init__(self, username, site_name, site_url_user, status, Nothing. """ - self.username = username - self.site_name = site_name + self.username = username + self.site_name = site_name self.site_url_user = site_url_user - self.status = status - self.query_time = query_time - self.context = context + self.status = status + self.query_time = query_time + self.context = context return diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index f78d4b8cac..e2688ba6d3 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -10,10 +10,12 @@ import sys try: - from sherlock_project.__init__ import import_error_test_var # noqa: F401 + from sherlock_project.__init__ import import_error_test_var # noqa: F401 except ImportError: print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?") - print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.") + print( + "This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions." + ) sys.exit(1) import csv @@ -243,7 +245,7 @@ def sherlock( headers.update(net_info["headers"]) # URL of user on site (if it exists) - url = interpolate_string(net_info["url"], username.replace(' ', '%20')) + url = interpolate_string(net_info["url"], username.replace(" ", "%20")) # Don't make request if username is invalid for the site regex_check = net_info.get("regexCheck") @@ -383,10 +385,10 @@ def sherlock( # be highly targetted. Comment at the end of each fingerprint to # indicate target and date fingerprinted. WAFHitMsgs = [ - r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare - r'', # 2024-11-11 Cloudflare error page - r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS) - r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security + r".loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark", # 2024-05-13 Cloudflare + r'', # 2024-11-11 Cloudflare error page + r"AwsWafIntegration.forceRefreshToken", # 2024-11-11 Cloudfront (AWS) + r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:', # 2024-04-09 PerimeterX / Human Security ] if error_text is not None: @@ -396,8 +398,13 @@ def sherlock( query_status = QueryStatus.WAF else: - if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): - error_context = f"Unknown error type '{error_type}' for {social_network}" + if any( + errtype not in ["message", "status_code", "response_url"] + for errtype in error_type + ): + error_context = ( + f"Unknown error type '{error_type}' for {social_network}" + ) query_status = QueryStatus.UNKNOWN else: if "message" in error_type: @@ -426,7 +433,10 @@ def sherlock( else: query_status = QueryStatus.AVAILABLE - if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: + if ( + "status_code" in error_type + and query_status is not QueryStatus.AVAILABLE + ): error_codes = net_info.get("errorCode") query_status = QueryStatus.CLAIMED @@ -439,7 +449,10 @@ def sherlock( elif r.status_code >= 300 or r.status_code < 200: query_status = QueryStatus.AVAILABLE - if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: + if ( + "response_url" in error_type + and query_status is not QueryStatus.AVAILABLE + ): # For this detection method, we have turned off the redirect. # So, there is no need to check the response URL: it will always # match the request. Instead, we will ensure that the response @@ -925,8 +938,8 @@ def main(): { "username": usernames, "name": names, - "url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main], - "url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user], + "url_main": [f'=HYPERLINK("{u}")' for u in url_main], + "url_user": [f'=HYPERLINK("{u}")' for u in url_user], "exists": exists, "http_status": http_status, "response_time_s": response_time_s, diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index b7aaf4c58b..8d653fa592 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -3,6 +3,7 @@ This module supports storing information about websites. This is the raw data that will be used to search for usernames. """ + import json import requests import secrets @@ -11,9 +12,18 @@ MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json" EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt" + class SiteInformation: - def __init__(self, name, url_home, url_username_format, username_claimed, - information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)): + def __init__( + self, + name, + url_home, + url_username_format, + username_claimed, + information, + is_nsfw, + username_unclaimed=secrets.token_urlsafe(10), + ): """Create Site Information Object. Contains information about a specific website. @@ -58,7 +68,7 @@ def __init__(self, name, url_home, url_username_format, username_claimed, self.username_claimed = username_claimed self.username_unclaimed = secrets.token_urlsafe(32) self.information = information - self.is_nsfw = is_nsfw + self.is_nsfw = is_nsfw return @@ -77,11 +87,11 @@ def __str__(self): class SitesInformation: def __init__( - self, - data_file_path: str|None = None, - honor_exclusions: bool = True, - do_not_exclude: list[str] = [], - ): + self, + data_file_path: str | None = None, + honor_exclusions: bool = True, + do_not_exclude: list[str] = [], + ): """Create Sites Information Object. Contains information about all supported websites. @@ -123,7 +133,9 @@ def __init__( # Ensure that specified data file has correct extension. if not data_file_path.lower().endswith(".json"): - raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.") + raise FileNotFoundError( + f"Incorrect JSON file extension for data file '{data_file_path}'." + ) # if "http://" == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower(): if data_file_path.lower().startswith("http"): @@ -136,9 +148,9 @@ def __init__( ) if response.status_code != 200: - raise FileNotFoundError(f"Bad response while accessing " - f"data file URL '{data_file_path}'." - ) + raise FileNotFoundError( + f"Bad response while accessing data file URL '{data_file_path}'." + ) try: site_data = response.json() except Exception as error: @@ -158,11 +170,11 @@ def __init__( ) except FileNotFoundError: - raise FileNotFoundError(f"Problem while attempting to access " - f"data file '{data_file_path}'." - ) + raise FileNotFoundError( + f"Problem while attempting to access data file '{data_file_path}'." + ) - site_data.pop('$schema', None) + site_data.pop("$schema", None) if honor_exclusions: try: @@ -191,22 +203,22 @@ def __init__( # Add all site information from the json file to internal site list. for site_name in site_data: try: - - self.sites[site_name] = \ - SiteInformation(site_name, - site_data[site_name]["urlMain"], - site_data[site_name]["url"], - site_data[site_name]["username_claimed"], - site_data[site_name], - site_data[site_name].get("isNSFW",False) - - ) + self.sites[site_name] = SiteInformation( + site_name, + site_data[site_name]["urlMain"], + site_data[site_name]["url"], + site_data[site_name]["username_claimed"], + site_data[site_name], + site_data[site_name].get("isNSFW", False), + ) except KeyError as error: raise ValueError( f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." ) except TypeError: - print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n") + print( + f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n" + ) return @@ -226,7 +238,7 @@ def remove_nsfw_sites(self, do_not_remove: list = []): if self.sites[site].is_nsfw and site.casefold() not in do_not_remove: continue sites[site] = self.sites[site] - self.sites = sites + self.sites = sites def site_name_list(self): """Get Site Name List. diff --git a/tests/conftest.py b/tests/conftest.py index 69fce756c6..b78daeea52 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,27 +4,43 @@ import pytest from sherlock_project.sites import SitesInformation + def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]: - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions) - sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj} + sites_obj = SitesInformation( + data_file_path=os.path.join( + os.path.dirname(__file__), "../sherlock_project/resources/data.json" + ), + honor_exclusions=honor_exclusions, + ) + sites_iterable: dict[str, dict[str, str]] = { + site.name: site.information for site in sites_obj + } return sites_iterable + @pytest.fixture() def sites_obj(): - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) + sites_obj = SitesInformation( + data_file_path=os.path.join( + os.path.dirname(__file__), "../sherlock_project/resources/data.json" + ) + ) yield sites_obj + @pytest.fixture(scope="session") def sites_info(): yield fetch_local_manifest() + @pytest.fixture(scope="session") def remote_schema(): - schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json' + schema_url: str = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json" with urllib.request.urlopen(schema_url) as remoteschema: schemadat = json.load(remoteschema) yield schemadat + def pytest_addoption(parser): parser.addoption( "--chunked-sites", @@ -33,6 +49,7 @@ def pytest_addoption(parser): help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.", ) + def pytest_generate_tests(metafunc): if "chunked_sites" in metafunc.fixturenames: sites_info = fetch_local_manifest(honor_exclusions=False) @@ -40,9 +57,12 @@ def pytest_generate_tests(metafunc): # Ingest and apply site selections site_filter: str | None = metafunc.config.getoption("--chunked-sites") if site_filter: - selected_sites: list[str] = [site.strip() for site in site_filter.split(",")] + selected_sites: list[str] = [ + site.strip() for site in site_filter.split(",") + ] sites_info = { - site: data for site, data in sites_info.items() + site: data + for site, data in sites_info.items() if site in selected_sites } diff --git a/tests/few_test_basic.py b/tests/few_test_basic.py index f704032263..9ee48f879a 100644 --- a/tests/few_test_basic.py +++ b/tests/few_test_basic.py @@ -1,7 +1,8 @@ import sherlock_project -#from sherlock.sites import SitesInformation -#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") +# from sherlock.sites import SitesInformation +# local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") + def test_username_via_message(): sherlock_project.__main__("--version") diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index c28b9dc06a..b484a68a64 100644 --- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -3,36 +3,39 @@ import re import subprocess + class Interactives: - def run_cli(args:str = "") -> str: + def run_cli(args: str = "") -> str: """Pass arguments to Sherlock as a normal user on the command line""" # Adapt for platform differences (Windows likes to be special) if platform.system() == "Windows": - command:str = f"py -m sherlock_project {args}" + command: str = f"py -m sherlock_project {args}" else: - command:str = f"sherlock {args}" + command: str = f"sherlock {args}" - proc_out:str = "" + proc_out: str = "" try: - proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) + proc_out = subprocess.check_output( + command, shell=True, stderr=subprocess.STDOUT + ) return proc_out.decode() except subprocess.CalledProcessError as e: raise InteractivesSubprocessError(e.output.decode()) - def walk_sherlock_for_files_with(pattern: str) -> list[str]: """Check all files within the Sherlock package for matching patterns""" - pattern:re.Pattern = re.compile(pattern) - matching_files:list[str] = [] + pattern: re.Pattern = re.compile(pattern) + matching_files: list[str] = [] for root, dirs, files in os.walk("sherlock_project"): for file in files: - file_path = os.path.join(root,file) + file_path = os.path.join(root, file) if "__pycache__" in file_path: continue - with open(file_path, 'r', errors='ignore') as f: + with open(file_path, "r", errors="ignore") as f: if pattern.search(f.read()): matching_files.append(file_path) return matching_files + class InteractivesSubprocessError(Exception): pass diff --git a/tests/test_manifest.py b/tests/test_manifest.py index b73e92408f..b40bfa8970 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -3,17 +3,18 @@ import pytest from jsonschema import validate + def test_validate_manifest_against_local_schema(): """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" - json_relative: str = '../sherlock_project/resources/data.json' - schema_relative: str = '../sherlock_project/resources/data.schema.json' + json_relative: str = "../sherlock_project/resources/data.json" + schema_relative: str = "../sherlock_project/resources/data.schema.json" json_path: str = os.path.join(os.path.dirname(__file__), json_relative) schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) - with open(json_path, 'r') as f: + with open(json_path, "r") as f: jsondat = json.load(f) - with open(schema_path, 'r') as f: + with open(schema_path, "r") as f: schemadat = json.load(f) validate(instance=jsondat, schema=schemadat) @@ -22,18 +23,22 @@ def test_validate_manifest_against_local_schema(): @pytest.mark.online def test_validate_manifest_against_remote_schema(remote_schema): """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients.""" - json_relative: str = '../sherlock_project/resources/data.json' + json_relative: str = "../sherlock_project/resources/data.json" json_path: str = os.path.join(os.path.dirname(__file__), json_relative) - with open(json_path, 'r') as f: + with open(json_path, "r") as f: jsondat = json.load(f) validate(instance=jsondat, schema=remote_schema) + # Ensure that the expected values are beind returned by the site list -@pytest.mark.parametrize("target_name,target_expected_err_type", [ - ('GitHub', 'status_code'), - ('GitLab', 'message'), -]) -def test_site_list_iterability (sites_info, target_name, target_expected_err_type): - assert sites_info[target_name]['errorType'] == target_expected_err_type +@pytest.mark.parametrize( + "target_name,target_expected_err_type", + [ + ("GitHub", "status_code"), + ("GitLab", "message"), + ], +) +def test_site_list_iterability(sites_info, target_name, target_expected_err_type): + assert sites_info[target_name]["errorType"] == target_expected_err_type diff --git a/tests/test_probes.py b/tests/test_probes.py index 11fc8f8377..fb66cd4683 100644 --- a/tests/test_probes.py +++ b/tests/test_probes.py @@ -5,7 +5,7 @@ from sherlock_project.sherlock import sherlock from sherlock_project.notify import QueryNotify from sherlock_project.result import QueryStatus -#from sherlock_interactives import Interactives +# from sherlock_interactives import Interactives def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: @@ -16,90 +16,113 @@ def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: username=username, site_data=site_data, query_notify=query_notify, - )[site]['status'].status + )[site]["status"].status @pytest.mark.online class TestLiveTargets: """Actively test probes against live and trusted targets""" + # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('GitLab', 'ppfeister'), - ('AllMyLinks', 'blue'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("GitLab", "ppfeister"), + ("AllMyLinks", "blue"), + ], + ) def test_known_positives_via_message(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('GitHub', 'ppfeister'), - ('GitHub', 'sherlock-project'), - ('Docker Hub', 'ppfeister'), - ('Docker Hub', 'sherlock'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("GitHub", "ppfeister"), + ("GitHub", "sherlock-project"), + ("Docker Hub", "ppfeister"), + ("Docker Hub", "sherlock"), + ], + ) def test_known_positives_via_status_code(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('Keybase', 'blue'), - ('devRant', 'blue'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("Keybase", "blue"), + ("devRant", "blue"), + ], + ) def test_known_positives_via_response_url(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Randomly generate usernames of high length and test for positive availability # Randomly generated usernames should be simple alnum for simplicity and high # compatibility. Several attempts may be made ~just in case~ a real username is # generated. - @pytest.mark.parametrize('site,random_len',[ - ('GitLab', 255), - ('Codecademy', 30) - ]) + @pytest.mark.parametrize("site,random_len", [("GitLab", 255), ("Codecademy", 30)]) def test_likely_negatives_via_message(self, sites_info, site, random_len): num_attempts: int = 3 attempted_usernames: list[str] = [] status: QueryStatus = QueryStatus.CLAIMED for i in range(num_attempts): acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + random_handle = "".join( + random.choice(acceptable_types) for _ in range(random_len) + ) attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) + status = simple_query( + sites_info=sites_info, site=site, username=random_handle + ) if status is QueryStatus.AVAILABLE: break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." - + assert status is QueryStatus.AVAILABLE, ( + f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + ) # Randomly generate usernames of high length and test for positive availability # Randomly generated usernames should be simple alnum for simplicity and high # compatibility. Several attempts may be made ~just in case~ a real username is # generated. - @pytest.mark.parametrize('site,random_len',[ - ('GitHub', 39), - ('Docker Hub', 30) - ]) + @pytest.mark.parametrize("site,random_len", [("GitHub", 39), ("Docker Hub", 30)]) def test_likely_negatives_via_status_code(self, sites_info, site, random_len): num_attempts: int = 3 attempted_usernames: list[str] = [] status: QueryStatus = QueryStatus.CLAIMED for i in range(num_attempts): acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + random_handle = "".join( + random.choice(acceptable_types) for _ in range(random_len) + ) attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) + status = simple_query( + sites_info=sites_info, site=site, username=random_handle + ) if status is QueryStatus.AVAILABLE: break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + assert status is QueryStatus.AVAILABLE, ( + f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + ) def test_username_illegal_regex(sites_info): - site: str = 'BitBucket' - invalid_handle: str = '*#$Y&*JRE' - pattern = re.compile(sites_info[site]['regexCheck']) + site: str = "BitBucket" + invalid_handle: str = "*#$Y&*JRE" + pattern = re.compile(sites_info[site]["regexCheck"]) # Ensure that the username actually fails regex before testing sherlock assert pattern.match(invalid_handle) is None - assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL - + assert ( + simple_query(sites_info=sites_info, site=site, username=invalid_handle) + is QueryStatus.ILLEGAL + ) diff --git a/tests/test_ux.py b/tests/test_ux.py index 3c62463b50..84d7ed414a 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -3,41 +3,56 @@ from sherlock_interactives import Interactives from sherlock_interactives import InteractivesSubprocessError + def test_remove_nsfw(sites_obj): - nsfw_target: str = 'Pornhub' + nsfw_target: str = "Pornhub" assert nsfw_target in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites() assert nsfw_target not in {site.name: site.information for site in sites_obj} # Parametrized sites should *not* include Motherless, which is acting as the control -@pytest.mark.parametrize('nsfwsites', [ - ['Pornhub'], - ['Pornhub', 'Xvideos'], -]) +@pytest.mark.parametrize( + "nsfwsites", + [ + ["Pornhub"], + ["Pornhub", "Xvideos"], + ], +) def test_nsfw_explicit_selection(sites_obj, nsfwsites): for site in nsfwsites: assert site in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites) for site in nsfwsites: assert site in {site.name: site.information for site in sites_obj} - assert 'Motherless' not in {site.name: site.information for site in sites_obj} + assert "Motherless" not in {site.name: site.information for site in sites_obj} + def test_wildcard_username_expansion(): - assert sherlock.check_for_parameter('test{?}test') is True - assert sherlock.check_for_parameter('test{.}test') is False - assert sherlock.check_for_parameter('test{}test') is False - assert sherlock.check_for_parameter('testtest') is False - assert sherlock.check_for_parameter('test{?test') is False - assert sherlock.check_for_parameter('test?}test') is False - assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"] - - -@pytest.mark.parametrize('cliargs', [ - '', - '--site urghrtuight --egiotr', - '--', -]) + assert sherlock.check_for_parameter("test{?}test") is True + assert sherlock.check_for_parameter("test{.}test") is False + assert sherlock.check_for_parameter("test{}test") is False + assert sherlock.check_for_parameter("testtest") is False + assert sherlock.check_for_parameter("test{?test") is False + assert sherlock.check_for_parameter("test?}test") is False + assert sherlock.multiple_usernames("test{?}test") == [ + "test_test", + "test-test", + "test.test", + ] + + +@pytest.mark.parametrize( + "cliargs", + [ + "", + "--site urghrtuight --egiotr", + "--", + ], +) def test_no_usernames_provided(cliargs): - with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"): + with pytest.raises( + InteractivesSubprocessError, + match=r"error: the following arguments are required: USERNAMES", + ): Interactives.run_cli(cliargs) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index 33922c5e9d..91ff00d005 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -7,26 +7,36 @@ from sherlock_project.result import QueryResult, QueryStatus -FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit +FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable) -FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$' # Used in absence of a regexCheck entry +FALSE_POSITIVE_DEFAULT_PATTERN: str = ( + r"^[a-zA-Z0-9]{7,20}$" # Used in absence of a regexCheck entry +) -def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str: +def set_pattern_upper_bound( + pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND +) -> str: """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.""" - def replace_upper_bound(match: re.Match) -> str: # type: ignore - lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore + + def replace_upper_bound(match: re.Match) -> str: # type: ignore + lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore nonlocal upper_bound - upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823 - return f'{{{lower_bound},{upper_bound}}}' + upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823 + return f"{{{lower_bound},{upper_bound}}}" - pattern = re.sub(r'(? QueryStatus: + +def false_positive_check( + sites_info: dict[str, dict[str, str]], site: str, pattern: str +) -> QueryStatus: """Check if a site is likely to produce false positives.""" status: QueryStatus = QueryStatus.UNKNOWN @@ -38,13 +48,17 @@ def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, patte username=username, site_data=sites_info, query_notify=query_notify, - )[site]['status'] - - if not hasattr(result, 'status'): - raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}") - if type(result.status) is not QueryStatus: # type: ignore - raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore - status = result.status # type: ignore + )[site]["status"] + + if not hasattr(result, "status"): + raise TypeError( + f"Result for site {site} does not have 'status' attribute. Actual result: {result}" + ) + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError( + f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}" + ) # type: ignore + status = result.status # type: ignore if status in (QueryStatus.AVAILABLE, QueryStatus.WAF): return status @@ -52,36 +66,42 @@ def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, patte return status -def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus: +def false_negative_check( + sites_info: dict[str, dict[str, str]], site: str +) -> QueryStatus: """Check if a site is likely to produce false negatives.""" status: QueryStatus = QueryStatus.UNKNOWN query_notify: QueryNotify = QueryNotify() result: QueryResult | str = sherlock( - username=sites_info[site]['username_claimed'], + username=sites_info[site]["username_claimed"], site_data=sites_info, query_notify=query_notify, - )[site]['status'] - - if not hasattr(result, 'status'): - raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}") - if type(result.status) is not QueryStatus: # type: ignore - raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore - status = result.status # type: ignore + )[site]["status"] + + if not hasattr(result, "status"): + raise TypeError( + f"Result for site {site} does not have 'status' attribute. Actual result: {result}" + ) + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError( + f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}" + ) # type: ignore + status = result.status # type: ignore return status + @pytest.mark.validate_targets @pytest.mark.online class Test_All_Targets: - @pytest.mark.validate_targets_fp def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-positive inducting targets.""" pattern: str for site in chunked_sites: try: - pattern = chunked_sites[site]['regexCheck'] + pattern = chunked_sites[site]["regexCheck"] except KeyError: pattern = FALSE_POSITIVE_DEFAULT_PATTERN @@ -89,12 +109,15 @@ def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]): pattern = set_pattern_upper_bound(pattern) result: QueryStatus = false_positive_check(chunked_sites, site, pattern) - assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" + assert result is QueryStatus.AVAILABLE, ( + f"{site} produced false positive with pattern {pattern}, result was {result}" + ) @pytest.mark.validate_targets_fn def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-negative inducting targets.""" for site in chunked_sites: result: QueryStatus = false_negative_check(chunked_sites, site) - assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}" - + assert result is QueryStatus.CLAIMED, ( + f"{site} produced false negative, result was {result}" + ) diff --git a/tests/test_version.py b/tests/test_version.py index 2de64dddd0..ba3add659a 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,13 +2,14 @@ from sherlock_interactives import Interactives import sherlock_project + def test_versioning() -> None: # Ensure __version__ matches version presented to the user assert sherlock_project.__version__ in Interactives.run_cli("--version") # Ensure __init__ is single source of truth for __version__ in package # Temporarily allows sherlock.py so as to not trigger early upgrades - found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *') - expected:list = [ + found: list = Interactives.walk_sherlock_for_files_with(r"__version__ *= *") + expected: list = [ # Normalization is REQUIRED for Windows ( / vs \ ) os.path.normpath("sherlock_project/__init__.py"), ] diff --git a/tox.ini b/tox.ini index 8c43ac3014..055083ad15 100644 --- a/tox.ini +++ b/tox.ini @@ -28,11 +28,12 @@ commands = pytest -v -m "not online" [testenv:lint] -description = Lint with Ruff +description = Lint and format check with Ruff deps = ruff commands = ruff check + ruff format --check [gh-actions] python =