Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions courlan/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
status_forcelist=[
429,
499,
500,
502,
503,
504,
Expand Down Expand Up @@ -61,11 +60,19 @@ def redirection_test(url: str) -> str:
try:
rhead = HTTP_POOL.request("HEAD", url) # type:ignore[no-untyped-call]
except Exception as err:
LOGGER.exception("unknown error: %s %s", url, err)
LOGGER.exception("unknown HEAD error: %s %s", url, err)
else:
# response
if rhead.status in ACCEPTABLE_CODES:
LOGGER.debug("result found: %s %s", rhead.geturl(), rhead.status)
return rhead.geturl() # type: ignore
# Some sites don't implement HEAD, fallback to GET
elif rhead.status == 500:
try:
rhead = HTTP_POOL.request("GET", url) # type:ignore[no-untyped-call]
if rhead.status in ACCEPTABLE_CODES:
LOGGER.debug("result found with GET: %s %s", rhead.geturl(), rhead.status)
return rhead.geturl() # type: ignore
except Exception as err:
LOGGER.exception("unknown GET error: %s %s", url, err)
# else:
raise ValueError(f"cannot reach URL: ${url}")
34 changes: 33 additions & 1 deletion tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,14 +750,46 @@ def test_domain_filter():

def test_urlcheck_redirects():
    """Test redirection checks done by check_url() with with_redirects=True."""
    # A redirecting URL is expected to resolve to its target, returned
    # together with the target's domain.
    assert check_url(
        "https://httpbin.org/redirect-to?url=http%3A%2F%2Fexample.org",
        with_redirects=True,
    ) == (
        "http://example.org",
        "example.org",
    )
    # A 404 status endpoint is expected to yield no result.
    assert check_url("https://httpbun.org/status/404", with_redirects=True) is None
    # Presumably an unresolvable host; it is expected to yield no result as well.
    assert check_url("https://www.ht.or", with_redirects=True) is None


def test_redirection_fallback_from_head_to_get():
    """Check that redirection_test retries with GET once HEAD answers 500."""
    from unittest.mock import Mock, patch
    from courlan.network import redirection_test

    test_url = "https://example.org/test"

    # Canned replies: the HEAD request reports a server error,
    # the follow-up GET request succeeds.
    head_reply = Mock(status=500, geturl=Mock(return_value=test_url))
    get_reply = Mock(status=200, geturl=Mock(return_value=test_url))

    # Replace the pool's request method so that successive calls
    # hand out the HEAD reply first and the GET reply second.
    with patch(
        'courlan.network.HTTP_POOL.request',
        side_effect=[head_reply, get_reply],
    ) as fake_request:
        resolved = redirection_test(test_url)

    # The URL reported by the successful GET reply is returned.
    assert resolved == test_url

    # Exactly two requests were issued: HEAD first, then GET.
    assert fake_request.call_count == 2
    positional_args = [call[0] for call in fake_request.call_args_list]
    assert positional_args == [("HEAD", test_url), ("GET", test_url)]


def test_urlutils():
"""Test URL manipulation tools"""
# domain extraction
Expand Down
Loading