Skip to content

Commit aecfc1c

Browse files
VinciGit00 and claude
committed
feat: replace stealth/render_js booleans with FetchMode enum in FetchConfig
Align FetchConfig with the v2 API schema. Instead of separate `stealth` and `render_js` boolean fields, use a single `mode` enum with values: auto, fast, js, direct+stealth, js+stealth. Also rename `wait_ms` to `wait` and add `timeout` field to match the API contract. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8a316b0 commit aecfc1c

9 files changed

Lines changed: 110 additions & 65 deletions

examples/crawl_with_fetch_config_example.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Crawl with custom fetch configuration.
33
4-
Use FetchConfig to enable stealth mode, JS rendering, etc. for all
4+
Use FetchConfig to set the fetch mode, wait time, etc. for all
55
pages during the crawl.
66
"""
77

@@ -17,9 +17,8 @@
1717
max_pages=10,
1818
format="html",
1919
fetch_config=FetchConfig(
20-
stealth=True,
21-
render_js=True,
22-
wait_ms=1000,
20+
mode="js",
21+
wait=1000,
2322
),
2423
)
2524
print("Crawl started:", json.dumps(job, indent=2))

examples/extract_with_fetch_config_example.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Extract data from a JavaScript-heavy page using FetchConfig.
33
4-
Use FetchConfig to enable stealth mode, JS rendering, scrolling,
4+
Use FetchConfig to set the fetch mode, wait time, scrolling,
55
and other options needed for dynamic pages.
66
"""
77

@@ -15,9 +15,8 @@
1515
url="https://example.com",
1616
prompt="Extract all visible text content",
1717
fetch_config=FetchConfig(
18-
stealth=True,
19-
render_js=True,
20-
wait_ms=2000,
18+
mode="js+stealth",
19+
wait=2000,
2120
scrolls=3,
2221
),
2322
)

examples/monitor_with_config_example.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@
1414
prompt="Extract the top 5 news headlines with their dates",
1515
cron="0 */6 * * *", # Every 6 hours
1616
fetch_config=FetchConfig(
17-
stealth=True,
18-
render_js=True,
19-
wait_ms=2000,
17+
mode="js+stealth",
18+
wait=2000,
2019
),
2120
llm_config=LlmConfig(
2221
temperature=0.1,

examples/scrape_with_fetch_config_example.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
11
"""
22
Scrape a webpage with custom fetch configuration.
33
4-
FetchConfig allows you to control stealth mode, JavaScript rendering,
4+
FetchConfig allows you to control the fetch mode (proxy strategy),
55
wait times, cookies, headers, country-based geolocation, and more.
6+
7+
Available modes:
8+
- auto: Automatically selects the best provider chain (default)
9+
- fast: Direct HTTP fetch, fastest option, no JS rendering
10+
- js: Headless browser rendering for JavaScript-heavy pages
11+
- direct+stealth: Residential proxy with stealth headers (no JS)
12+
- js+stealth: JS rendering combined with stealth/residential proxy
613
"""
714

815
import json
@@ -15,9 +22,8 @@
1522
"https://example.com",
1623
format="markdown",
1724
fetch_config=FetchConfig(
18-
stealth=True,
19-
render_js=True,
20-
wait_ms=3000,
25+
mode="js+stealth",
26+
wait=3000,
2127
headers={"User-Agent": "MyBot/1.0"},
2228
cookies={"session": "abc123"},
2329
country="us",

scrapegraph-py/MIGRATION_V2.md

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,11 @@ response = client.extract(
9797
prompt="Extract the main heading and description",
9898
output_schema=MyPydanticModel,
9999
fetch_config=FetchConfig(
100+
mode="js+stealth",
100101
headers={"User-Agent": "MyBot"},
101102
cookies={"session": "abc123"},
102103
scrolls=3,
103-
render_js=True,
104-
stealth=True,
105-
wait_ms=2000,
104+
wait=2000,
106105
),
107106
)
108107
```
@@ -115,9 +114,9 @@ response = client.extract(
115114
| `headers` | `fetch_config=FetchConfig(headers=...)` |
116115
| `cookies` | `fetch_config=FetchConfig(cookies=...)` |
117116
| `number_of_scrolls` | `fetch_config=FetchConfig(scrolls=...)` |
118-
| `render_heavy_js` | `fetch_config=FetchConfig(render_js=...)` |
119-
| `stealth` | `fetch_config=FetchConfig(stealth=...)` |
120-
| `wait_ms` | `fetch_config=FetchConfig(wait_ms=...)` |
117+
| `render_heavy_js` | `fetch_config=FetchConfig(mode="js")` or `mode="js+stealth"` |
118+
| `stealth` | `fetch_config=FetchConfig(mode="direct+stealth")` or `mode="js+stealth"` |
119+
| `wait_ms` | `fetch_config=FetchConfig(wait=...)` |
121120
| `mock` | Removed |
122121
| `plain_text` | Removed |
123122
| `total_pages` | Removed |
@@ -163,7 +162,7 @@ response = client.search(
163162
| `num_results` | `num_results` (unchanged) |
164163
| `output_schema` | `output_schema` (unchanged) |
165164
| `extraction_mode` | Removed (always AI extraction) |
166-
| `stealth` | Removed |
165+
| `stealth` | Removed (use `fetch_config=FetchConfig(mode=...)` on other endpoints) |
167166
| `location_geo_code` | Removed |
168167
| `time_range` | Removed |
169168
| `mock` | Removed |
@@ -211,9 +210,8 @@ response = client.scrape(
211210
"https://example.com",
212211
format="markdown",
213212
fetch_config=FetchConfig(
214-
render_js=True,
215-
stealth=True,
216-
wait_ms=2000,
213+
mode="js+stealth",
214+
wait=2000,
217215
headers={"User-Agent": "MyBot"},
218216
),
219217
)
@@ -222,11 +220,11 @@ response = client.scrape(
222220
| v1 parameter | v2 equivalent |
223221
|---|---|
224222
| `website_url` | `url` (positional) |
225-
| `render_heavy_js` | `fetch_config=FetchConfig(render_js=...)` |
223+
| `render_heavy_js` | `fetch_config=FetchConfig(mode="js")` or `mode="js+stealth"` |
226224
| `branding` | `format="branding"` |
227225
| `headers` | `fetch_config=FetchConfig(headers=...)` |
228-
| `stealth` | `fetch_config=FetchConfig(stealth=...)` |
229-
| `wait_ms` | `fetch_config=FetchConfig(wait_ms=...)` |
226+
| `stealth` | `fetch_config=FetchConfig(mode="direct+stealth")` or `mode="js+stealth"` |
227+
| `wait_ms` | `fetch_config=FetchConfig(wait=...)` |
230228
| `mock` | Removed |
231229
| `return_toon` | Removed |
232230

@@ -254,7 +252,7 @@ response = client.markdownify(
254252
response = client.scrape(
255253
"https://example.com",
256254
format="markdown",
257-
fetch_config=FetchConfig(render_js=True, stealth=True),
255+
fetch_config=FetchConfig(mode="js+stealth"),
258256
)
259257
```
260258

@@ -305,9 +303,8 @@ response = client.crawl.start(
305303
include_patterns=["/blog/*"],
306304
exclude_patterns=["/admin/*"],
307305
fetch_config=FetchConfig(
308-
render_js=True,
309-
stealth=True,
310-
wait_ms=1000,
306+
mode="js+stealth",
307+
wait=1000,
311308
headers={"User-Agent": "MyBot"},
312309
),
313310
)
@@ -332,7 +329,7 @@ client.crawl.resume(crawl_id)
332329
| `max_pages` | `max_pages` (unchanged) |
333330
| `include_paths` | `include_patterns` |
334331
| `exclude_paths` | `exclude_patterns` |
335-
| `headers`, `stealth`, `render_heavy_js`, `wait_ms` | Moved to `fetch_config=FetchConfig(...)` |
332+
| `headers`, `stealth`, `render_heavy_js`, `wait_ms` | Moved to `fetch_config=FetchConfig(mode=..., wait=..., headers=...)` |
336333
| `same_domain_only` | Removed |
337334
| `batch_size` | Removed |
338335
| `sitemap` | Removed |
@@ -402,7 +399,7 @@ monitor = client.monitor.create(
402399
prompt="Extract company info",
403400
cron="0 9 * * *",
404401
output_schema={"type": "object", "properties": {"name": {"type": "string"}}},
405-
fetch_config=FetchConfig(stealth=True),
402+
fetch_config=FetchConfig(mode="direct+stealth"),
406403
llm_config=LlmConfig(temperature=0.1),
407404
)
408405

@@ -530,17 +527,27 @@ Controls how pages are fetched. Used by `scrape()`, `extract()`, `crawl.start()`
530527
from scrapegraph_py import FetchConfig
531528

532529
config = FetchConfig(
533-
mock=False, # Use mock mode for testing
534-
stealth=True, # Avoid bot detection
535-
scrolls=3, # Number of page scrolls (0-100)
536-
country="us", # Geo-located requests
537-
cookies={"k": "v"}, # Cookies to send
530+
mode="js+stealth", # Fetch mode: auto, fast, js, direct+stealth, js+stealth
531+
timeout=30000, # Request timeout in ms (1000-60000)
532+
wait=2000, # Wait after page load in ms (0-30000)
538533
headers={"k": "v"}, # Custom HTTP headers
539-
wait_ms=2000, # Wait before scraping (ms)
540-
render_js=True, # Render heavy JavaScript
534+
cookies={"k": "v"}, # Cookies to send
535+
country="us", # Two-letter country code for geo-located requests
536+
scrolls=3, # Number of page scrolls (0-100)
537+
mock=False, # Use mock mode for testing
541538
)
542539
```
543540

541+
**Available fetch modes:**
542+
543+
| Mode | Description |
544+
|---|---|
545+
| `auto` | Automatically selects the best provider chain (default) |
546+
| `fast` | Direct HTTP fetch via impit — fastest, no JS rendering |
547+
| `js` | Headless browser rendering for JavaScript-heavy pages |
548+
| `direct+stealth` | Residential proxy with stealth headers, no JS |
549+
| `js+stealth` | JS rendering combined with stealth/residential proxy |
550+
544551
### LlmConfig
545552

546553
Controls the AI model used for extraction. Used by `extract()`, `search()`, and `monitor.create()`.
@@ -639,7 +646,7 @@ For a fast migration, search your codebase for these patterns:
639646
| `client.replace_scheduled_job(` | Remove |
640647
| `client.get_job_executions(` | Remove |
641648
| `return_toon=True` | Remove |
642-
| `render_heavy_js=` | `fetch_config=FetchConfig(render_js=...)` |
649+
| `render_heavy_js=` | `fetch_config=FetchConfig(mode="js")` or `mode="js+stealth"` |
643650
| `from scrapegraph_py.models.smartscraper import` | Remove |
644651
| `from scrapegraph_py.models.searchscraper import` | Remove |
645652
| `from scrapegraph_py.models.markdownify import` | Remove |

scrapegraph-py/scrapegraph_py/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from .models.monitor import MonitorCreateRequest
3333
from .models.scrape import ScrapeFormat, ScrapeRequest
3434
from .models.search import SearchRequest
35-
from .models.shared import FetchConfig, LlmConfig
35+
from .models.shared import FetchConfig, FetchMode, LlmConfig
3636

3737
__version__ = VERSION
3838

@@ -41,6 +41,7 @@
4141
"AsyncClient",
4242
# Shared config
4343
"FetchConfig",
44+
"FetchMode",
4445
"LlmConfig",
4546
# Scrape
4647
"ScrapeFormat",

scrapegraph-py/scrapegraph_py/models/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@
88
from .monitor import MonitorCreateRequest
99
from .scrape import GetScrapeRequest, ScrapeFormat, ScrapeRequest
1010
from .search import SearchRequest
11-
from .shared import FetchConfig, LlmConfig
11+
from .shared import FetchConfig, FetchMode, LlmConfig
1212

1313
__all__ = [
1414
# Shared
1515
"FetchConfig",
16+
"FetchMode",
1617
"LlmConfig",
1718
# Scrape
1819
"ScrapeFormat",

scrapegraph-py/scrapegraph_py/models/shared.py

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,38 +4,61 @@
44
These models are used across multiple endpoints for fetch and LLM configuration.
55
"""
66

7+
from enum import Enum
78
from typing import Any, Dict, Optional
89

910
from pydantic import BaseModel, Field
1011

1112

13+
class FetchMode(str, Enum):
14+
"""Fetch/proxy mode controlling how pages are retrieved.
15+
16+
- AUTO: Automatically selects the best provider chain.
17+
- FAST: Direct HTTP fetch via impit (fastest, no JS).
18+
- JS: Headless browser rendering for JavaScript-heavy pages.
19+
- DIRECT_STEALTH: Residential proxy with stealth headers (no JS).
20+
- JS_STEALTH: JS rendering combined with stealth/residential proxy.
21+
"""
22+
23+
AUTO = "auto"
24+
FAST = "fast"
25+
JS = "js"
26+
DIRECT_STEALTH = "direct+stealth"
27+
JS_STEALTH = "js+stealth"
28+
29+
1230
class FetchConfig(BaseModel):
1331
"""Configuration for how pages are fetched."""
1432

15-
mock: bool = Field(default=False, description="Use mock mode for testing")
16-
stealth: bool = Field(
17-
default=False, description="Enable stealth mode to avoid bot detection"
18-
)
19-
scrolls: Optional[int] = Field(
20-
default=None, ge=0, le=100, description="Number of scrolls to perform (0-100)"
33+
mode: FetchMode = Field(
34+
default=FetchMode.AUTO,
35+
description="Fetch/proxy mode: 'auto', 'fast', 'js', 'direct+stealth', 'js+stealth'",
2136
)
22-
country: Optional[str] = Field(
23-
default=None, description="Country code for geo-located requests (e.g. 'us')"
37+
timeout: Optional[int] = Field(
38+
default=None,
39+
ge=1000,
40+
le=60000,
41+
description="Request timeout in milliseconds (1000-60000)",
2442
)
25-
cookies: Optional[Dict[str, str]] = Field(
26-
default=None, description="Cookies to send with the request"
43+
wait: Optional[int] = Field(
44+
default=None,
45+
ge=0,
46+
le=30000,
47+
description="Milliseconds to wait after page load before scraping (0-30000)",
2748
)
2849
headers: Optional[Dict[str, str]] = Field(
2950
default=None, description="Custom HTTP headers to send with the request"
3051
)
31-
wait_ms: Optional[int] = Field(
32-
default=None,
33-
ge=0,
34-
description="Milliseconds to wait before scraping for JS rendering",
52+
cookies: Optional[Dict[str, str]] = Field(
53+
default=None, description="Cookies to send with the request"
3554
)
36-
render_js: bool = Field(
37-
default=False, description="Whether to render heavy JavaScript"
55+
country: Optional[str] = Field(
56+
default=None, description="Two-letter country code for geo-located requests (e.g. 'us')"
3857
)
58+
scrolls: Optional[int] = Field(
59+
default=None, ge=0, le=100, description="Number of scrolls to perform (0-100)"
60+
)
61+
mock: bool = Field(default=False, description="Use mock mode for testing")
3962

4063
def model_dump(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
4164
kwargs.setdefault("exclude_none", True)

scrapegraph-py/tests/test_models.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from scrapegraph_py.models.monitor import MonitorCreateRequest
99
from scrapegraph_py.models.scrape import ScrapeFormat, ScrapeRequest
1010
from scrapegraph_py.models.search import SearchRequest
11-
from scrapegraph_py.models.shared import FetchConfig, LlmConfig
11+
from scrapegraph_py.models.shared import FetchConfig, FetchMode, LlmConfig
1212

1313
# ------------------------------------------------------------------
1414
# Shared models
@@ -18,15 +18,25 @@
1818
def test_fetch_config_defaults():
1919
config = FetchConfig()
2020
assert config.mock is False
21-
assert config.stealth is False
22-
assert config.render_js is False
21+
assert config.mode == FetchMode.AUTO
2322

2423

2524
def test_fetch_config_excludes_none():
26-
config = FetchConfig(stealth=True)
25+
config = FetchConfig(mode="fast")
2726
data = config.model_dump()
2827
assert "cookies" not in data
29-
assert data["stealth"] is True
28+
assert data["mode"] == "fast"
29+
30+
31+
def test_fetch_config_all_modes():
32+
for mode in FetchMode:
33+
config = FetchConfig(mode=mode)
34+
assert config.mode == mode
35+
36+
37+
def test_fetch_config_invalid_mode():
38+
with pytest.raises(ValueError):
39+
FetchConfig(mode="invalid")
3040

3141

3242
def test_llm_config_excludes_none():

0 commit comments

Comments (0)