Skip to content

Commit d9c9e28

Browse files
committed
Merge main into pre/beta (align pre/beta with main)
2 parents 8e31b6a + 2dd5809 commit d9c9e28

73 files changed

Lines changed: 19254 additions & 3110 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.github/workflows/ci.yml

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
branches: [main]
8+
9+
jobs:
10+
lint:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v4
14+
- uses: astral-sh/setup-uv@v3
15+
- run: uv sync --frozen
16+
- run: uv run ruff check src/ tests/
17+
- run: uv run ruff format --check src/
18+
19+
test:
20+
runs-on: ubuntu-latest
21+
strategy:
22+
matrix:
23+
python: ["3.12", "3.14"]
24+
steps:
25+
- uses: actions/checkout@v4
26+
- uses: astral-sh/setup-uv@v3
27+
- run: uv python install ${{ matrix.python }}
28+
- run: uv sync --python ${{ matrix.python }}
29+
- run: uv run pytest tests/test_client.py -v

.github/workflows/codeql.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,4 +60,4 @@ jobs:
6060
# Prefix the list here with "+" to use these queries and those in the config file.
6161

6262
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
63-
# queries: security-extended,security-and-quality
63+
# queries: security-extended,security-and-quality

.github/workflows/dependency-review.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,4 +36,4 @@ jobs:
3636
comment-summary-in-pr: always
3737
# fail-on-severity: moderate
3838
# deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later
39-
# retry-on-snapshot-warnings: true
39+
# retry-on-snapshot-warnings: true
Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,32 @@
1-
# This workflow will upload a Python Package using Twine when a release is created
2-
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3-
4-
name: Upload Python Package
1+
name: Publish to PyPI
52

63
on:
74
release:
85
types: [published]
96

107
jobs:
11-
deploy:
12-
8+
test:
139
runs-on: ubuntu-latest
10+
steps:
11+
- uses: actions/checkout@v4
12+
- uses: astral-sh/setup-uv@v3
13+
- run: uv sync --frozen
14+
- run: uv run pytest tests/test_client.py -v
1415

16+
publish:
17+
needs: test
18+
runs-on: ubuntu-latest
1519
steps:
16-
- uses: actions/checkout@v4
17-
- name: Set up Python
18-
uses: actions/setup-python@v5
19-
with:
20-
python-version: '3.x'
21-
- name: Install dependencies
22-
run: |
23-
python -m pip install --upgrade pip
24-
pip install setuptools wheel twine
25-
- name: Build and publish
26-
env:
27-
TWINE_USERNAME: mvincig11
28-
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29-
run: |
30-
git fetch --all --tags
31-
python setup.py sdist bdist_wheel
32-
twine upload dist/*
20+
- uses: actions/checkout@v4
21+
- uses: astral-sh/setup-uv@v3
22+
23+
- name: Build
24+
run: uv build
25+
26+
- name: Publish to PyPI
27+
env:
28+
TWINE_USERNAME: __token__
29+
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
30+
run: |
31+
uv pip install twine
32+
uv run twine upload dist/*

.github/workflows/release.yml

Lines changed: 20 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1,89 +1,46 @@
11
name: Release
2+
23
on:
34
push:
4-
branches:
5-
- main
6-
- pre/*
5+
branches: [main]
76

87
jobs:
9-
build:
10-
name: Build
11-
runs-on: ubuntu-latest
12-
steps:
13-
- name: Install git
14-
run: |
15-
sudo apt update
16-
sudo apt install -y git
17-
- name: Install uv
18-
uses: astral-sh/setup-uv@v3
19-
- name: Install Node Env
20-
uses: actions/setup-node@v4
21-
with:
22-
node-version: 20
23-
- name: Checkout
24-
uses: actions/checkout@v4.1.1
25-
with:
26-
fetch-depth: 0
27-
persist-credentials: false
28-
- name: Build app
29-
run: |
30-
cd scrapegraph-py
31-
uv sync --frozen
32-
uv build
33-
id: build_cache
34-
if: success()
35-
- name: Cache build
36-
uses: actions/cache@v4
37-
with:
38-
path: scrapegraph-py/dist
39-
key: ${{ runner.os }}-build-${{ hashFiles('scrapegraph-py/dist/**') }}
40-
if: steps.build_cache.outputs.id != ''
41-
428
release:
43-
name: Release
449
runs-on: ubuntu-latest
45-
needs: build
46-
environment: development
47-
if: |
48-
github.event_name == 'push' && github.ref == 'refs/heads/main' ||
49-
github.event_name == 'push' && github.ref == 'refs/heads/pre/beta' ||
50-
github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged && github.event.pull_request.base.ref == 'main' ||
51-
github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged && github.event.pull_request.base.ref == 'pre/beta'
5210
permissions:
5311
contents: write
5412
issues: write
5513
pull-requests: write
5614
id-token: write
5715
steps:
58-
- name: Checkout repo
59-
uses: actions/checkout@v4.1.1
16+
- uses: actions/checkout@v4
6017
with:
6118
fetch-depth: 0
6219
persist-credentials: false
63-
- name: Install uv
64-
uses: astral-sh/setup-uv@v3
65-
- name: Setup Python environment
66-
run: |
67-
cd ./scrapegraph-py
68-
uv sync
69-
- name: Restore build artifacts
70-
uses: actions/cache@v4
20+
21+
- uses: astral-sh/setup-uv@v3
22+
23+
- uses: actions/setup-node@v4
7124
with:
72-
path: ./scrapegraph-py/dist
73-
key: ${{ runner.os }}-build-${{ hashFiles('./scrapegraph-py/dist/**') }}
25+
node-version: 20
26+
27+
- name: Build
28+
run: |
29+
uv sync --frozen
30+
uv build
31+
7432
- name: Semantic Release
75-
uses: cycjimmy/semantic-release-action@v4.1.0
33+
uses: cycjimmy/semantic-release-action@v4
7634
env:
7735
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
7836
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
7937
with:
80-
working_directory: ./scrapegraph-py
8138
semantic_version: 23
8239
extra_plugins: |
8340
semantic-release-pypi@3
84-
@semantic-release/git
85-
@semantic-release/commit-analyzer@12
86-
@semantic-release/release-notes-generator@13
87-
@semantic-release/github@10
41+
@semantic-release/git
42+
@semantic-release/commit-analyzer@12
43+
@semantic-release/release-notes-generator@13
44+
@semantic-release/github@10
8845
@semantic-release/changelog@6
89-
conventional-changelog-conventionalcommits@7
46+
conventional-changelog-conventionalcommits@7

.gitignore

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,47 @@
11
.env
2-
# Ignore .DS_Store files anywhere in the repository
2+
.env.*
3+
*.csv
4+
5+
# OS
36
.DS_Store
47
**/.DS_Store
5-
*.csv
8+
9+
# Python
10+
__pycache__/
11+
*.py[cod]
12+
*$py.class
13+
*.so
14+
.Python
15+
build/
16+
dist/
17+
*.egg-info/
18+
*.egg
19+
.eggs/
20+
21+
# Virtual environments
22+
venv/
23+
.venv/
24+
env/
25+
26+
# Testing
27+
.pytest_cache/
28+
.coverage
29+
htmlcov/
30+
.tox/
31+
.nox/
32+
33+
# Linting/formatting
34+
.ruff_cache/
35+
.mypy_cache/
36+
37+
# IDE
38+
.idea/
39+
.vscode/
40+
*.swp
41+
*.swo
42+
43+
# Build artifacts
44+
*.whl
45+
46+
# Misc
47+
.bfg-report/

CLAUDE.md

Lines changed: 116 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,116 @@
1+
# CLAUDE.md
2+
3+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4+
5+
## Project Overview
6+
7+
**scrapegraph-py** is the official Python SDK for the ScrapeGraph AI API. It provides a Python client for intelligent web scraping powered by AI.
8+
9+
## Repository Structure
10+
11+
```
12+
scrapegraph-py/
13+
├── scrapegraph_py/ # Python SDK source
14+
├── tests/ # Test suite
15+
├── examples/ # Usage examples
16+
├── docs/ # MkDocs documentation
17+
├── cookbook/ # Tutorials and recipes
18+
└── .github/workflows/ # CI/CD
19+
```
20+
21+
## Tech Stack
22+
23+
- **Language**: Python 3.10+
24+
- **Package Manager**: uv (recommended) or pip
25+
- **Core Dependencies**: requests, pydantic, python-dotenv, aiohttp
26+
- **Testing**: pytest, pytest-asyncio, pytest-mock, aioresponses
27+
- **Code Quality**: ruff
28+
- **Build**: hatchling
29+
- **Release**: semantic-release
30+
31+
## Commands
32+
33+
```bash
34+
# Install
35+
uv sync
36+
37+
# Test
38+
uv run pytest tests/ -v
39+
40+
# Format & lint
41+
uv run ruff format src tests
42+
uv run ruff check src tests --fix
43+
44+
# Build
45+
uv build
46+
```
47+
48+
## Before completing any task
49+
50+
Always run these commands before committing or saying a task is done:
51+
52+
```bash
53+
uv run ruff format src tests
54+
uv run ruff check src tests --fix
55+
uv build
56+
uv run pytest tests/ -v
57+
```
58+
59+
No exceptions.
60+
61+
## Architecture
62+
63+
**Core Components:**
64+
65+
1. **Clients** (`scrapegraph_py/`):
66+
- `client.py` - Sync client
67+
- `async_client.py` - Async client
68+
69+
2. **Models** (`scrapegraph_py/models/`):
70+
- Pydantic models for request/response validation
71+
72+
3. **Config** (`scrapegraph_py/`):
73+
- `config.py` - API base URL, timeouts
74+
- `exceptions.py` - Custom exceptions
75+
76+
## API Endpoints
77+
78+
| Endpoint | Method | Purpose |
79+
|----------|--------|---------|
80+
| SmartScraper | `smartscraper()` | AI data extraction |
81+
| SearchScraper | `searchscraper()` | Multi-URL search |
82+
| Markdownify | `markdownify()` | HTML to Markdown |
83+
| Crawler | `crawler()` | Sitemap & crawling |
84+
| AgenticScraper | `agentic_scraper()` | Browser automation |
85+
| Scrape | `scrape()` | Basic HTML fetch |
86+
| Credits | `get_credits()` | Balance check |
87+
88+
## Adding New Endpoint
89+
90+
1. Add models in `scrapegraph_py/models/`
91+
2. Add sync method to `client.py`
92+
3. Add async method to `async_client.py`
93+
4. Export in `models/__init__.py`
94+
5. Add tests in `tests/`
95+
96+
## Environment Variables
97+
98+
- `SGAI_API_KEY` - API key for authentication
99+
100+
## Usage
101+
102+
```python
103+
from scrapegraph_py import Client
104+
105+
client = Client(api_key="your-key")
106+
response = client.smartscraper(
107+
website_url="https://example.com",
108+
user_prompt="Extract title"
109+
)
110+
print(response.result)
111+
```
112+
113+
## Links
114+
115+
- [API Docs](https://docs.scrapegraphai.com)
116+
- [PyPI](https://pypi.org/project/scrapegraph-py/)

0 commit comments

Comments (0)