Skip to content
Merged
10 changes: 5 additions & 5 deletions examples/deployment/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
services:
html2rss:
html2rss-web:
image: html2rss/web:latest
env_file: .env
Comment thread
gildesmarais marked this conversation as resolved.
Outdated

caddy:
image: caddy:2-alpine
depends_on:
- html2rss
- html2rss-web
command:
- caddy
- reverse-proxy
- --from
- ${CADDY_HOST}
- --to
- html2rss:3000
- html2rss-web:4000
ports:
- "80:80"
- "443:443"
Expand All @@ -23,13 +23,13 @@ services:
watchtower:
image: containrrr/watchtower
depends_on:
- html2rss
- html2rss-web
- caddy
command:
- --cleanup
- --interval
- "300"
- html2rss
- html2rss-web
- caddy
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
Expand Down
79 changes: 56 additions & 23 deletions src/components/docs/DockerComposeSnippet.astro
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@ const snippets: Record<Props["variant"], string> = {
restart: unless-stopped
ports:
- "127.0.0.1:4000:4000"
env_file:
- path: .env
required: false
environment:
Comment thread
gildesmarais marked this conversation as resolved.
RACK_ENV: production
PORT: 4000
HTML2RSS_SECRET_KEY: your-generated-secret-key
HEALTH_CHECK_TOKEN: your-health-check-token
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
SENTRY_DSN: \${SENTRY_DSN:-}
BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
BROWSERLESS_IO_API_TOKEN: your-browserless-token
BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}
Comment thread
gildesmarais marked this conversation as resolved.

browserless:
image: "${browserlessImage}"
Expand All @@ -35,6 +39,7 @@ const snippets: Record<Props["variant"], string> = {
productionCaddy: `services:
caddy:
image: ${caddyImage}
restart: unless-stopped
ports:
- "80:80"
- "443:443"
Expand All @@ -46,39 +51,67 @@ const snippets: Record<Props["variant"], string> = {
- --from
- \${CADDY_HOST}
- --to
- html2rss:3000
html2rss:
- html2rss-web:4000

html2rss-web:
image: ${webImage}
env_file: .env
restart: unless-stopped
env_file:
- path: .env
required: false
environment:
RACK_ENV: production
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
SENTRY_DSN: \${SENTRY_DSN:-}
BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}

browserless:
image: "${browserlessImage}"
restart: unless-stopped
environment:
PORT: 4002
CONCURRENT: 10
TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}

volumes:
caddy_data:`,
secure: `services:
html2rss:
html2rss-web:
image: ${webImage}
restart: unless-stopped
env_file:
- path: .env
required: false
environment:
RACK_ENV: production
LOG_LEVEL: warn
HEALTH_CHECK_USERNAME: your-secure-username
HEALTH_CHECK_PASSWORD: your-very-secure-password
BASE_URL: https://yourdomain.com`,
PORT: 4000
HTML2RSS_SECRET_KEY: \${HTML2RSS_SECRET_KEY:?set HTML2RSS_SECRET_KEY}
HEALTH_CHECK_TOKEN: \${HEALTH_CHECK_TOKEN:?set HEALTH_CHECK_TOKEN}
SENTRY_DSN: \${SENTRY_DSN:-}
BROWSERLESS_IO_WEBSOCKET_URL: ws://browserless:4002
BROWSERLESS_IO_API_TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}

browserless:
image: "${browserlessImage}"
restart: unless-stopped
environment:
PORT: 4002
CONCURRENT: 10
TOKEN: \${BROWSERLESS_IO_API_TOKEN:?set BROWSERLESS_IO_API_TOKEN}`,
watchtower: `services:
watchtower:
image: ${watchtowerImage}
depends_on:
- html2rss
- caddy
command:
- --cleanup
- --interval
- "300"
- html2rss
- caddy
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
restart: unless-stopped`,
- /var/run/docker.sock:/var/run/docker.sock
# Optional for private registries only:
# - "\${HOME}/.docker/config.json:/config.json:ro"
command: --cleanup --interval 7200`,
Comment thread
gildesmarais marked this conversation as resolved.
Outdated
resourceGuardrails: `services:
html2rss:
html2rss-web:
image: ${webImage}
deploy:
resources:
Expand Down
50 changes: 12 additions & 38 deletions src/content/docs/get-involved/self-hosting.mdx
Original file line number Diff line number Diff line change
@@ -1,54 +1,28 @@
---
title: "Self-Host Your Own Instance"
description: "Take control of your information diet. Host your own html2rss instance and join the decentralized web movement."
description: "Start self-hosting with the current html2rss-web setup and deployment docs."
sidebar:
order: 3
---

Turn any website into an RSS feed. Self-host your own instance to take back control of your information diet and help the html2rss ecosystem grow for everyone.
This page is the short routing point for self-hosting. The current setup and deployment instructions live under the `html2rss-web` docs, so the Docker, token, and Browserless guidance exists in only one place.

## Before You Begin
## Recommended Path

This guide walks you through running a production-ready instance that friends, teams, or communities can rely on. You'll need:
1. **[Run html2rss-web locally](/web-application/getting-started/)** to verify your own instance with an included feed first.
2. **[Deploy html2rss-web to production](/web-application/how-to/deployment/)** when you are ready to expose or operate it.
3. **[Use automatic feed generation](/web-application/how-to/use-automatic-feed-generation/)** only if you want the token-gated page-URL workflow.

- A server you control (a VPS, home lab, or cloud instance) with Docker support.
- Comfort running a few terminal commands and editing configuration files.
## What To Expect

If that feels new, start with the [Getting Started guide](/web-application/getting-started/) for a friendly local install. It introduces the same concepts at a slower pace. When you're ready to go live, come back here and review the full [Deployment & Production guide](/web-application/how-to/deployment) for sizing tips, proxy examples, and hardening advice.

Before you deploy, double-check this quick checklist:

- Docker Engine and Docker Compose Plugin are installed on the host.
- Ports 80/443 (or the ports used by your TLS terminator) are open to the internet if you plan to serve other users.
- You can publish DNS for your chosen domain.

## Deployment Overview

1. Generate your `docker-compose.yml` and `config/feeds.yml` by following [Step 2 of the Getting Started guide](/web-application/getting-started/#step-2-create-the-configuration-file), then copy the resulting files into your deployment directory.
2. Create an `.env` file with production credentials and the values documented in the [environment reference](/web-application/reference/env-variables). Generate new secrets (`openssl rand -hex 32`) and avoid reusing the samples from local testing.
3. Adjust the compose file to match your host (volumes, proxy service, watchtower, resource limits). The [deployment guide](/web-application/how-to/deployment) shows complete examples for Caddy, health-check protection, and automatic updates.
4. Start the stack with `docker compose up -d` and verify the application is reachable at your chosen domain or internal endpoint.

For extra reliability, integrate the instance with your existing reverse proxy, DNS, or platform tooling rather than running it ad hoc on a laptop. Treat it like any other production service so readers can trust it.

## Harden & Secure

- Follow the [Secure Your Instance](/web-application/how-to/deployment#secure-your-instance) checklist to lock down credentials, TLS, and network access.
- Enforce HTTPS by configuring a reverse proxy (see [Option A: Caddy](/web-application/how-to/deployment#option-a-caddy-automatic-https)) or your preferred terminator. If you manage certificates separately, document the renewal procedure alongside your deployment scripts.
- Review the [production preparation guidelines](/web-application/how-to/deployment#prepare-for-production) and keep secrets outside of version control.

## Monitor & Maintain

- Point your uptime monitor at `/health_check.txt` and review container logs regularly. The [Operate & Monitor](/web-application/how-to/deployment#operate--monitor) section outlines suggested thresholds.
- Automate updates with Watchtower (a Docker container that updates running containers) or your container management platform to receive the latest html2rss-web releases quickly.
- Track storage usage for feed cache volumes and prune unused images. Schedule periodic configuration reviews so feeds and credentials remain accurate.
- `html2rss-web` is the recommended self-hosted product surface.
- Included feeds are the lowest-maintenance way to prove a deployment.
- Automatic feed generation is disabled by default in production.
- The generated API contract is published as OpenAPI at `/openapi.yaml`.
- Custom config work belongs in the core `html2rss` docs and JSON Schema.

## Share Your Instance

Running a reliable deployment benefits everyone. Share your server with the broader community by adding it to the [community instance list](https://github.com/html2rss/html2rss-web/wiki/Instances) once it is stable and you are ready for other readers. Include details such as uptime expectations, moderation policy, and contact information so people know what to expect.

Thanks for investing the time to share html2rss with others. Each new instance expands the open web and helps readers stay in control of the stories they follow.

## License

[MIT](https://github.com/html2rss/html2rss/blob/main/LICENSE)
6 changes: 6 additions & 0 deletions src/content/docs/ruby-gem/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ sidebar:

This section provides comprehensive documentation for the `html2rss` Ruby gem.

If you are looking for the stable machine-readable contract for config authoring, use the JSON Schema exported by the core repo:

- Repository file: [`schema/html2rss-config.schema.json`](https://github.com/html2rss/html2rss/blob/master/schema/html2rss-config.schema.json)
- CLI export: `html2rss schema`
- Runtime validation: `html2rss validate config.yml`

## Getting Started

If you are new to `html2rss`, we recommend starting with the [tutorials](/ruby-gem/tutorials).
Expand Down
54 changes: 54 additions & 0 deletions src/content/docs/ruby-gem/reference/cli-reference.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,60 @@ html2rss auto https://example.com/articles --items_selector ".post-card"

Command: `html2rss auto URL`

#### URL Surface Guidance For `auto`

`auto` works best when the input URL already exposes a server-rendered list of entries.

- High-success surfaces:
- newsroom or press listing pages
- blog/category/tag listing pages
- changelog/release notes/update listing pages
- paginated archive/list views
- Low-success surfaces:
- generic homepages with heavy promo/navigation chrome
- search results pages
- client-rendered app shells (`#app`, `#root`, `#__next`, etc.)

When possible, pass a direct listing/update URL instead of a top-level homepage or app entrypoint.

#### Failure Outcomes You Should Expect

When no extractable items are found, `auto` now classifies likely causes instead of only returning a generic message:

- `blocked surface likely (anti-bot or interstitial)`:
- retry with `--strategy browserless`
- try a more specific public listing URL
- `app-shell surface detected`:
- retry with `--strategy browserless`
- switch to a direct listing/update URL
- `unsupported extraction surface for auto mode`:
- switch to listing/changelog/category URLs
- use explicit selectors in a feed config

Known anti-bot interstitial responses (for example Cloudflare challenge pages) are surfaced explicitly as blocked-surface errors.

#### Browserless Setup And Diagnostics (CLI)

`browserless` is opt-in for CLI usage.

```bash
# Start a local Browserless container (default local token)
docker run --rm -p 3000:3000 -e "CONCURRENT=10" -e "TOKEN=6R0W53R135510" ghcr.io/browserless/chromium

# Run auto with Browserless
BROWSERLESS_IO_WEBSOCKET_URL="ws://127.0.0.1:3000" \
BROWSERLESS_IO_API_TOKEN="6R0W53R135510" \
html2rss auto https://example.com/updates --strategy browserless
```

If you see `Browserless connection failed`, check:

- `BROWSERLESS_IO_WEBSOCKET_URL` points to a reachable Browserless endpoint
- `BROWSERLESS_IO_API_TOKEN` matches the Browserless `TOKEN`
- the Browserless service is running and reachable from your shell environment

For custom Browserless endpoints, `BROWSERLESS_IO_API_TOKEN` is required.

### Feed

Loads a YAML config, builds the feed, and prints the RSS XML to stdout.
Expand Down
14 changes: 14 additions & 0 deletions src/content/docs/ruby-gem/reference/strategy.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ The `strategy` key defines how `html2rss` fetches a website's content.

`strategy` is a top-level config key. Request-specific controls live under `request`.

Use `faraday` first for direct newsroom/listing/changelog pages. Prefer `browserless` when the target is client-rendered, protected by anti-bot checks, or otherwise requires JavaScript to expose article links.

## `browserless`

To use the `browserless` strategy, you need a running instance of [Browserless.io](https://www.browserless.io/).
Expand Down Expand Up @@ -126,6 +128,18 @@ html2rss feed my_config.yml --max-redirects 5 --max-requests 6
html2rss feed my_config.yml
```

### Browserless Troubleshooting

If Browserless cannot connect, html2rss surfaces a `Browserless connection failed (...)` error with endpoint/token hints.

Check these first:

- `BROWSERLESS_IO_WEBSOCKET_URL` is reachable from where html2rss runs
- `BROWSERLESS_IO_API_TOKEN` matches your Browserless `TOKEN`
- your Browserless service is running and accepting connections

For custom Browserless websocket endpoints, `BROWSERLESS_IO_API_TOKEN` is mandatory. The local default endpoint (`ws://127.0.0.1:3000`) can use the default local token `6R0W53R135510`.

---

For detailed documentation on the Ruby API, see the [official YARD documentation](https://www.rubydoc.info/gems/html2rss).
Loading
Loading