feat(graph): extend QueryBuilder and migrate production queries #262
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Deploy

on:
  push:
    branches: [main]
  workflow_dispatch:
    inputs:
      force_deploy_all:
        description: 'Force deploy all services'
        required: false
        # type: boolean inputs take a real YAML boolean default, not the
        # string 'false' -- the jobs below compare with `== true` (boolean),
        # so a string default is inconsistent with how the value is consumed.
        default: false
        type: boolean
      force_infra:
        description: 'Force infrastructure apply'
        required: false
        default: false
        type: boolean

# Serialize deploys per ref; never cancel an in-flight production deploy.
concurrency:
  group: deploy-${{ github.ref }}
  cancel-in-progress: false

env:
  DOMAIN: engram.rawcontext.com
  ROOT_DOMAIN: rawcontext.com # For Vercel DNS (must be root domain)
jobs:
  # Wait for CI to pass
  ci:
    uses: ./.github/workflows/ci.yml
    secrets: inherit

  # Detect which components changed
  changes:
    runs-on: ubuntu-latest
    needs: ci
    outputs:
      infra: ${{ steps.filter.outputs.infra }}
      api: ${{ steps.filter.outputs.api }}
      search: ${{ steps.filter.outputs.search }}
      tuner: ${{ steps.filter.outputs.tuner }}
      observatory: ${{ steps.filter.outputs.observatory }}
      console: ${{ steps.filter.outputs.console }}
      ingestion: ${{ steps.filter.outputs.ingestion }}
      memory: ${{ steps.filter.outputs.memory }}
      docker_compose: ${{ steps.filter.outputs.docker_compose }}
      caddy: ${{ steps.filter.outputs.caddy }}
      any_service: ${{ steps.any.outputs.result }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            infra:
              - 'packages/infra/**'
            api:
              - 'apps/api/**'
              - 'packages/common/**'
              - 'packages/logger/**'
              - 'packages/events/**'
              - 'packages/storage/**'
              - 'packages/graph/**'
            search:
              - 'apps/search/**'
            tuner:
              - 'apps/tuner/**'
            observatory:
              - 'apps/observatory/**'
            console:
              - 'apps/console/**'
            ingestion:
              - 'apps/ingestion/**'
              - 'packages/common/**'
              - 'packages/logger/**'
              - 'packages/events/**'
              - 'packages/storage/**'
              - 'packages/parser/**'
            memory:
              - 'apps/memory/**'
              - 'packages/common/**'
              - 'packages/logger/**'
              - 'packages/events/**'
              - 'packages/storage/**'
              - 'packages/graph/**'
            docker_compose:
              - 'docker-compose.prod.yml'
            caddy:
              - 'configs/Caddyfile'
      - name: Check if any service changed
        id: any
        run: |
          # any_service gates the sync job (rsync of the repo to the server).
          # A Caddyfile change must also trigger a sync: without it the updated
          # Caddyfile never reaches the server and deploy-caddy (which requires
          # sync to succeed) is skipped.
          if [[ "${{ steps.filter.outputs.api }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.search }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.tuner }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.observatory }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.console }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.ingestion }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.memory }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.docker_compose }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.caddy }}" == "true" ]] || \
             [[ "${{ inputs.force_deploy_all }}" == "true" ]]; then
            echo "result=true" >> $GITHUB_OUTPUT
          else
            echo "result=false" >> $GITHUB_OUTPUT
          fi
| # Infrastructure deployment with OpenTofu | |
| # Runs after deploy-api because it uses the API as the Terraform state backend | |
| infrastructure: | |
| runs-on: ubuntu-latest | |
| needs: [changes, sync, deploy-api] | |
| if: | | |
| always() && | |
| needs.sync.result == 'success' && | |
| (needs.deploy-api.result == 'success' || needs.deploy-api.result == 'skipped') && | |
| (needs.changes.outputs.infra == 'true' || inputs.force_infra == true) | |
| environment: production | |
| permissions: | |
| contents: read | |
| pull-requests: write | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Setup OpenTofu | |
| uses: opentofu/setup-opentofu@v1 | |
| with: | |
| tofu_version: 1.8.0 | |
| - name: Verify Secrets | |
| run: | | |
| if [ -z "${{ secrets.TF_HTTP_PASSWORD }}" ]; then | |
| echo "::error::TF_HTTP_PASSWORD secret is not set!" | |
| exit 1 | |
| fi | |
| echo "TF_HTTP_PASSWORD is set (length: $(echo -n '${{ secrets.TF_HTTP_PASSWORD }}' | wc -c))" | |
| - name: Wait for API State Backend | |
| run: | | |
| echo "Checking if API state backend is available..." | |
| # Quick check - if API responds, great. If not, continue anyway | |
| # (state backend will fail later if truly unavailable, but we can | |
| # use -lock=false and -migrate-state to bootstrap) | |
| for i in {1..6}; do | |
| if curl -sf --max-time 5 "https://api.engram.rawcontext.com/v1/health" > /dev/null 2>&1; then | |
| echo "API is healthy, proceeding with OpenTofu init" | |
| exit 0 | |
| fi | |
| echo "Attempt $i/6: API not ready, waiting 10s..." | |
| sleep 10 | |
| done | |
| echo "::warning::API state backend not available - will attempt to continue anyway" | |
| - name: OpenTofu Init | |
| working-directory: packages/infra | |
| run: | | |
| tofu init -reconfigure \ | |
| -backend-config="username=tofu" \ | |
| -backend-config="password=${{ secrets.TF_HTTP_PASSWORD }}" | |
| - name: OpenTofu Validate | |
| working-directory: packages/infra | |
| run: tofu validate | |
| - name: Import Existing Resources | |
| working-directory: packages/infra | |
| env: | |
| TF_HTTP_USERNAME: tofu | |
| TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }} | |
| TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }} | |
| TF_VAR_vercel_api_token: ${{ secrets.VERCEL_API_TOKEN }} | |
| TF_VAR_domain: ${{ env.ROOT_DOMAIN }} | |
| TF_VAR_ssh_public_key: ${{ secrets.HETZNER_SSH_PUBLIC_KEY }} | |
| HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }} | |
| VERCEL_API_TOKEN: ${{ secrets.VERCEL_API_TOKEN }} | |
| run: | | |
| # Install hcloud CLI | |
| curl -sL https://github.com/hetznercloud/cli/releases/download/v1.49.0/hcloud-linux-amd64.tar.gz | tar xz | |
| chmod +x hcloud | |
| # Check if all resources exist in state already | |
| STATE_COUNT=$(tofu state list 2>/dev/null | wc -l || echo "0") | |
| if [ "$STATE_COUNT" -ge 8 ]; then | |
| echo "All resources appear to be in state ($STATE_COUNT items), skipping import" | |
| exit 0 | |
| fi | |
| echo "Found $STATE_COUNT items in state, checking for missing resources..." | |
| # Import Hetzner resources | |
| echo "=== Importing Hetzner resources ===" | |
| # Get SSH key ID | |
| SSH_KEY_ID=$(./hcloud ssh-key list -o noheader | grep "engram-key" | awk '{print $1}' || true) | |
| if [ -n "$SSH_KEY_ID" ]; then | |
| if ! tofu state list 2>/dev/null | grep -q "hcloud_ssh_key.engram"; then | |
| echo "Importing SSH key (ID: $SSH_KEY_ID)..." | |
| tofu import -lock=false hcloud_ssh_key.engram "$SSH_KEY_ID" || echo "SSH key import failed or already exists" | |
| fi | |
| fi | |
| # Get server ID | |
| SERVER_ID=$(./hcloud server list -o noheader | grep "engram" | awk '{print $1}' || true) | |
| if [ -n "$SERVER_ID" ]; then | |
| if ! tofu state list 2>/dev/null | grep -q "hcloud_server.engram"; then | |
| echo "Importing server (ID: $SERVER_ID)..." | |
| tofu import -lock=false hcloud_server.engram "$SERVER_ID" || echo "Server import failed or already exists" | |
| fi | |
| fi | |
| # Get firewall ID | |
| FIREWALL_ID=$(./hcloud firewall list -o noheader | grep "engram-firewall" | awk '{print $1}' || true) | |
| if [ -n "$FIREWALL_ID" ]; then | |
| if ! tofu state list 2>/dev/null | grep -q "hcloud_firewall.engram"; then | |
| echo "Importing firewall (ID: $FIREWALL_ID)..." | |
| tofu import -lock=false hcloud_firewall.engram "$FIREWALL_ID" || echo "Firewall import failed or already exists" | |
| fi | |
| # Import firewall attachment if both firewall and server exist | |
| if [ -n "$SERVER_ID" ]; then | |
| if ! tofu state list 2>/dev/null | grep -q "hcloud_firewall_attachment.engram"; then | |
| echo "Importing firewall attachment..." | |
| tofu import -lock=false hcloud_firewall_attachment.engram "$FIREWALL_ID" || echo "Firewall attachment import failed or already exists" | |
| fi | |
| fi | |
| fi | |
| # Import Vercel DNS records | |
| echo "=== Importing Vercel DNS records ===" | |
| DOMAIN="rawcontext.com" | |
| # Clear any corrupted DNS record states (domain mismatch bug) | |
| for record in apex api observatory console; do | |
| if tofu state list 2>/dev/null | grep -q "vercel_dns_record.$record"; then | |
| echo "Removing potentially corrupted state for vercel_dns_record.$record..." | |
| tofu state rm -lock=false "vercel_dns_record.$record" 2>/dev/null || true | |
| fi | |
| done | |
| # Fetch all DNS records from Vercel | |
| DNS_RECORDS=$(curl -s -H "Authorization: Bearer $VERCEL_API_TOKEN" \ | |
| "https://api.vercel.com/v4/domains/$DOMAIN/records" | jq -r '.records // []') | |
| # Import apex record (engram.rawcontext.com) | |
| if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.apex"; then | |
| APEX_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "engram" and .type == "A") | .id' | head -1) | |
| if [ -n "$APEX_ID" ] && [ "$APEX_ID" != "null" ]; then | |
| echo "Importing apex DNS record (ID: $APEX_ID)..." | |
| tofu import -lock=false vercel_dns_record.apex "$APEX_ID" || echo "Apex DNS import failed" | |
| fi | |
| fi | |
| # Import api record (api.engram.rawcontext.com) | |
| if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.api"; then | |
| API_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "api.engram" and .type == "A") | .id' | head -1) | |
| if [ -n "$API_ID" ] && [ "$API_ID" != "null" ]; then | |
| echo "Importing api DNS record (ID: $API_ID)..." | |
| tofu import -lock=false vercel_dns_record.api "$API_ID" || echo "API DNS import failed" | |
| fi | |
| fi | |
| # Import observatory record (observatory.engram.rawcontext.com) | |
| if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.observatory"; then | |
| OBS_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "observatory.engram" and .type == "A") | .id' | head -1) | |
| if [ -n "$OBS_ID" ] && [ "$OBS_ID" != "null" ]; then | |
| echo "Importing observatory DNS record (ID: $OBS_ID)..." | |
| tofu import -lock=false vercel_dns_record.observatory "$OBS_ID" || echo "Observatory DNS import failed" | |
| fi | |
| fi | |
| # Import console record (console.engram.rawcontext.com) | |
| if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.console"; then | |
| CONSOLE_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "console.engram" and .type == "A") | .id' | head -1) | |
| if [ -n "$CONSOLE_ID" ] && [ "$CONSOLE_ID" != "null" ]; then | |
| echo "Importing console DNS record (ID: $CONSOLE_ID)..." | |
| tofu import -lock=false vercel_dns_record.console "$CONSOLE_ID" || echo "Console DNS import failed" | |
| fi | |
| fi | |
| echo "Import complete. Checking state..." | |
| tofu state list || true | |
| - name: OpenTofu Plan | |
| working-directory: packages/infra | |
| env: | |
| TF_HTTP_USERNAME: tofu | |
| TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }} | |
| TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }} | |
| TF_VAR_vercel_api_token: ${{ secrets.VERCEL_API_TOKEN }} | |
| TF_VAR_domain: ${{ env.ROOT_DOMAIN }} | |
| TF_VAR_ssh_public_key: ${{ secrets.HETZNER_SSH_PUBLIC_KEY }} | |
| TF_VAR_engram_api_client_secret: ${{ secrets.ENGRAM_API_CLIENT_SECRET }} | |
| TF_VAR_engram_search_client_secret: ${{ secrets.ENGRAM_SEARCH_CLIENT_SECRET }} | |
| TF_VAR_engram_tuner_client_secret: ${{ secrets.ENGRAM_TUNER_CLIENT_SECRET }} | |
| TF_VAR_engram_ingestion_client_secret: ${{ secrets.ENGRAM_INGESTION_CLIENT_SECRET }} | |
| TF_VAR_engram_memory_client_secret: ${{ secrets.ENGRAM_MEMORY_CLIENT_SECRET }} | |
| TF_VAR_engram_console_client_secret: ${{ secrets.ENGRAM_CONSOLE_CLIENT_SECRET }} | |
| run: | | |
| # Disable state locking - workflow concurrency already prevents parallel runs | |
| tofu plan -out=tfplan -no-color -lock=false | |
| - name: OpenTofu Apply | |
| working-directory: packages/infra | |
| env: | |
| TF_HTTP_USERNAME: tofu | |
| TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }} | |
| TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }} | |
| TF_VAR_vercel_api_token: ${{ secrets.VERCEL_API_TOKEN }} | |
| TF_VAR_domain: ${{ env.ROOT_DOMAIN }} | |
| TF_VAR_ssh_public_key: ${{ secrets.HETZNER_SSH_PUBLIC_KEY }} | |
| TF_VAR_engram_api_client_secret: ${{ secrets.ENGRAM_API_CLIENT_SECRET }} | |
| TF_VAR_engram_search_client_secret: ${{ secrets.ENGRAM_SEARCH_CLIENT_SECRET }} | |
| TF_VAR_engram_tuner_client_secret: ${{ secrets.ENGRAM_TUNER_CLIENT_SECRET }} | |
| TF_VAR_engram_ingestion_client_secret: ${{ secrets.ENGRAM_INGESTION_CLIENT_SECRET }} | |
| TF_VAR_engram_memory_client_secret: ${{ secrets.ENGRAM_MEMORY_CLIENT_SECRET }} | |
| TF_VAR_engram_console_client_secret: ${{ secrets.ENGRAM_CONSOLE_CLIENT_SECRET }} | |
| run: | | |
| tofu apply -auto-approve -lock=false tfplan | |
| - name: Get Server IP | |
| id: server | |
| working-directory: packages/infra | |
| run: | | |
| echo "ip=$(tofu output -raw server_ip)" >> $GITHUB_OUTPUT | |
| outputs: | |
| server_ip: ${{ steps.server.outputs.ip }} | |
  # Sync files to server (runs if any service changed)
  sync:
    runs-on: ubuntu-latest
    needs: [changes]
    if: |
      always() &&
      needs.changes.result == 'success' &&
      needs.changes.outputs.any_service == 'true'
    environment: production
    steps:
      - uses: actions/checkout@v4
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          # ssh-keyscan can be flaky; retry up to 3 times before failing.
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Cleanup conflicting paths
        run: |
          # Heredoc is single-quoted: nothing in it expands locally.
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          # Remove conflicting paths and fix permissions
          sudo rm -rf /opt/engram/configs/Caddyfile 2>/dev/null || true
          # Ensure configs directory exists with correct ownership
          sudo mkdir -p /opt/engram/configs
          sudo chown -R engram:engram /opt/engram/configs
          EOF
      - name: Sync files to server
        run: |
          # NOTE(review): .env is not in the exclude list, so --delete removes
          # the server-side copy on every sync; the next step recreates it.
          # Confirm that ordering is never changed.
          rsync -avz --delete --no-group --no-owner \
            --exclude '.git' \
            --exclude 'node_modules' \
            --exclude '.venv' \
            --exclude '__pycache__' \
            --exclude '*.pyc' \
            --exclude 'dist' \
            --exclude 'data' \
            --exclude '.turbo' \
            --exclude '.next' \
            --exclude '.terraform' \
            --exclude 'terraform.tfstate*' \
            . engram@${{ env.DOMAIN }}:/opt/engram/
      - name: Create .env file on server
        run: |
          # The ${{ '{{' }} }} expressions are substituted by Actions before the shell
          # runs. NOTE(review): the whole command is double-quoted locally, so a
          # secret value containing $ or backticks would be expanded by the
          # local shell -- confirm these secrets are shell-safe.
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} "cat > /opt/engram/.env << 'ENVEOF'
          POSTGRES_USER=${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_DB=${{ secrets.POSTGRES_DB }}
          HF_API_TOKEN=${{ secrets.HF_API_TOKEN }}
          BETTER_AUTH_SECRET=${{ secrets.BETTER_AUTH_SECRET }}
          GOOGLE_CLIENT_ID=${{ secrets.GOOGLE_CLIENT_ID }}
          GOOGLE_CLIENT_SECRET=${{ secrets.GOOGLE_CLIENT_SECRET }}
          ENGRAM_API_CLIENT_SECRET=${{ secrets.ENGRAM_API_CLIENT_SECRET }}
          ENGRAM_SEARCH_CLIENT_SECRET=${{ secrets.ENGRAM_SEARCH_CLIENT_SECRET }}
          ENGRAM_TUNER_CLIENT_SECRET=${{ secrets.ENGRAM_TUNER_CLIENT_SECRET }}
          ENGRAM_INGESTION_CLIENT_SECRET=${{ secrets.ENGRAM_INGESTION_CLIENT_SECRET }}
          ENGRAM_MEMORY_CLIENT_SECRET=${{ secrets.ENGRAM_MEMORY_CLIENT_SECRET }}
          ENGRAM_CONSOLE_CLIENT_SECRET=${{ secrets.ENGRAM_CONSOLE_CLIENT_SECRET }}
          ENVEOF"
      - name: Fix postgres permissions and run migrations
        run: |
          # NOTE(review): POSTGRES_USER is not exported in this remote session
          # (the .env file is not sourced), so the ${POSTGRES_USER:-engram}
          # default applies -- confirm 'engram' matches the real DB user.
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          # Ensure postgres data directory has correct permissions (UID 70 for alpine)
          echo "Fixing postgres permissions..."
          sudo mkdir -p /var/lib/engram/postgres
          sudo chown -R 70:70 /var/lib/engram/postgres
          # Restart postgres if it's unhealthy due to permission issues
          if ! docker compose -f docker-compose.prod.yml exec -T postgres pg_isready -U ${POSTGRES_USER:-engram} 2>/dev/null; then
            echo "Postgres not ready, restarting..."
            docker compose -f docker-compose.prod.yml up -d postgres
            sleep 5
          fi
          # Wait for postgres to be ready
          for i in {1..30}; do
            if docker compose -f docker-compose.prod.yml exec -T postgres pg_isready -U ${POSTGRES_USER:-engram} 2>/dev/null; then
              echo "Postgres is ready"
              break
            fi
            echo "Waiting for postgres... ($i/30)"
            sleep 2
          done
          # Run auth migrations on the engram database
          cat /opt/engram/scripts/migrate-auth.sql | docker compose -f docker-compose.prod.yml exec -T postgres psql -U ${POSTGRES_USER:-engram} -d engram || echo "Migration may have already been applied"
          EOF
  # Deploy API service
  deploy-api:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.api == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy API
        run: |
          # Build runs on the server against the tree rsynced by the sync job.
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying API..."
          docker compose -f docker-compose.prod.yml build api
          docker compose -f docker-compose.prod.yml up -d --force-recreate api
          echo "Waiting for API health check..."
          sleep 10
          # Logs are dumped for debugging only; an unhealthy API does not
          # fail this step.
          if ! docker compose -f docker-compose.prod.yml ps api | grep -q "(healthy)"; then
            echo "=== API Container Logs ==="
            docker compose -f docker-compose.prod.yml logs --tail=100 api
            echo "==========================="
          fi
          docker compose -f docker-compose.prod.yml ps api
          EOF
  # Deploy Search service
  deploy-search:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.search == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Search
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Search..."
          # NOTE(review): search builds with --no-cache (api does not) --
          # confirm this is intentional; it slows every search deploy.
          docker compose -f docker-compose.prod.yml build --no-cache search
          docker compose -f docker-compose.prod.yml up -d --force-recreate search
          echo "Waiting for Search health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps search
          EOF
  # Deploy Tuner service
  deploy-tuner:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.tuner == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Tuner
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Tuner..."
          # NOTE(review): tuner builds with --no-cache (api does not) --
          # confirm this is intentional.
          docker compose -f docker-compose.prod.yml build --no-cache tuner
          docker compose -f docker-compose.prod.yml up -d --force-recreate tuner
          echo "Waiting for Tuner health check..."
          sleep 10
          # Logs are dumped for debugging only; unhealthy tuner does not
          # fail this step.
          if ! docker compose -f docker-compose.prod.yml ps tuner | grep -q "(healthy)"; then
            echo "=== Tuner Container Logs ==="
            docker compose -f docker-compose.prod.yml logs --tail=100 tuner
            echo "==========================="
          fi
          docker compose -f docker-compose.prod.yml ps tuner
          EOF
  # Deploy Observatory service
  deploy-observatory:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.observatory == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Observatory
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Observatory..."
          docker compose -f docker-compose.prod.yml build observatory
          docker compose -f docker-compose.prod.yml up -d --force-recreate observatory
          echo "Waiting for Observatory health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps observatory
          EOF
  # Deploy Console service
  deploy-console:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.console == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Console
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Console..."
          docker compose -f docker-compose.prod.yml build console
          docker compose -f docker-compose.prod.yml up -d --force-recreate console
          echo "Waiting for Console health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps console
          EOF
  # Deploy Ingestion service
  deploy-ingestion:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.ingestion == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Ingestion
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Ingestion..."
          docker compose -f docker-compose.prod.yml build ingestion
          docker compose -f docker-compose.prod.yml up -d --force-recreate ingestion
          echo "Waiting for Ingestion health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps ingestion
          EOF
  # Deploy Memory service
  deploy-memory:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.memory == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Memory
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Memory..."
          docker compose -f docker-compose.prod.yml build memory
          docker compose -f docker-compose.prod.yml up -d --force-recreate memory
          echo "Waiting for Memory startup..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps memory
          EOF
  # Deploy databases/infrastructure services if docker-compose changed
  deploy-infrastructure-services:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.docker_compose == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Infrastructure Services
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          # Create data directories at /var/lib/engram (outside Docker build context)
          echo "Creating data directories at /var/lib/engram..."
          sudo mkdir -p /var/lib/engram/{qdrant,falkordb,postgres,nats,caddy/data,caddy/config}
          # Set ownership - postgres:17-alpine uses UID 70, others can use engram
          sudo chown -R engram:engram /var/lib/engram/qdrant /var/lib/engram/falkordb /var/lib/engram/nats /var/lib/engram/caddy
          sudo chown -R 70:70 /var/lib/engram/postgres
          # Migrate existing data if present (one-time migration)
          if [ -d "/opt/engram/data" ] && [ "$(ls -A /opt/engram/data 2>/dev/null)" ]; then
            echo "Migrating existing data from /opt/engram/data..."
            for dir in qdrant falkordb nats; do
              if [ -d "/opt/engram/data/$dir" ] && [ "$(ls -A /opt/engram/data/$dir 2>/dev/null)" ]; then
                echo " Migrating $dir..."
                sudo rsync -a /opt/engram/data/$dir/ /var/lib/engram/$dir/ || true
              fi
            done
            # PostgreSQL requires special handling - migration would need DB stopped first
            # We'll let it re-initialize for now if migration is needed
            if [ -d "/opt/engram/data/caddy" ]; then
              echo " Migrating caddy..."
              sudo rsync -a /opt/engram/data/caddy/ /var/lib/engram/caddy/ || true
            fi
          fi
          echo "Pulling infrastructure images..."
          docker compose -f docker-compose.prod.yml pull nats qdrant falkordb postgres
          echo "Starting infrastructure services..."
          docker compose -f docker-compose.prod.yml up -d nats qdrant falkordb postgres
          echo "Waiting for services to be healthy..."
          sleep 15
          docker compose -f docker-compose.prod.yml ps
          EOF
| # Start/reload Caddy reverse proxy (only full restart if Caddyfile changed) | |
| deploy-caddy: | |
| runs-on: ubuntu-latest | |
| needs: | |
| - changes | |
| - sync | |
| - deploy-api | |
| - deploy-search | |
| - deploy-tuner | |
| - deploy-observatory | |
| - deploy-console | |
| if: | | |
| always() && | |
| needs.sync.result == 'success' | |
| environment: production | |
| steps: | |
| - name: Setup SSH | |
| uses: webfactory/ssh-agent@v0.9.0 | |
| with: | |
| ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }} | |
| - name: Add host to known_hosts | |
| run: | | |
| for i in {1..3}; do | |
| if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then | |
| echo "SSH key scan successful" | |
| exit 0 | |
| fi | |
| echo "Attempt $i failed, retrying in 5s..." | |
| sleep 5 | |
| done | |
| echo "::error::Failed to scan SSH host after 3 attempts" | |
| exit 1 | |
| - name: Reload or Start Caddy | |
| env: | |
| CADDY_CHANGED: ${{ needs.changes.outputs.caddy }} | |
| DOCKER_COMPOSE_CHANGED: ${{ needs.changes.outputs.docker_compose }} | |
| run: | | |
| ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << EOF | |
| cd /opt/engram | |
| # Check if Caddy is already running | |
| CADDY_RUNNING=\$(docker compose -f docker-compose.prod.yml ps caddy --format json 2>/dev/null | grep -c '"running"' || echo "0") | |
| if [[ "$CADDY_CHANGED" == "true" ]] || [[ "$DOCKER_COMPOSE_CHANGED" == "true" ]]; then | |
| echo "Caddyfile or docker-compose changed - full Caddy restart..." | |
| # Only restart Caddy, not all services | |
| docker compose -f docker-compose.prod.yml up -d --force-recreate caddy | |
| echo "Waiting for Caddy to be ready..." | |
| sleep 10 | |
| elif [[ "\$CADDY_RUNNING" == "0" ]]; then | |
| echo "Caddy not running - starting it..." | |
| docker compose -f docker-compose.prod.yml up -d caddy | |
| echo "Waiting for Caddy to be ready..." | |
| sleep 10 | |
| else | |
| echo "Caddy already running and no config changes - skipping restart" | |
| fi | |
| docker compose -f docker-compose.prod.yml ps | |
| EOF | |
  # Cleanup old Docker images
  cleanup:
    runs-on: ubuntu-latest
    needs: [deploy-caddy, sync]
    if: |
      always() &&
      needs.deploy-caddy.result == 'success'
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Prune old Docker images
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          echo "Pruning unused Docker images..."
          docker image prune -af
          echo ""
          echo "Pruning Docker build cache..."
          docker builder prune -af
          echo ""
          # NOTE(review): 'volume prune -f' removes all unused named volumes.
          # Other jobs bind-mount data under /var/lib/engram, which suggests
          # no named data volumes exist -- confirm before relying on this.
          echo "Pruning unused Docker volumes..."
          docker volume prune -f
          echo ""
          echo "Pruning unused Docker networks..."
          docker network prune -f
          echo ""
          echo "Disk usage after cleanup:"
          docker system df
          df -h /
          EOF
  # Summary job
  summary:
    runs-on: ubuntu-latest
    needs:
      - changes
      - infrastructure
      - sync
      - deploy-api
      - deploy-search
      - deploy-tuner
      - deploy-observatory
      - deploy-console
      - deploy-ingestion
      - deploy-memory
      - deploy-infrastructure-services
      - deploy-caddy
      - cleanup
    # Always runs (even after failures) so every deploy leaves a summary table.
    if: always()
    steps:
      - name: Deployment Summary
        run: |
          echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Component | Changed | Status |" >> $GITHUB_STEP_SUMMARY
          echo "|-----------|---------|--------|" >> $GITHUB_STEP_SUMMARY
          echo "| Infrastructure | ${{ needs.changes.outputs.infra }} | ${{ needs.infrastructure.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| API | ${{ needs.changes.outputs.api }} | ${{ needs.deploy-api.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Search | ${{ needs.changes.outputs.search }} | ${{ needs.deploy-search.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Tuner | ${{ needs.changes.outputs.tuner }} | ${{ needs.deploy-tuner.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Observatory | ${{ needs.changes.outputs.observatory }} | ${{ needs.deploy-observatory.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Console | ${{ needs.changes.outputs.console }} | ${{ needs.deploy-console.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Ingestion | ${{ needs.changes.outputs.ingestion }} | ${{ needs.deploy-ingestion.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Memory | ${{ needs.changes.outputs.memory }} | ${{ needs.deploy-memory.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Docker Compose | ${{ needs.changes.outputs.docker_compose }} | ${{ needs.deploy-infrastructure-services.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Caddy | ${{ needs.changes.outputs.caddy }} | ${{ needs.deploy-caddy.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Service URLs" >> $GITHUB_STEP_SUMMARY
          echo "- API: https://api.engram.rawcontext.com/v1" >> $GITHUB_STEP_SUMMARY
          echo "- Search: https://api.engram.rawcontext.com/v1/search" >> $GITHUB_STEP_SUMMARY
          echo "- Tuner: https://api.engram.rawcontext.com/v1/tuner" >> $GITHUB_STEP_SUMMARY
          echo "- Observatory: https://observatory.engram.rawcontext.com" >> $GITHUB_STEP_SUMMARY
          echo "- Console: https://console.engram.rawcontext.com" >> $GITHUB_STEP_SUMMARY