# Workflow file captured from run for PR #262:
# "feat(graph): extend QueryBuilder and migrate production queries"

name: Deploy

on:
  push:
    branches: [main]
  workflow_dispatch:
    inputs:
      force_deploy_all:
        description: 'Force deploy all services'
        required: false
        # type: boolean inputs take a boolean default, not the string 'false'
        default: false
        type: boolean
      force_infra:
        description: 'Force infrastructure apply'
        required: false
        default: false
        type: boolean

# Serialize deploys per ref; never cancel an in-flight production deploy.
concurrency:
  group: deploy-${{ github.ref }}
  cancel-in-progress: false

env:
  DOMAIN: engram.rawcontext.com
  ROOT_DOMAIN: rawcontext.com # For Vercel DNS (must be root domain)

jobs:
  # Wait for CI to pass
  ci:
    uses: ./.github/workflows/ci.yml
    secrets: inherit

  # Detect which components changed
  changes:
    runs-on: ubuntu-latest
    needs: ci
    outputs:
      infra: ${{ steps.filter.outputs.infra }}
      api: ${{ steps.filter.outputs.api }}
      search: ${{ steps.filter.outputs.search }}
      tuner: ${{ steps.filter.outputs.tuner }}
      observatory: ${{ steps.filter.outputs.observatory }}
      console: ${{ steps.filter.outputs.console }}
      ingestion: ${{ steps.filter.outputs.ingestion }}
      memory: ${{ steps.filter.outputs.memory }}
      docker_compose: ${{ steps.filter.outputs.docker_compose }}
      caddy: ${{ steps.filter.outputs.caddy }}
      any_service: ${{ steps.any.outputs.result }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            infra:
              - 'packages/infra/**'
            api:
              - 'apps/api/**'
              - 'packages/common/**'
              - 'packages/logger/**'
              - 'packages/events/**'
              - 'packages/storage/**'
              - 'packages/graph/**'
            search:
              - 'apps/search/**'
            tuner:
              - 'apps/tuner/**'
            observatory:
              - 'apps/observatory/**'
            console:
              - 'apps/console/**'
            ingestion:
              - 'apps/ingestion/**'
              - 'packages/common/**'
              - 'packages/logger/**'
              - 'packages/events/**'
              - 'packages/storage/**'
              - 'packages/parser/**'
            memory:
              - 'apps/memory/**'
              - 'packages/common/**'
              - 'packages/logger/**'
              - 'packages/events/**'
              - 'packages/storage/**'
              - 'packages/graph/**'
            docker_compose:
              - 'docker-compose.prod.yml'
            caddy:
              - 'configs/Caddyfile'
      - name: Check if any service changed
        id: any
        run: |
          if [[ "${{ steps.filter.outputs.api }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.search }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.tuner }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.observatory }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.console }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.ingestion }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.memory }}" == "true" ]] || \
             [[ "${{ steps.filter.outputs.docker_compose }}" == "true" ]] || \
             [[ "${{ inputs.force_deploy_all }}" == "true" ]]; then
            echo "result=true" >> $GITHUB_OUTPUT
          else
            echo "result=false" >> $GITHUB_OUTPUT
          fi

  # Infrastructure deployment with OpenTofu
  # Runs after deploy-api because it uses the API as the Terraform state backend
  infrastructure:
    runs-on: ubuntu-latest
    needs: [changes, sync, deploy-api]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.deploy-api.result == 'success' || needs.deploy-api.result == 'skipped') &&
      (needs.changes.outputs.infra == 'true' || inputs.force_infra == true)
    environment: production
    permissions:
      contents: read
      pull-requests: write
    outputs:
      server_ip: ${{ steps.server.outputs.ip }}
    steps:
      - uses: actions/checkout@v4
      - name: Setup OpenTofu
        uses: opentofu/setup-opentofu@v1
        with:
          tofu_version: "1.8.0"
      - name: Verify Secrets
        # Pass the secret through env rather than interpolating it into the
        # command line, so it is never substituted into shell source text.
        env:
          TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }}
        run: |
          if [ -z "$TF_HTTP_PASSWORD" ]; then
            echo "::error::TF_HTTP_PASSWORD secret is not set!"
            exit 1
          fi
          echo "TF_HTTP_PASSWORD is set (length: ${#TF_HTTP_PASSWORD})"
      - name: Wait for API State Backend
        run: |
          echo "Checking if API state backend is available..."
          # Quick check - if API responds, great. If not, continue anyway
          # (state backend will fail later if truly unavailable, but we can
          # use -lock=false and -migrate-state to bootstrap)
          for i in {1..6}; do
            if curl -sf --max-time 5 "https://api.engram.rawcontext.com/v1/health" > /dev/null 2>&1; then
              echo "API is healthy, proceeding with OpenTofu init"
              exit 0
            fi
            echo "Attempt $i/6: API not ready, waiting 10s..."
            sleep 10
          done
          echo "::warning::API state backend not available - will attempt to continue anyway"
      - name: OpenTofu Init
        working-directory: packages/infra
        run: |
          tofu init -reconfigure \
            -backend-config="username=tofu" \
            -backend-config="password=${{ secrets.TF_HTTP_PASSWORD }}"
      - name: OpenTofu Validate
        working-directory: packages/infra
        run: tofu validate
      - name: Import Existing Resources
        working-directory: packages/infra
        env:
          TF_HTTP_USERNAME: tofu
          TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }}
          TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
          TF_VAR_vercel_api_token: ${{ secrets.VERCEL_API_TOKEN }}
          TF_VAR_domain: ${{ env.ROOT_DOMAIN }}
          TF_VAR_ssh_public_key: ${{ secrets.HETZNER_SSH_PUBLIC_KEY }}
          HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
          VERCEL_API_TOKEN: ${{ secrets.VERCEL_API_TOKEN }}
        run: |
          # Install hcloud CLI
          curl -sL https://github.com/hetznercloud/cli/releases/download/v1.49.0/hcloud-linux-amd64.tar.gz | tar xz
          chmod +x hcloud
          # Check if all resources exist in state already
          STATE_COUNT=$(tofu state list 2>/dev/null | wc -l || echo "0")
          if [ "$STATE_COUNT" -ge 8 ]; then
            echo "All resources appear to be in state ($STATE_COUNT items), skipping import"
            exit 0
          fi
          echo "Found $STATE_COUNT items in state, checking for missing resources..."
          # Import Hetzner resources
          echo "=== Importing Hetzner resources ==="
          # Get SSH key ID
          SSH_KEY_ID=$(./hcloud ssh-key list -o noheader | grep "engram-key" | awk '{print $1}' || true)
          if [ -n "$SSH_KEY_ID" ]; then
            if ! tofu state list 2>/dev/null | grep -q "hcloud_ssh_key.engram"; then
              echo "Importing SSH key (ID: $SSH_KEY_ID)..."
              tofu import -lock=false hcloud_ssh_key.engram "$SSH_KEY_ID" || echo "SSH key import failed or already exists"
            fi
          fi
          # Get server ID
          SERVER_ID=$(./hcloud server list -o noheader | grep "engram" | awk '{print $1}' || true)
          if [ -n "$SERVER_ID" ]; then
            if ! tofu state list 2>/dev/null | grep -q "hcloud_server.engram"; then
              echo "Importing server (ID: $SERVER_ID)..."
              tofu import -lock=false hcloud_server.engram "$SERVER_ID" || echo "Server import failed or already exists"
            fi
          fi
          # Get firewall ID
          FIREWALL_ID=$(./hcloud firewall list -o noheader | grep "engram-firewall" | awk '{print $1}' || true)
          if [ -n "$FIREWALL_ID" ]; then
            if ! tofu state list 2>/dev/null | grep -q "hcloud_firewall.engram"; then
              echo "Importing firewall (ID: $FIREWALL_ID)..."
              tofu import -lock=false hcloud_firewall.engram "$FIREWALL_ID" || echo "Firewall import failed or already exists"
            fi
            # Import firewall attachment if both firewall and server exist
            if [ -n "$SERVER_ID" ]; then
              if ! tofu state list 2>/dev/null | grep -q "hcloud_firewall_attachment.engram"; then
                echo "Importing firewall attachment..."
                tofu import -lock=false hcloud_firewall_attachment.engram "$FIREWALL_ID" || echo "Firewall attachment import failed or already exists"
              fi
            fi
          fi
          # Import Vercel DNS records
          echo "=== Importing Vercel DNS records ==="
          DOMAIN="rawcontext.com"
          # Clear any corrupted DNS record states (domain mismatch bug)
          for record in apex api observatory console; do
            if tofu state list 2>/dev/null | grep -q "vercel_dns_record.$record"; then
              echo "Removing potentially corrupted state for vercel_dns_record.$record..."
              tofu state rm -lock=false "vercel_dns_record.$record" 2>/dev/null || true
            fi
          done
          # Fetch all DNS records from Vercel
          DNS_RECORDS=$(curl -s -H "Authorization: Bearer $VERCEL_API_TOKEN" \
            "https://api.vercel.com/v4/domains/$DOMAIN/records" | jq -r '.records // []')
          # Import apex record (engram.rawcontext.com)
          if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.apex"; then
            APEX_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "engram" and .type == "A") | .id' | head -1)
            if [ -n "$APEX_ID" ] && [ "$APEX_ID" != "null" ]; then
              echo "Importing apex DNS record (ID: $APEX_ID)..."
              tofu import -lock=false vercel_dns_record.apex "$APEX_ID" || echo "Apex DNS import failed"
            fi
          fi
          # Import api record (api.engram.rawcontext.com)
          if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.api"; then
            API_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "api.engram" and .type == "A") | .id' | head -1)
            if [ -n "$API_ID" ] && [ "$API_ID" != "null" ]; then
              echo "Importing api DNS record (ID: $API_ID)..."
              tofu import -lock=false vercel_dns_record.api "$API_ID" || echo "API DNS import failed"
            fi
          fi
          # Import observatory record (observatory.engram.rawcontext.com)
          if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.observatory"; then
            OBS_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "observatory.engram" and .type == "A") | .id' | head -1)
            if [ -n "$OBS_ID" ] && [ "$OBS_ID" != "null" ]; then
              echo "Importing observatory DNS record (ID: $OBS_ID)..."
              tofu import -lock=false vercel_dns_record.observatory "$OBS_ID" || echo "Observatory DNS import failed"
            fi
          fi
          # Import console record (console.engram.rawcontext.com)
          if ! tofu state list 2>/dev/null | grep -q "vercel_dns_record.console"; then
            CONSOLE_ID=$(echo "$DNS_RECORDS" | jq -r '.[] | select(.name == "console.engram" and .type == "A") | .id' | head -1)
            if [ -n "$CONSOLE_ID" ] && [ "$CONSOLE_ID" != "null" ]; then
              echo "Importing console DNS record (ID: $CONSOLE_ID)..."
              tofu import -lock=false vercel_dns_record.console "$CONSOLE_ID" || echo "Console DNS import failed"
            fi
          fi
          echo "Import complete. Checking state..."
          tofu state list || true
      - name: OpenTofu Plan
        working-directory: packages/infra
        env:
          TF_HTTP_USERNAME: tofu
          TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }}
          TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
          TF_VAR_vercel_api_token: ${{ secrets.VERCEL_API_TOKEN }}
          TF_VAR_domain: ${{ env.ROOT_DOMAIN }}
          TF_VAR_ssh_public_key: ${{ secrets.HETZNER_SSH_PUBLIC_KEY }}
          TF_VAR_engram_api_client_secret: ${{ secrets.ENGRAM_API_CLIENT_SECRET }}
          TF_VAR_engram_search_client_secret: ${{ secrets.ENGRAM_SEARCH_CLIENT_SECRET }}
          TF_VAR_engram_tuner_client_secret: ${{ secrets.ENGRAM_TUNER_CLIENT_SECRET }}
          TF_VAR_engram_ingestion_client_secret: ${{ secrets.ENGRAM_INGESTION_CLIENT_SECRET }}
          TF_VAR_engram_memory_client_secret: ${{ secrets.ENGRAM_MEMORY_CLIENT_SECRET }}
          TF_VAR_engram_console_client_secret: ${{ secrets.ENGRAM_CONSOLE_CLIENT_SECRET }}
        run: |
          # Disable state locking - workflow concurrency already prevents parallel runs
          tofu plan -out=tfplan -no-color -lock=false
      - name: OpenTofu Apply
        working-directory: packages/infra
        env:
          TF_HTTP_USERNAME: tofu
          TF_HTTP_PASSWORD: ${{ secrets.TF_HTTP_PASSWORD }}
          TF_VAR_hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
          TF_VAR_vercel_api_token: ${{ secrets.VERCEL_API_TOKEN }}
          TF_VAR_domain: ${{ env.ROOT_DOMAIN }}
          TF_VAR_ssh_public_key: ${{ secrets.HETZNER_SSH_PUBLIC_KEY }}
          TF_VAR_engram_api_client_secret: ${{ secrets.ENGRAM_API_CLIENT_SECRET }}
          TF_VAR_engram_search_client_secret: ${{ secrets.ENGRAM_SEARCH_CLIENT_SECRET }}
          TF_VAR_engram_tuner_client_secret: ${{ secrets.ENGRAM_TUNER_CLIENT_SECRET }}
          TF_VAR_engram_ingestion_client_secret: ${{ secrets.ENGRAM_INGESTION_CLIENT_SECRET }}
          TF_VAR_engram_memory_client_secret: ${{ secrets.ENGRAM_MEMORY_CLIENT_SECRET }}
          TF_VAR_engram_console_client_secret: ${{ secrets.ENGRAM_CONSOLE_CLIENT_SECRET }}
        run: |
          tofu apply -auto-approve -lock=false tfplan
      - name: Get Server IP
        id: server
        working-directory: packages/infra
        run: |
          echo "ip=$(tofu output -raw server_ip)" >> $GITHUB_OUTPUT

  # Sync files to server (runs if any service changed)
  sync:
    runs-on: ubuntu-latest
    needs: [changes]
    if: |
      always() &&
      needs.changes.result == 'success' &&
      needs.changes.outputs.any_service == 'true'
    environment: production
    steps:
      - uses: actions/checkout@v4
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Cleanup conflicting paths
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          # Remove conflicting paths and fix permissions
          sudo rm -rf /opt/engram/configs/Caddyfile 2>/dev/null || true
          # Ensure configs directory exists with correct ownership
          sudo mkdir -p /opt/engram/configs
          sudo chown -R engram:engram /opt/engram/configs
          EOF
      - name: Sync files to server
        run: |
          rsync -avz --delete --no-group --no-owner \
            --exclude '.git' \
            --exclude 'node_modules' \
            --exclude '.venv' \
            --exclude '__pycache__' \
            --exclude '*.pyc' \
            --exclude 'dist' \
            --exclude 'data' \
            --exclude '.turbo' \
            --exclude '.next' \
            --exclude '.terraform' \
            --exclude 'terraform.tfstate*' \
            . engram@${{ env.DOMAIN }}:/opt/engram/
      - name: Create .env file on server
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} "cat > /opt/engram/.env << 'ENVEOF'
          POSTGRES_USER=${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_DB=${{ secrets.POSTGRES_DB }}
          HF_API_TOKEN=${{ secrets.HF_API_TOKEN }}
          BETTER_AUTH_SECRET=${{ secrets.BETTER_AUTH_SECRET }}
          GOOGLE_CLIENT_ID=${{ secrets.GOOGLE_CLIENT_ID }}
          GOOGLE_CLIENT_SECRET=${{ secrets.GOOGLE_CLIENT_SECRET }}
          ENGRAM_API_CLIENT_SECRET=${{ secrets.ENGRAM_API_CLIENT_SECRET }}
          ENGRAM_SEARCH_CLIENT_SECRET=${{ secrets.ENGRAM_SEARCH_CLIENT_SECRET }}
          ENGRAM_TUNER_CLIENT_SECRET=${{ secrets.ENGRAM_TUNER_CLIENT_SECRET }}
          ENGRAM_INGESTION_CLIENT_SECRET=${{ secrets.ENGRAM_INGESTION_CLIENT_SECRET }}
          ENGRAM_MEMORY_CLIENT_SECRET=${{ secrets.ENGRAM_MEMORY_CLIENT_SECRET }}
          ENGRAM_CONSOLE_CLIENT_SECRET=${{ secrets.ENGRAM_CONSOLE_CLIENT_SECRET }}
          ENVEOF"
      - name: Fix postgres permissions and run migrations
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          # Ensure postgres data directory has correct permissions (UID 70 for alpine)
          echo "Fixing postgres permissions..."
          sudo mkdir -p /var/lib/engram/postgres
          sudo chown -R 70:70 /var/lib/engram/postgres
          # Restart postgres if it's unhealthy due to permission issues
          if ! docker compose -f docker-compose.prod.yml exec -T postgres pg_isready -U ${POSTGRES_USER:-engram} 2>/dev/null; then
            echo "Postgres not ready, restarting..."
            docker compose -f docker-compose.prod.yml up -d postgres
            sleep 5
          fi
          # Wait for postgres to be ready
          for i in {1..30}; do
            if docker compose -f docker-compose.prod.yml exec -T postgres pg_isready -U ${POSTGRES_USER:-engram} 2>/dev/null; then
              echo "Postgres is ready"
              break
            fi
            echo "Waiting for postgres... ($i/30)"
            sleep 2
          done
          # Run auth migrations on the engram database
          cat /opt/engram/scripts/migrate-auth.sql | docker compose -f docker-compose.prod.yml exec -T postgres psql -U ${POSTGRES_USER:-engram} -d engram || echo "Migration may have already been applied"
          EOF

  # Deploy API service
  deploy-api:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.api == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy API
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying API..."
          docker compose -f docker-compose.prod.yml build api
          docker compose -f docker-compose.prod.yml up -d --force-recreate api
          echo "Waiting for API health check..."
          sleep 10
          if ! docker compose -f docker-compose.prod.yml ps api | grep -q "(healthy)"; then
            echo "=== API Container Logs ==="
            docker compose -f docker-compose.prod.yml logs --tail=100 api
            echo "==========================="
          fi
          docker compose -f docker-compose.prod.yml ps api
          EOF

  # Deploy Search service
  deploy-search:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.search == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Search
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Search..."
          docker compose -f docker-compose.prod.yml build --no-cache search
          docker compose -f docker-compose.prod.yml up -d --force-recreate search
          echo "Waiting for Search health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps search
          EOF

  # Deploy Tuner service
  deploy-tuner:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.tuner == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Tuner
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Tuner..."
          docker compose -f docker-compose.prod.yml build --no-cache tuner
          docker compose -f docker-compose.prod.yml up -d --force-recreate tuner
          echo "Waiting for Tuner health check..."
          sleep 10
          if ! docker compose -f docker-compose.prod.yml ps tuner | grep -q "(healthy)"; then
            echo "=== Tuner Container Logs ==="
            docker compose -f docker-compose.prod.yml logs --tail=100 tuner
            echo "==========================="
          fi
          docker compose -f docker-compose.prod.yml ps tuner
          EOF

  # Deploy Observatory service
  deploy-observatory:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.observatory == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Observatory
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Observatory..."
          docker compose -f docker-compose.prod.yml build observatory
          docker compose -f docker-compose.prod.yml up -d --force-recreate observatory
          echo "Waiting for Observatory health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps observatory
          EOF

  # Deploy Console service
  deploy-console:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.console == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Console
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Console..."
          docker compose -f docker-compose.prod.yml build console
          docker compose -f docker-compose.prod.yml up -d --force-recreate console
          echo "Waiting for Console health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps console
          EOF

  # Deploy Ingestion service
  deploy-ingestion:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.ingestion == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Ingestion
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Ingestion..."
          docker compose -f docker-compose.prod.yml build ingestion
          docker compose -f docker-compose.prod.yml up -d --force-recreate ingestion
          echo "Waiting for Ingestion health check..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps ingestion
          EOF

  # Deploy Memory service
  deploy-memory:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.memory == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Memory
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          echo "Building and deploying Memory..."
          docker compose -f docker-compose.prod.yml build memory
          docker compose -f docker-compose.prod.yml up -d --force-recreate memory
          echo "Waiting for Memory startup..."
          sleep 5
          docker compose -f docker-compose.prod.yml ps memory
          EOF

  # Deploy databases/infrastructure services if docker-compose changed
  deploy-infrastructure-services:
    runs-on: ubuntu-latest
    needs: [changes, sync]
    if: |
      always() &&
      needs.sync.result == 'success' &&
      (needs.changes.outputs.docker_compose == 'true' || inputs.force_deploy_all == true)
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Deploy Infrastructure Services
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          cd /opt/engram
          # Create data directories at /var/lib/engram (outside Docker build context)
          echo "Creating data directories at /var/lib/engram..."
          sudo mkdir -p /var/lib/engram/{qdrant,falkordb,postgres,nats,caddy/data,caddy/config}
          # Set ownership - postgres:17-alpine uses UID 70, others can use engram
          sudo chown -R engram:engram /var/lib/engram/qdrant /var/lib/engram/falkordb /var/lib/engram/nats /var/lib/engram/caddy
          sudo chown -R 70:70 /var/lib/engram/postgres
          # Migrate existing data if present (one-time migration)
          if [ -d "/opt/engram/data" ] && [ "$(ls -A /opt/engram/data 2>/dev/null)" ]; then
            echo "Migrating existing data from /opt/engram/data..."
            for dir in qdrant falkordb nats; do
              if [ -d "/opt/engram/data/$dir" ] && [ "$(ls -A /opt/engram/data/$dir 2>/dev/null)" ]; then
                echo " Migrating $dir..."
                sudo rsync -a /opt/engram/data/$dir/ /var/lib/engram/$dir/ || true
              fi
            done
            # PostgreSQL requires special handling - migration would need DB stopped first
            # We'll let it re-initialize for now if migration is needed
            if [ -d "/opt/engram/data/caddy" ]; then
              echo " Migrating caddy..."
              sudo rsync -a /opt/engram/data/caddy/ /var/lib/engram/caddy/ || true
            fi
          fi
          echo "Pulling infrastructure images..."
          docker compose -f docker-compose.prod.yml pull nats qdrant falkordb postgres
          echo "Starting infrastructure services..."
          docker compose -f docker-compose.prod.yml up -d nats qdrant falkordb postgres
          echo "Waiting for services to be healthy..."
          sleep 15
          docker compose -f docker-compose.prod.yml ps
          EOF

  # Start/reload Caddy reverse proxy (only full restart if Caddyfile changed)
  deploy-caddy:
    runs-on: ubuntu-latest
    needs:
      - changes
      - sync
      - deploy-api
      - deploy-search
      - deploy-tuner
      - deploy-observatory
      - deploy-console
    if: |
      always() &&
      needs.sync.result == 'success'
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Reload or Start Caddy
        env:
          CADDY_CHANGED: ${{ needs.changes.outputs.caddy }}
          DOCKER_COMPOSE_CHANGED: ${{ needs.changes.outputs.docker_compose }}
        run: |
          # Unquoted heredoc: $CADDY_CHANGED/$DOCKER_COMPOSE_CHANGED expand
          # locally; \$-escaped variables are evaluated on the remote host.
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << EOF
          cd /opt/engram
          # Check if Caddy is already running. NOTE: 'grep -c' prints 0 and
          # exits non-zero on no match, so the fallback must not emit a second
          # "0" (a "|| echo 0" here produced a two-line value that never
          # matched the == "0" test below, leaving a stopped Caddy unstarted).
          CADDY_RUNNING=\$(docker compose -f docker-compose.prod.yml ps caddy --format json 2>/dev/null | grep -c '"running"' || true)
          if [[ "$CADDY_CHANGED" == "true" ]] || [[ "$DOCKER_COMPOSE_CHANGED" == "true" ]]; then
            echo "Caddyfile or docker-compose changed - full Caddy restart..."
            # Only restart Caddy, not all services
            docker compose -f docker-compose.prod.yml up -d --force-recreate caddy
            echo "Waiting for Caddy to be ready..."
            sleep 10
          elif [[ "\${CADDY_RUNNING:-0}" == "0" ]]; then
            echo "Caddy not running - starting it..."
            docker compose -f docker-compose.prod.yml up -d caddy
            echo "Waiting for Caddy to be ready..."
            sleep 10
          else
            echo "Caddy already running and no config changes - skipping restart"
          fi
          docker compose -f docker-compose.prod.yml ps
          EOF

  # Cleanup old Docker images
  cleanup:
    runs-on: ubuntu-latest
    needs: [deploy-caddy, sync]
    if: |
      always() &&
      needs.deploy-caddy.result == 'success'
    environment: production
    steps:
      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.HETZNER_SSH_PRIVATE_KEY }}
      - name: Add host to known_hosts
        run: |
          for i in {1..3}; do
            if ssh-keyscan -T 30 -H ${{ env.DOMAIN }} >> ~/.ssh/known_hosts 2>/dev/null; then
              echo "SSH key scan successful"
              exit 0
            fi
            echo "Attempt $i failed, retrying in 5s..."
            sleep 5
          done
          echo "::error::Failed to scan SSH host after 3 attempts"
          exit 1
      - name: Prune old Docker images
        run: |
          ssh -o ServerAliveInterval=30 -o ServerAliveCountMax=20 engram@${{ env.DOMAIN }} << 'EOF'
          echo "Pruning unused Docker images..."
          docker image prune -af
          echo ""
          echo "Pruning Docker build cache..."
          docker builder prune -af
          echo ""
          echo "Pruning unused Docker volumes..."
          docker volume prune -f
          echo ""
          echo "Pruning unused Docker networks..."
          docker network prune -f
          echo ""
          echo "Disk usage after cleanup:"
          docker system df
          df -h /
          EOF

  # Summary job
  summary:
    runs-on: ubuntu-latest
    needs:
      - changes
      - infrastructure
      - sync
      - deploy-api
      - deploy-search
      - deploy-tuner
      - deploy-observatory
      - deploy-console
      - deploy-ingestion
      - deploy-memory
      - deploy-infrastructure-services
      - deploy-caddy
      - cleanup
    if: always()
    steps:
      - name: Deployment Summary
        run: |
          echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Component | Changed | Status |" >> $GITHUB_STEP_SUMMARY
          echo "|-----------|---------|--------|" >> $GITHUB_STEP_SUMMARY
          echo "| Infrastructure | ${{ needs.changes.outputs.infra }} | ${{ needs.infrastructure.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| API | ${{ needs.changes.outputs.api }} | ${{ needs.deploy-api.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Search | ${{ needs.changes.outputs.search }} | ${{ needs.deploy-search.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Tuner | ${{ needs.changes.outputs.tuner }} | ${{ needs.deploy-tuner.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Observatory | ${{ needs.changes.outputs.observatory }} | ${{ needs.deploy-observatory.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Console | ${{ needs.changes.outputs.console }} | ${{ needs.deploy-console.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Ingestion | ${{ needs.changes.outputs.ingestion }} | ${{ needs.deploy-ingestion.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Memory | ${{ needs.changes.outputs.memory }} | ${{ needs.deploy-memory.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Docker Compose | ${{ needs.changes.outputs.docker_compose }} | ${{ needs.deploy-infrastructure-services.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "| Caddy | ${{ needs.changes.outputs.caddy }} | ${{ needs.deploy-caddy.result }} |" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Service URLs" >> $GITHUB_STEP_SUMMARY
          echo "- API: https://api.engram.rawcontext.com/v1" >> $GITHUB_STEP_SUMMARY
          echo "- Search: https://api.engram.rawcontext.com/v1/search" >> $GITHUB_STEP_SUMMARY
          echo "- Tuner: https://api.engram.rawcontext.com/v1/tuner" >> $GITHUB_STEP_SUMMARY
          echo "- Observatory: https://observatory.engram.rawcontext.com" >> $GITHUB_STEP_SUMMARY
          echo "- Console: https://console.engram.rawcontext.com" >> $GITHUB_STEP_SUMMARY