38 changes: 38 additions & 0 deletions src/export/dump.test.ts
@@ -128,6 +128,44 @@ describe('Database Dump Module', () => {
)
})

it('should paginate through large tables instead of loading all rows at once', async () => {
// A full page signals there may be more rows, triggering another query.
const fullPage = Array.from({ length: 1000 }, (_, i) => ({
id: i + 1,
name: `User${i + 1}`,
}))
const partialPage = [{ id: 1001, name: 'User1001' }]

vi.mocked(executeOperation)
.mockResolvedValueOnce([{ name: 'users' }]) // table list
.mockResolvedValueOnce([
{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' },
]) // schema
.mockResolvedValueOnce(fullPage) // data page 1 (full -> fetch more)
.mockResolvedValueOnce(partialPage) // data page 2 (partial -> stop)

const response = await dumpDatabaseRoute(mockDataSource, mockConfig)

expect(response).toBeInstanceOf(Response)
const dumpText = await response.text()

// tables + schema + 2 data pages
expect(executeOperation).toHaveBeenCalledTimes(4)
expect(dumpText).toContain("INSERT INTO users VALUES (1, 'User1');")
expect(dumpText).toContain(
"INSERT INTO users VALUES (1001, 'User1001');"
)

// Data queries should be paginated with LIMIT/OFFSET.
const firstDataQuery =
vi.mocked(executeOperation).mock.calls[2][0][0].sql
const secondDataQuery =
vi.mocked(executeOperation).mock.calls[3][0][0].sql
expect(firstDataQuery).toContain('LIMIT')
expect(firstDataQuery).toContain('OFFSET 0')
expect(secondDataQuery).toContain('OFFSET 1000')
})

it('should return a 500 response when an error occurs', async () => {
const consoleErrorMock = vi
.spyOn(console, 'error')
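
A minimal sketch (not part of this PR) of the LIMIT/OFFSET paging pattern the new test exercises, assuming an injected query runner; pageThroughTable, RunQuery, and Row are illustrative names only. The stop condition is a page shorter than the requested window, which is exactly what the partial second mock page above asserts.

type Row = Record<string, unknown>
type RunQuery = (sql: string) => Promise<Row[]>

async function* pageThroughTable(
    run: RunQuery,
    table: string,
    pageSize = 1000
): AsyncGenerator<Row[]> {
    let offset = 0
    while (true) {
        const page = await run(
            `SELECT * FROM ${table} LIMIT ${pageSize} OFFSET ${offset};`
        )
        if (page.length > 0) {
            yield page
        }
        // A short (or empty) page means the table is exhausted.
        if (page.length < pageSize) {
            return
        }
        offset += pageSize
    }
}
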
117 changes: 80 additions & 37 deletions src/export/dump.ts
@@ -3,67 +3,110 @@ import { StarbaseDBConfiguration } from '../handler'
import { DataSource } from '../types'
import { createResponse } from '../utils'

// Number of rows to read from a table per query. Keeping this bounded means
// we never materialize an entire (potentially multi-GB) table in memory at
// once while building the dump.
const DUMP_PAGE_SIZE = 1000

export async function dumpDatabaseRoute(
dataSource: DataSource,
config: StarbaseDBConfiguration
): Promise<Response> {
try {
// Get all table names
// Get all table names up front so we can fail fast (with a proper 500
// response) if the database is unreachable.
const tablesResult = await executeOperation(
[{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }],
dataSource,
config
)

const tables = tablesResult.map((row: any) => row.name)
let dumpContent = 'SQLite format 3\0' // SQLite file header
const encoder = new TextEncoder()

// Stream the dump out instead of buffering the whole database into a
// single string. This keeps memory usage flat regardless of database
// size, and because the response body is produced incrementally the
// connection stays alive past the 30s request window for large dumps.
const stream = new ReadableStream({
async start(controller) {
try {
controller.enqueue(encoder.encode('SQLite format 3\0')) // SQLite file header

// Iterate through all tables
for (const table of tables) {
// Get table schema
const schemaResult = await executeOperation(
[
{
sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
},
],
dataSource,
config
)
// Iterate through all tables
for (const table of tables) {
// Get table schema
const schemaResult = await executeOperation(
[
{
sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`,
},
],
dataSource,
config
)

if (schemaResult.length) {
const schema = schemaResult[0].sql
dumpContent += `\n-- Table: ${table}\n${schema};\n\n`
}
if (schemaResult.length) {
const schema = schemaResult[0].sql
controller.enqueue(
encoder.encode(
`\n-- Table: ${table}\n${schema};\n\n`
)
)
}

// Get table data
const dataResult = await executeOperation(
[{ sql: `SELECT * FROM ${table};` }],
dataSource,
config
)
// Get table data one page at a time so a single large
// table never has to fit in memory all at once.
let offset = 0
while (true) {
const dataResult = await executeOperation(
[
{
sql: `SELECT * FROM ${table} LIMIT ${DUMP_PAGE_SIZE} OFFSET ${offset};`,
},
],
dataSource,
config
)

for (const row of dataResult) {
const values = Object.values(row).map((value) =>
typeof value === 'string'
? `'${value.replace(/'/g, "''")}'`
: value
)
dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n`
}
for (const row of dataResult) {
const values = Object.values(row).map(
(value) =>
typeof value === 'string'
? `'${value.replace(/'/g, "''")}'`
: value
)
controller.enqueue(
encoder.encode(
`INSERT INTO ${table} VALUES (${values.join(', ')});\n`
)
)
}

dumpContent += '\n'
}
// A short page means we've reached the end of the table.
if (dataResult.length < DUMP_PAGE_SIZE) {
break
}
offset += DUMP_PAGE_SIZE
}

// Create a Blob from the dump content
const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' })
controller.enqueue(encoder.encode('\n'))
}

controller.close()
} catch (error: any) {
console.error('Database Dump Error:', error)
controller.error(error)
}
},
})

const headers = new Headers({
'Content-Type': 'application/x-sqlite3',
'Content-Disposition': 'attachment; filename="database_dump.sql"',
})

return new Response(blob, { headers })
return new Response(stream, { headers })
} catch (error: any) {
console.error('Database Dump Error:', error)
return createResponse(undefined, 'Failed to create database dump', 500)
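
A minimal consumer-side sketch showing why returning a ReadableStream matters; the /export/dump path and a fetch/Web Streams environment are assumptions, not something this PR defines. Chunks arrive as the server enqueues them, so a client can write the dump to disk or report progress without ever holding the whole file in memory.

async function downloadDump(baseUrl: string): Promise<number> {
    const response = await fetch(`${baseUrl}/export/dump`)
    if (!response.ok || !response.body) {
        throw new Error(`Dump request failed with status ${response.status}`)
    }

    const reader = response.body.getReader()
    let receivedBytes = 0
    while (true) {
        const { done, value } = await reader.read()
        if (done) break
        // Each chunk is a slice of the SQL text produced by the stream above;
        // hand it to a file writer or progress callback here.
        receivedBytes += value.byteLength
    }
    return receivedBytes
}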