From 79e0497bdd928a477f6729868d570c44cc3026cf Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:55:47 +0530 Subject: [PATCH 1/3] Add a config to allow standard API to fetch data from BigQuery --- runtime/drivers/bigquery/bigquery.go | 7 +++++-- runtime/drivers/bigquery/warehouse.go | 25 ++++++++----------------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index da60f617e4eb..5a9407fa768c 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -58,8 +58,11 @@ type configProperties struct { // MaxBytesBilled is the maximum number of bytes billed for a query. This is a safety mechanism to prevent accidentally running large queries. // Set this to 0 for project defaults. // Only applies to dashboard queries and does not apply when ingesting data from BigQuery into Rill. - MaxBytesBilled int64 `mapstructure:"max_bytes_billed"` - AllowHostAccess bool `mapstructure:"allow_host_access"` + MaxBytesBilled int64 `mapstructure:"max_bytes_billed"` + // AllowStandardAPI lets users query BigQuery using the standard API instead of the Storage Read API. + // This is less efficient but may be necessary if users don't have access to the Storage Read API. + AllowStandardAPI bool `mapstructure:"allow_standard_api"` + AllowHostAccess bool `mapstructure:"allow_host_access"` // LogQueries controls whether to log the raw SQL passed to OLAP. LogQueries bool `mapstructure:"log_queries"` } diff --git a/runtime/drivers/bigquery/warehouse.go b/runtime/drivers/bigquery/warehouse.go index c7ff07d6f447..7ae119329c20 100644 --- a/runtime/drivers/bigquery/warehouse.go +++ b/runtime/drivers/bigquery/warehouse.go @@ -199,10 +199,11 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (ou } type fileIterator struct { - client *bigquery.Client - bqIter *bigquery.RowIterator - logger *zap.Logger - tempDir string + client *bigquery.Client + bqIter *bigquery.RowIterator + logger *zap.Logger + tempDir string + allowStandardAPI bool downloaded bool } @@ -235,6 +236,9 @@ func (f *fileIterator) Next(ctx context.Context) ([]string, error) { // storage API not available so can't read as arrow records. Read results row by row and dump in a json file. if !f.bqIter.IsAccelerated() { + if !f.allowStandardAPI { + return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Please provide necessary BigQuery roles") + } f.logger.Debug("downloading results in json file", observability.ZapCtx(ctx)) span.SetAttributes(attribute.Bool("storage_api", false)) @@ -385,18 +389,5 @@ func (f *fileIterator) downloadAsJSONFile(ctx context.Context) (string, error) { if err != nil { return "", fmt.Errorf("conversion of row to json failed with error: %w", err) } - - // If we don't have storage API access, BigQuery may return massive JSON results. (But even with storage API access, it may return JSON for small results.) - // We want to avoid JSON for massive results. Currently, the only way to do so is to error at a limit. - rows++ - if rows != 0 && rows%10000 == 0 { // Check file size every 10k rows - fileInfo, err := os.Stat(fw.Name()) - if err != nil { - return "", fmt.Errorf("bigquery: failed to poll json file size: %w", err) - } - if fileInfo.Size() >= _jsonDownloadLimitBytes { - return "", fmt.Errorf("bigquery: json download exceeded limit of %d bytes (enable and provide access to the BigQuery Storage Read API to read larger results)", _jsonDownloadLimitBytes) - } - } } } From 844755f80f86fbd3516749df215be5d2ca53834f Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:04:51 +0530 Subject: [PATCH 2/3] review --- runtime/drivers/bigquery/bigquery.go | 8 ++++---- runtime/drivers/bigquery/warehouse.go | 15 ++++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go index 5a9407fa768c..e06ad0ec6e59 100644 --- a/runtime/drivers/bigquery/bigquery.go +++ b/runtime/drivers/bigquery/bigquery.go @@ -58,11 +58,11 @@ type configProperties struct { // MaxBytesBilled is the maximum number of bytes billed for a query. This is a safety mechanism to prevent accidentally running large queries. // Set this to 0 for project defaults. // Only applies to dashboard queries and does not apply when ingesting data from BigQuery into Rill. - MaxBytesBilled int64 `mapstructure:"max_bytes_billed"` - // AllowStandardAPI lets users query BigQuery using the standard API instead of the Storage Read API. + MaxBytesBilled int64 `mapstructure:"max_bytes_billed"` + // AllowStandardAPI lets users query BigQuery using the standard API instead of the Storage Read API. // This is less efficient but may be necessary if users don't have access to the Storage Read API. - AllowStandardAPI bool `mapstructure:"allow_standard_api"` - AllowHostAccess bool `mapstructure:"allow_host_access"` + AllowStandardAPI bool `mapstructure:"allow_standard_api"` + AllowHostAccess bool `mapstructure:"allow_host_access"` // LogQueries controls whether to log the raw SQL passed to OLAP. LogQueries bool `mapstructure:"log_queries"` } diff --git a/runtime/drivers/bigquery/warehouse.go b/runtime/drivers/bigquery/warehouse.go index 7ae119329c20..89cc57e64022 100644 --- a/runtime/drivers/bigquery/warehouse.go +++ b/runtime/drivers/bigquery/warehouse.go @@ -33,8 +33,6 @@ var tracer = otel.Tracer("github.com/rilldata/rill/runtime/drivers/bigquery") // 64MB seems to be a good balance const rowGroupBufferSize = int64(datasize.MB) * 64 -const _jsonDownloadLimitBytes = 100 * int64(datasize.MB) - // Regex to parse BigQuery SELECT ALL statement: SELECT * FROM `project_id.dataset.table` var selectQueryRegex = regexp.MustCompile( `(?is)^\s*` + @@ -191,10 +189,11 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (ou return nil, err } return &fileIterator{ - client: client, - bqIter: it, - logger: c.logger, - tempDir: tempDir, + client: client, + bqIter: it, + logger: c.logger, + tempDir: tempDir, + allowStandardAPI: c.config.AllowStandardAPI, }, nil } @@ -237,7 +236,7 @@ func (f *fileIterator) Next(ctx context.Context) ([]string, error) { // storage API not available so can't read as arrow records. Read results row by row and dump in a json file. if !f.bqIter.IsAccelerated() { if !f.allowStandardAPI { - return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Please provide necessary BigQuery roles") + return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Granting the necessary BigQuery roles is recommended; alternatively, set 'allow_standard_api: true' on the connector to read results via the standard API (less efficient and may fail for large results)") } f.logger.Debug("downloading results in json file", observability.ZapCtx(ctx)) span.SetAttributes(attribute.Bool("storage_api", false)) @@ -389,5 +388,7 @@ func (f *fileIterator) downloadAsJSONFile(ctx context.Context) (string, error) { if err != nil { return "", fmt.Errorf("conversion of row to json failed with error: %w", err) } + + rows++ } } From 245da046a44b7500989e8f3cd0dba8e3e3b647f0 Mon Sep 17 00:00:00 2001 From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:10:23 +0530 Subject: [PATCH 3/3] docs update --- docs/docs/reference/project-files/connectors.md | 4 ++++ runtime/parser/schema/project.schema.yaml | 3 +++ 2 files changed, 7 insertions(+) diff --git a/docs/docs/reference/project-files/connectors.md b/docs/docs/reference/project-files/connectors.md index 31643dfaca91..05de056dd995 100644 --- a/docs/docs/reference/project-files/connectors.md +++ b/docs/docs/reference/project-files/connectors.md @@ -203,6 +203,10 @@ _[boolean]_ - Controls whether to log raw SQL queries _[integer]_ - Maximum number of bytes billed for a query. Queries that exceed this limit will fail with an error. This can help prevent unexpectedly high costs from large queries. It is highly recommended to set this when running on `on-demand pricing` model. The default value is 0 i.e. no limits are enforced in Rill. +### `allow_standard_api` + +_[boolean]_ - Allow querying BigQuery using the standard API instead of the Storage Read API. This is less efficient and may lead to higher latency, but can be used as a fallback if the Storage Read API is not available due to insufficient permissions or other issues. + ```yaml # Example: BigQuery connector configuration type: connector # Must be `connector` (required) diff --git a/runtime/parser/schema/project.schema.yaml b/runtime/parser/schema/project.schema.yaml index f9c57d64b7ab..964cf0ccfe38 100644 --- a/runtime/parser/schema/project.schema.yaml +++ b/runtime/parser/schema/project.schema.yaml @@ -216,6 +216,9 @@ definitions: max_bytes_billed: type: integer description: Maximum number of bytes billed for a query. Queries that exceed this limit will fail with an error. This can help prevent unexpectedly high costs from large queries. It is highly recommended to set this when running on `on-demand pricing` model. The default value is 0 i.e. no limits are enforced in Rill. + allow_standard_api: + type: boolean + description: Allow querying BigQuery using the standard API instead of the Storage Read API. This is less efficient and may lead to higher latency, but can be used as a fallback if the Storage Read API is not available due to insufficient permissions or other issues. examples: - # Example: BigQuery connector configuration type: connector # Must be `connector` (required)