From 79e0497bdd928a477f6729868d570c44cc3026cf Mon Sep 17 00:00:00 2001
From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:55:47 +0530
Subject: [PATCH 1/3] Add a config to allow standard API to fetch data from
 BigQuery

---
 runtime/drivers/bigquery/bigquery.go  |  7 +++++--
 runtime/drivers/bigquery/warehouse.go | 25 ++++++++-----------------
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go
index da60f617e4eb..5a9407fa768c 100644
--- a/runtime/drivers/bigquery/bigquery.go
+++ b/runtime/drivers/bigquery/bigquery.go
@@ -58,8 +58,11 @@ type configProperties struct {
 	// MaxBytesBilled is the maximum number of bytes billed for a query. This is a safety mechanism to prevent accidentally running large queries.
 	// Set this to 0 for project defaults.
 	// Only applies to dashboard queries and does not apply when ingesting data from BigQuery into Rill.
-	MaxBytesBilled  int64 `mapstructure:"max_bytes_billed"`
-	AllowHostAccess bool  `mapstructure:"allow_host_access"`
+	MaxBytesBilled   int64 `mapstructure:"max_bytes_billed"`
+	// AllowStandardAPI lets users query BigQuery using the standard API instead of the Storage Read API. 
+	// This is less efficient but may be necessary if users don't have access to the Storage Read API.
+	AllowStandardAPI bool  `mapstructure:"allow_standard_api"`
+	AllowHostAccess  bool  `mapstructure:"allow_host_access"`
 	// LogQueries controls whether to log the raw SQL passed to OLAP.
 	LogQueries bool `mapstructure:"log_queries"`
 }
diff --git a/runtime/drivers/bigquery/warehouse.go b/runtime/drivers/bigquery/warehouse.go
index c7ff07d6f447..7ae119329c20 100644
--- a/runtime/drivers/bigquery/warehouse.go
+++ b/runtime/drivers/bigquery/warehouse.go
@@ -199,10 +199,11 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (ou
 }
 
 type fileIterator struct {
-	client  *bigquery.Client
-	bqIter  *bigquery.RowIterator
-	logger  *zap.Logger
-	tempDir string
+	client           *bigquery.Client
+	bqIter           *bigquery.RowIterator
+	logger           *zap.Logger
+	tempDir          string
+	allowStandardAPI bool
 
 	downloaded bool
 }
@@ -235,6 +236,9 @@ func (f *fileIterator) Next(ctx context.Context) ([]string, error) {
 
 	// storage API not available so can't read as arrow records. Read results row by row and dump in a json file.
 	if !f.bqIter.IsAccelerated() {
+		if !f.allowStandardAPI {
+			return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Please provide necessary BigQuery roles")
+		}
 		f.logger.Debug("downloading results in json file", observability.ZapCtx(ctx))
 		span.SetAttributes(attribute.Bool("storage_api", false))
 
@@ -385,18 +389,5 @@ func (f *fileIterator) downloadAsJSONFile(ctx context.Context) (string, error) {
 		if err != nil {
 			return "", fmt.Errorf("conversion of row to json failed with error: %w", err)
 		}
-
-		// If we don't have storage API access, BigQuery may return massive JSON results. (But even with storage API access, it may return JSON for small results.)
-		// We want to avoid JSON for massive results. Currently, the only way to do so is to error at a limit.
-		rows++
-		if rows != 0 && rows%10000 == 0 { // Check file size every 10k rows
-			fileInfo, err := os.Stat(fw.Name())
-			if err != nil {
-				return "", fmt.Errorf("bigquery: failed to poll json file size: %w", err)
-			}
-			if fileInfo.Size() >= _jsonDownloadLimitBytes {
-				return "", fmt.Errorf("bigquery: json download exceeded limit of %d bytes (enable and provide access to the BigQuery Storage Read API to read larger results)", _jsonDownloadLimitBytes)
-			}
-		}
 	}
 }

From 844755f80f86fbd3516749df215be5d2ca53834f Mon Sep 17 00:00:00 2001
From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:04:51 +0530
Subject: [PATCH 2/3] Address review: wire allow_standard_api into QueryAsFiles and clarify error

---
 runtime/drivers/bigquery/bigquery.go  |  8 ++++----
 runtime/drivers/bigquery/warehouse.go | 15 ++++++++-------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/runtime/drivers/bigquery/bigquery.go b/runtime/drivers/bigquery/bigquery.go
index 5a9407fa768c..e06ad0ec6e59 100644
--- a/runtime/drivers/bigquery/bigquery.go
+++ b/runtime/drivers/bigquery/bigquery.go
@@ -58,11 +58,11 @@ type configProperties struct {
 	// MaxBytesBilled is the maximum number of bytes billed for a query. This is a safety mechanism to prevent accidentally running large queries.
 	// Set this to 0 for project defaults.
 	// Only applies to dashboard queries and does not apply when ingesting data from BigQuery into Rill.
-	MaxBytesBilled   int64 `mapstructure:"max_bytes_billed"`
-	// AllowStandardAPI lets users query BigQuery using the standard API instead of the Storage Read API. 
+	MaxBytesBilled int64 `mapstructure:"max_bytes_billed"`
+	// AllowStandardAPI lets users fall back to the standard BigQuery API when query results cannot be read via the Storage Read API.
 	// This is less efficient but may be necessary if users don't have access to the Storage Read API.
-	AllowStandardAPI bool  `mapstructure:"allow_standard_api"`
-	AllowHostAccess  bool  `mapstructure:"allow_host_access"`
+	AllowStandardAPI bool `mapstructure:"allow_standard_api"`
+	AllowHostAccess  bool `mapstructure:"allow_host_access"`
 	// LogQueries controls whether to log the raw SQL passed to OLAP.
 	LogQueries bool `mapstructure:"log_queries"`
 }
diff --git a/runtime/drivers/bigquery/warehouse.go b/runtime/drivers/bigquery/warehouse.go
index 7ae119329c20..89cc57e64022 100644
--- a/runtime/drivers/bigquery/warehouse.go
+++ b/runtime/drivers/bigquery/warehouse.go
@@ -33,8 +33,6 @@ var tracer = otel.Tracer("github.com/rilldata/rill/runtime/drivers/bigquery")
 // 64MB seems to be a good balance
 const rowGroupBufferSize = int64(datasize.MB) * 64
 
-const _jsonDownloadLimitBytes = 100 * int64(datasize.MB)
-
 // Regex to parse BigQuery SELECT ALL statement: SELECT * FROM `project_id.dataset.table`
 var selectQueryRegex = regexp.MustCompile(
 	`(?is)^\s*` +
@@ -191,10 +189,11 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (ou
 		return nil, err
 	}
 	return &fileIterator{
-		client:  client,
-		bqIter:  it,
-		logger:  c.logger,
-		tempDir: tempDir,
+		client:           client,
+		bqIter:           it,
+		logger:           c.logger,
+		tempDir:          tempDir,
+		allowStandardAPI: c.config.AllowStandardAPI,
 	}, nil
 }
 
@@ -237,7 +236,7 @@ func (f *fileIterator) Next(ctx context.Context) ([]string, error) {
 	// storage API not available so can't read as arrow records. Read results row by row and dump in a json file.
 	if !f.bqIter.IsAccelerated() {
 		if !f.allowStandardAPI {
-			return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Please provide necessary BigQuery roles")
+			return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Granting the necessary BigQuery roles is recommended; alternatively, set 'allow_standard_api: true' on the connector to read results via the standard API (less efficient and may fail for large results)")
 		}
 		f.logger.Debug("downloading results in json file", observability.ZapCtx(ctx))
 		span.SetAttributes(attribute.Bool("storage_api", false))
@@ -389,5 +388,7 @@ func (f *fileIterator) downloadAsJSONFile(ctx context.Context) (string, error) {
 		if err != nil {
 			return "", fmt.Errorf("conversion of row to json failed with error: %w", err)
 		}
+
+		rows++
 	}
 }

From 245da046a44b7500989e8f3cd0dba8e3e3b647f0 Mon Sep 17 00:00:00 2001
From: Anshul Khandelwal <12948312+k-anshul@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:10:23 +0530
Subject: [PATCH 3/3] docs update

---
 docs/docs/reference/project-files/connectors.md | 4 ++++
 runtime/parser/schema/project.schema.yaml       | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/docs/docs/reference/project-files/connectors.md b/docs/docs/reference/project-files/connectors.md
index 31643dfaca91..05de056dd995 100644
--- a/docs/docs/reference/project-files/connectors.md
+++ b/docs/docs/reference/project-files/connectors.md
@@ -203,6 +203,10 @@ _[boolean]_ - Controls whether to log raw SQL queries
 
 _[integer]_ - Maximum number of bytes billed for a query. Queries that exceed this limit will fail with an error. This can help prevent unexpectedly high costs from large queries. It is highly recommended to set this when running on `on-demand pricing` model. The default value is 0 i.e. no limits are enforced in Rill. 
 
+### `allow_standard_api`
+
+_[boolean]_ - Allow reading query results through the standard BigQuery API when the Storage Read API cannot be used (for example, due to insufficient permissions). This is less efficient and may lead to higher latency. When not enabled, queries whose results cannot be read through the Storage Read API fail with an error. 
+
 ```yaml
 # Example: BigQuery connector configuration
 type: connector # Must be `connector` (required)
diff --git a/runtime/parser/schema/project.schema.yaml b/runtime/parser/schema/project.schema.yaml
index f9c57d64b7ab..964cf0ccfe38 100644
--- a/runtime/parser/schema/project.schema.yaml
+++ b/runtime/parser/schema/project.schema.yaml
@@ -216,6 +216,9 @@ definitions:
             max_bytes_billed:
               type: integer
               description: Maximum number of bytes billed for a query. Queries that exceed this limit will fail with an error. This can help prevent unexpectedly high costs from large queries. It is highly recommended to set this when running on `on-demand pricing` model. The default value is 0 i.e. no limits are enforced in Rill.
+            allow_standard_api:
+              type: boolean
+              description: Allow reading query results through the standard BigQuery API when the Storage Read API cannot be used (for example, due to insufficient permissions). This is less efficient and may lead to higher latency. When not enabled, queries whose results cannot be read through the Storage Read API fail with an error.
           examples:
             - # Example: BigQuery connector configuration
               type: connector                                  # Must be `connector` (required)