Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/docs/reference/project-files/connectors.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ _[boolean]_ - Controls whether to log raw SQL queries

_[integer]_ - Maximum number of bytes billed for a query. Queries that exceed this limit will fail with an error. This can help prevent unexpectedly high costs from large queries. It is highly recommended to set this when using the `on-demand pricing` model. The default value is 0, i.e., no limit is enforced by Rill.

### `allow_standard_api`

_[boolean]_ - Allow reading BigQuery query results through the standard API when the Storage Read API cannot be used, for example because of insufficient permissions. The standard API is less efficient and may lead to higher latency, so treat it as a fallback rather than a replacement for the Storage Read API.

```yaml
# Example: BigQuery connector configuration
type: connector # Must be `connector` (required)
Expand Down
7 changes: 5 additions & 2 deletions runtime/drivers/bigquery/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,11 @@ type configProperties struct {
// MaxBytesBilled is the maximum number of bytes billed for a query. This is a safety mechanism to prevent accidentally running large queries.
// Set this to 0 for project defaults.
// Only applies to dashboard queries and does not apply when ingesting data from BigQuery into Rill.
MaxBytesBilled int64 `mapstructure:"max_bytes_billed"`
AllowHostAccess bool `mapstructure:"allow_host_access"`
MaxBytesBilled int64 `mapstructure:"max_bytes_billed"`
// AllowStandardAPI lets users query BigQuery using the standard API instead of the Storage Read API.
// This is less efficient but may be necessary if users don't have access to the Storage Read API.
AllowStandardAPI bool `mapstructure:"allow_standard_api"`
AllowHostAccess bool `mapstructure:"allow_host_access"`
// LogQueries controls whether to log the raw SQL passed to OLAP.
LogQueries bool `mapstructure:"log_queries"`
}
Expand Down
34 changes: 13 additions & 21 deletions runtime/drivers/bigquery/warehouse.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ var tracer = otel.Tracer("github.com/rilldata/rill/runtime/drivers/bigquery")
// 64MB seems to be a good balance
const rowGroupBufferSize = int64(datasize.MB) * 64

const _jsonDownloadLimitBytes = 100 * int64(datasize.MB)

// Regex to parse BigQuery SELECT ALL statement: SELECT * FROM `project_id.dataset.table`
var selectQueryRegex = regexp.MustCompile(
`(?is)^\s*` +
Expand Down Expand Up @@ -191,18 +189,20 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any) (ou
return nil, err
}
return &fileIterator{
client: client,
bqIter: it,
logger: c.logger,
tempDir: tempDir,
client: client,
bqIter: it,
logger: c.logger,
tempDir: tempDir,
allowStandardAPI: c.config.AllowStandardAPI,
}, nil
}

type fileIterator struct {
client *bigquery.Client
bqIter *bigquery.RowIterator
logger *zap.Logger
tempDir string
client *bigquery.Client
bqIter *bigquery.RowIterator
logger *zap.Logger
tempDir string
allowStandardAPI bool

downloaded bool
}
Expand Down Expand Up @@ -235,6 +235,9 @@ func (f *fileIterator) Next(ctx context.Context) ([]string, error) {

// storage API not available so can't read as arrow records. Read results row by row and dump in a json file.
if !f.bqIter.IsAccelerated() {
if !f.allowStandardAPI {
return nil, fmt.Errorf("bigquery: query results cannot be read with the BigQuery Storage Read API. Granting the necessary BigQuery roles is recommended; alternatively, set 'allow_standard_api: true' on the connector to read results via the standard API (less efficient and may fail for large results)")
}
f.logger.Debug("downloading results in json file", observability.ZapCtx(ctx))
span.SetAttributes(attribute.Bool("storage_api", false))

Expand Down Expand Up @@ -386,17 +389,6 @@ func (f *fileIterator) downloadAsJSONFile(ctx context.Context) (string, error) {
return "", fmt.Errorf("conversion of row to json failed with error: %w", err)
}

// If we don't have storage API access, BigQuery may return massive JSON results. (But even with storage API access, it may return JSON for small results.)
// We want to avoid JSON for massive results. Currently, the only way to do so is to error at a limit.
rows++
if rows != 0 && rows%10000 == 0 { // Check file size every 10k rows
fileInfo, err := os.Stat(fw.Name())
if err != nil {
return "", fmt.Errorf("bigquery: failed to poll json file size: %w", err)
}
if fileInfo.Size() >= _jsonDownloadLimitBytes {
return "", fmt.Errorf("bigquery: json download exceeded limit of %d bytes (enable and provide access to the BigQuery Storage Read API to read larger results)", _jsonDownloadLimitBytes)
}
}
}
}
3 changes: 3 additions & 0 deletions runtime/parser/schema/project.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ definitions:
max_bytes_billed:
type: integer
description: Maximum number of bytes billed for a query. Queries that exceed this limit will fail with an error. This can help prevent unexpectedly high costs from large queries. It is highly recommended to set this when running on `on-demand pricing` model. The default value is 0 i.e. no limits are enforced in Rill.
allow_standard_api:
type: boolean
description: Allow querying BigQuery using the standard API instead of the Storage Read API. This is less efficient and may lead to higher latency, but can be used as a fallback if the Storage Read API is not available due to insufficient permissions or other issues.
examples:
- # Example: BigQuery connector configuration
type: connector # Must be `connector` (required)
Expand Down
Loading