Skip to content

Commit 2adab68

Browse files
tswast, gemini-code-assist[bot], iennae, glasnt
authored
feat: add bigframes calling Python UDF code sample (#13919)
* feat: add bigframes calling Python UDF code sample

  This sample is intended for https://docs.cloud.google.com/bigquery/docs/user-defined-functions-python#udf_users

  See internal issue b/494558638.
* add codeowners
* run linter
* Update bigquery/bigframes/noxfile_config.py

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
* fix conftest
* add dedent
* add partial ordering mode
* add type annotation
* split tests
* remove unused import
* fix: update ignored_versions to avoid 3.8 testing as per @glasnt

  Co-authored-by: Katie McLaughlin <katie@glasnt.com>

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Jennifer Davis <iennae@gmail.com>
Co-authored-by: Katie McLaughlin <katie@glasnt.com>
1 parent c2e88e3 commit 2adab68

9 files changed

Lines changed: 209 additions & 0 deletions

File tree

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
/appengine/standard_python3/spanner/* @GoogleCloudPlatform/api-spanner-python @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
7878
/asset/**/* @GoogleCloudPlatform/cloud-asset-analysis-team @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
7979
/bigquery/**/* @chalmerlowe @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
80+
/bigquery/bigframes/**/* @tswast @GoogleCloudPlatform/api-bigquery @GoogleCloudPlatform/cloud-samples-reviewers
8081
/bigquery/remote_function/**/* @autoerr @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
8182
/cloud-media-livestream/**/* @GoogleCloudPlatform/cloud-media-team @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
8283
/bigquery-connection/**/* @GoogleCloudPlatform/api-bigquery @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers

bigquery/bigframes/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
noxfile.py

bigquery/bigframes/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# BigQuery DataFrames code samples
2+
3+
This directory contains code samples for [BigQuery DataFrames (aka
4+
BigFrames)](https://dataframes.bigquery.dev/).
5+
6+
To install BigQuery DataFrames, run:
7+
8+
```
9+
pip install --upgrade bigframes
10+
```
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# [START bigquery_dataframes_call_python_udf]
17+
import textwrap
18+
from typing import Tuple
19+
20+
import bigframes.pandas as bpd
21+
import pandas as pd
22+
import pyarrow as pa
23+
24+
25+
# Using partial ordering mode enables more efficient query optimizations.
26+
bpd.options.bigquery.ordering_mode = "partial"
27+
28+
29+
def call_python_udf(
    project_id: str,
    location: str,
) -> Tuple[pd.Series, bpd.Series]:
    """Extract book titles from XML strings by calling a BigQuery UDF.

    The same UDF (``bqutil.fn.cw_xml_extract``) is invoked two ways: through
    the ``bigquery`` accessor on a local pandas DataFrame, and through
    ``bpd.read_gbq_function`` applied to a BigQuery DataFrames Series.

    As a side effect, this assigns ``bpd.options.bigquery.project`` and
    ``bpd.options.bigquery.location``.

    Args:
        project_id: Billing project to use for queries.
        location: Processing location for the queries, for example "US".

    Returns:
        A ``(titles_pandas, titles_bigframes)`` tuple with the result of the
        accessor-based call and the ``read_gbq_function``-based call.
    """
    # Set the billing project to use for queries. This step is optional, as the
    # project can be inferred from your environment in many cases.
    bpd.options.bigquery.project = project_id  # "your-project-id"

    # Since this example works with local data, set a processing location.
    bpd.options.bigquery.location = location  # "US"

    # Create a sample series.
    xml_series = pd.Series(
        [
            textwrap.dedent(
                """
                <book id="1">
                <title>The Great Gatsby</title>
                <author>F. Scott Fitzgerald</author>
                </book>
                """
            ),
            textwrap.dedent(
                """
                <book id="2">
                <title>1984</title>
                <author>George Orwell</author>
                </book>
                """
            ),
            textwrap.dedent(
                """
                <book id="3">
                <title>Brave New World</title>
                <author>Aldous Huxley</author>
                </book>
                """
            ),
        ],
        dtype=pd.ArrowDtype(pa.string()),
    )
    df = pd.DataFrame({"xml": xml_series})

    # Use the BigQuery Accessor, which is automatically registered on pandas
    # DataFrames when you import bigframes. This example uses a function that
    # has been deployed to bigquery-utils for demonstration purposes. To use in
    # production, deploy the function at
    # https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/cw_xml_extract.sqlx
    # to your own project.
    titles_pandas = df.bigquery.sql_scalar(
        "`bqutil`.`fn`.cw_xml_extract({xml}, '//title/text()')",
    )

    # Alternatively, call read_gbq_function to get a pointer to the function
    # that can be applied on BigQuery DataFrames objects.
    cw_xml_extract = bpd.read_gbq_function("bqutil.fn.cw_xml_extract")
    xml_bigframes = bpd.read_pandas(xml_series)

    xpath_query = "//title/text()"
    titles_bigframes = xml_bigframes.apply(cw_xml_extract, args=(xpath_query,))
    return titles_pandas, titles_bigframes
89+
# [END bigquery_dataframes_call_python_udf]
90+
91+
92+
if __name__ == "__main__":
    # Command-line entry point: run the sample and print both results.
    import argparse

    parser = argparse.ArgumentParser()

    # Note: GCP project ID can be inferred from the environment if Application
    # Default Credentials are set, so None is perfectly valid for --project_id.
    parser.add_argument("--project_id", type=str)
    parser.add_argument("--location", default="US", type=str)
    args = parser.parse_args()

    titles_pandas, titles_bigframes = call_python_udf(
        project_id=args.project_id,
        location=args.location,
    )
    print(titles_pandas)
    # Download the BigQuery DataFrames result locally before printing it.
    print(titles_bigframes.to_pandas())
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.pandas as bpd
16+
17+
import call_python_udf
18+
19+
20+
def test_call_python_udf(project_id: str, location: str) -> None:
    """Run the sample and check that each approach returns three rows.

    Args:
        project_id: Project passed through to the sample.
        location: Processing location passed through to the sample.
    """
    # Close any existing global session before the sample runs (presumably so
    # the options the sample assigns are not rejected by a started session).
    bpd.close_session()

    pd_result, bf_result = call_python_udf.call_python_udf(
        project_id=project_id,
        location=location,
    )

    # The sample feeds in three XML documents, so each result has three rows.
    assert len(pd_result.index) == 3
    assert len(bf_result.index) == 3

bigquery/bigframes/conftest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import pytest
18+
19+
20+
@pytest.fixture(scope="session")
def project_id() -> str:
    """Return the project ID from the ``GOOGLE_CLOUD_PROJECT`` environment variable.

    Raises:
        KeyError: If ``GOOGLE_CLOUD_PROJECT`` is not set.
    """
    return os.environ["GOOGLE_CLOUD_PROJECT"]
23+
24+
25+
@pytest.fixture(scope="session")
def location() -> str:
    """Return the fixed location string ("US") handed to tests."""
    return "US"
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Default TEST_CONFIG_OVERRIDE for python repos.
16+
17+
# You can copy this file into your directory, then it will be imported from
18+
# the noxfile.py.
19+
20+
# The source of truth:
21+
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py
22+
23+
TEST_CONFIG_OVERRIDE = {
    # You can opt out from the test for specific Python versions.
    "ignored_versions": ["2.7", "3.6", "3.8", "3.9", "3.11"],
    # Old samples are opted out of enforcing Python type hints
    # All new samples should feature them
    "enforce_type_hints": True,
    # An envvar key for determining the project id to use. Change it
    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
    # build specific Cloud project. You can also use your own string
    # to use your own Cloud project.
    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
    # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
    # A dictionary you want to inject into your test. Don't put any
    # secrets here. These values will override predefined values.
    "envs": {},
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
flaky==3.8.1
2+
pytest==8.2.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bigframes==2.38.0

0 commit comments

Comments
 (0)