feat: add bigframes calling Python UDF code sample (#13919)

tswast · gemini-code-assist[bot] · iennae · web-flow · commit 2adab684ea52 · 2026-04-07T19:11:48.000-07:00
* feat: add bigframes calling Python UDF code sample This sample is intended for https://docs.cloud.google.com/bigquery/docs/user-defined-functions-python#udf_users See internal issue b/494558638. * add codeowners * run linter * Update bigquery/bigframes/noxfile_config.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * fix conftest * add dedent * add partial ordering mode * add type annotation * split tests * remove unused import * fix: update ignored_Versions to avoid 3.8 testing as per @glasnt Co-authored-by: Katie McLaughlin <katie@glasnt.com> --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Jennifer Davis <iennae@gmail.com> Co-authored-by: Katie McLaughlin <katie@glasnt.com>
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -77,6 +77,7 @@
 /appengine/standard_python3/spanner/*  @GoogleCloudPlatform/api-spanner-python @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
 /asset/**/*                            @GoogleCloudPlatform/cloud-asset-analysis-team @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
 /bigquery/**/*                         @chalmerlowe @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
+/bigquery/bigframes/**/*               @tswast @GoogleCloudPlatform/api-bigquery @GoogleCloudPlatform/cloud-samples-reviewers
 /bigquery/remote_function/**/*         @autoerr @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
 /cloud-media-livestream/**/*           @GoogleCloudPlatform/cloud-media-team @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
 /bigquery-connection/**/*              @GoogleCloudPlatform/api-bigquery @GoogleCloudPlatform/python-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
diff --git a/bigquery/bigframes/.gitignore b/bigquery/bigframes/.gitignore
@@ -0,0 +1 @@
+noxfile.py
diff --git a/bigquery/bigframes/README.md b/bigquery/bigframes/README.md
@@ -0,0 +1,10 @@
+# BigQuery DataFrames code samples
+
+This directory contains code samples for [BigQuery DataFrames (aka
+BigFrames)](https://dataframes.bigquery.dev/).
+
+To install BigQuery DataFrames, run:
+
+```
+pip install --upgrade bigframes
+```
diff --git a/bigquery/bigframes/call_python_udf.py b/bigquery/bigframes/call_python_udf.py
@@ -0,0 +1,105 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# [START bigquery_dataframes_call_python_udf]
+import textwrap
+from typing import Tuple
+
+import bigframes.pandas as bpd
+import pandas as pd
+import pyarrow as pa
+
+
+# Using partial ordering mode enables more efficient query optimizations.
+# This assignment sits at module level, so it runs when the module is
+# imported, before call_python_udf is invoked.
+bpd.options.bigquery.ordering_mode = "partial"
+
+
+def call_python_udf(
+    project_id: str,
+    location: str,
+) -> Tuple[pd.Series, bpd.Series]:
+    """Extract book titles from sample XML by calling a deployed UDF two ways.
+
+    Args:
+        project_id: Billing project to use for queries.
+        location: Processing location for the queries, for example "US".
+
+    Returns:
+        A pair of title series: the first is produced through the `bigquery`
+        accessor on a pandas DataFrame, the second by applying the function
+        to a BigQuery DataFrames Series.
+    """
+    # Set the billing project to use for queries. This step is optional, as the
+    # project can be inferred from your environment in many cases.
+    bpd.options.bigquery.project = project_id  # "your-project-id"
+
+    # Since this example works with local data, set a processing location.
+    bpd.options.bigquery.location = location  # "US"
+
+    # Sample XML documents, one per book.
+    book_documents = [
+        textwrap.dedent(
+            """
+            <book id="1">
+                <title>The Great Gatsby</title>
+                <author>F. Scott Fitzgerald</author>
+            </book>
+            """
+        ),
+        textwrap.dedent(
+            """
+            <book id="2">
+                <title>1984</title>
+                <author>George Orwell</author>
+            </book>
+            """
+        ),
+        textwrap.dedent(
+            """
+            <book id="3">
+                <title>Brave New World</title>
+                <author>Aldous Huxley</author>
+            </book>
+            """
+        ),
+    ]
+    xml_series = pd.Series(book_documents, dtype=pd.ArrowDtype(pa.string()))
+    books = pd.DataFrame({"xml": xml_series})
+
+    # Use the BigQuery Accessor, which is automatically registered on pandas
+    # DataFrames when you import bigframes. This example uses a function that
+    # has been deployed to bigquery-utils for demonstration purposes. To use in
+    # production, deploy the function at
+    # https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/cw_xml_extract.sqlx
+    # to your own project.
+    title_sql = "`bqutil`.`fn`.cw_xml_extract({xml}, '//title/text()')"
+    titles_pandas = books.bigquery.sql_scalar(title_sql)
+
+    # Alternatively, call read_gbq_function to get a pointer to the function
+    # that can be applied on BigQuery DataFrames objects.
+    xml_extract_udf = bpd.read_gbq_function("bqutil.fn.cw_xml_extract")
+    remote_xml = bpd.read_pandas(xml_series)
+
+    # The XPath expression is passed as the function's second argument.
+    titles_bigframes = remote_xml.apply(
+        xml_extract_udf,
+        args=("//title/text()",),
+    )
+    return titles_pandas, titles_bigframes
+    # [END bigquery_dataframes_call_python_udf]
+
+
+if __name__ == "__main__":
+    import argparse
+
+    cli = argparse.ArgumentParser()
+
+    # Note: --project_id may be omitted (leaving it as None), because the GCP
+    # project ID can be inferred from the environment if Application Default
+    # Credentials are set.
+    cli.add_argument("--project_id", type=str)
+    cli.add_argument("--location", default="US", type=str)
+    options = cli.parse_args()
+
+    titles_pandas, titles_bigframes = call_python_udf(
+        project_id=options.project_id,
+        location=options.location,
+    )
+    print(titles_pandas)
+    # Download the BigQuery DataFrames result to pandas before printing.
+    print(titles_bigframes.to_pandas())
diff --git a/bigquery/bigframes/call_python_udf_test.py b/bigquery/bigframes/call_python_udf_test.py
@@ -0,0 +1,24 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import bigframes.pandas as bpd
+
+import call_python_udf
+
+
+def test_call_python_udf(project_id: str, location: str) -> None:
+    """Run the sample end to end and check both results have one row per book."""
+    # Close any existing global session before the sample assigns its own
+    # bigframes options.
+    bpd.close_session()
+
+    titles_pandas, titles_bigframes = call_python_udf.call_python_udf(
+        project_id=project_id,
+        location=location,
+    )
+
+    # The sample builds three XML documents, so each result has three rows.
+    expected_rows = 3
+    assert len(titles_pandas.index) == expected_rows
+    assert len(titles_bigframes.index) == expected_rows
diff --git a/bigquery/bigframes/conftest.py b/bigquery/bigframes/conftest.py
@@ -0,0 +1,27 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def project_id() -> str:
+    # Session-scoped fixture: the project ID is read from the
+    # GOOGLE_CLOUD_PROJECT environment variable. A missing variable raises
+    # KeyError rather than falling back to a default.
+    return os.environ["GOOGLE_CLOUD_PROJECT"]
+
+
+@pytest.fixture(scope="session")
+def location() -> str:
+    # Session-scoped fixture: fixed location string handed to tests that
+    # request a `location` argument.
+    return "US"
diff --git a/bigquery/bigframes/noxfile_config.py b/bigquery/bigframes/noxfile_config.py
@@ -0,0 +1,38 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+    # You can opt out from the test for specific Python versions.
+    "ignored_versions": ["2.7", "3.6", "3.8", "3.9", "3.11"],
+    # Old samples are opted out of enforcing Python type hints.
+    # All new samples should feature them, so this directory opts in.
+    "enforce_type_hints": True,
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+    # build specific Cloud project. You can also use your own string
+    # to use your own Cloud project.
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+    # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    "envs": {},
+}
diff --git a/bigquery/bigframes/requirements-test.txt b/bigquery/bigframes/requirements-test.txt
@@ -0,0 +1,2 @@
+flaky==3.8.1
+pytest==8.2.0
diff --git a/bigquery/bigframes/requirements.txt b/bigquery/bigframes/requirements.txt
@@ -0,0 +1 @@
+bigframes==2.38.0