From 51c4dbfa58eaef7e33aa4714d73dba0d0bbcce5d Mon Sep 17 00:00:00 2001
From: Garrett Wu
Date: Wed, 21 Jan 2026 23:08:17 +0000
Subject: [PATCH 1/2] feat: add bigquery.ml.transform function

---
 bigframes/bigquery/_operations/ml.py | 38 ++++++++++++++++++++++++++++
 bigframes/bigquery/ml.py             |  2 ++
 bigframes/core/sql/ml.py             | 11 ++++++++
 tests/unit/bigquery/test_ml.py       | 18 +++++++++++++
 tests/unit/core/sql/test_ml.py       |  8 ++++++
 5 files changed, 77 insertions(+)

diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py
index c9b48bb5ac..e5a5c5dfb6 100644
--- a/bigframes/bigquery/_operations/ml.py
+++ b/bigframes/bigquery/_operations/ml.py
@@ -393,3 +393,41 @@ def global_explain(
         return bpd.read_gbq_query(sql)
     else:
         return session.read_gbq_query(sql)
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_ml")
+def transform(
+    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
+) -> dataframe.DataFrame:
+    """
+    Transforms input data using a BigQuery ML model.
+
+    See the `BigQuery ML TRANSFORM function syntax
+    <https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform>`_
+    for additional reference.
+
+    Args:
+        model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
+            The model to use for transformation.
+        input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
+            The DataFrame or query to use for transformation.
+
+    Returns:
+        bigframes.pandas.DataFrame:
+            The transformed data.
+    """
+    import bigframes.pandas as bpd
+
+    model_name, session = _get_model_name_and_session(model, input_)
+    table_sql = _to_sql(input_)
+
+    sql = bigframes.core.sql.ml.transform(
+        model_name=model_name,
+        table=table_sql,
+    )
+
+    if session is None:
+        return bpd.read_gbq_query(sql)
+    else:
+        return session.read_gbq_query(sql)
diff --git a/bigframes/bigquery/ml.py b/bigframes/bigquery/ml.py
index 93b0670ba5..6ceadb324d 100644
--- a/bigframes/bigquery/ml.py
+++ b/bigframes/bigquery/ml.py
@@ -25,6 +25,7 @@
     explain_predict,
     global_explain,
     predict,
+    transform,
 )
 
 __all__ = [
@@ -33,4 +34,5 @@
     "predict",
     "explain_predict",
     "global_explain",
+    "transform",
 ]
diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py
index ec55fe0426..1749315925 100644
--- a/bigframes/core/sql/ml.py
+++ b/bigframes/core/sql/ml.py
@@ -213,3 +213,14 @@ def global_explain(
         sql += _build_struct_sql(struct_options)
     sql += ")\n"
     return sql
+
+
+def transform(
+    model_name: str,
+    table: str,
+) -> str:
+    """Encode the ML.TRANSFORM statement.
+    See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform for reference.
+    """
+    sql = f"SELECT * FROM ML.TRANSFORM(MODEL {googlesql.identifier(model_name)}, ({table}))\n"
+    return sql
diff --git a/tests/unit/bigquery/test_ml.py b/tests/unit/bigquery/test_ml.py
index 063ddafcca..96b97d68fe 100644
--- a/tests/unit/bigquery/test_ml.py
+++ b/tests/unit/bigquery/test_ml.py
@@ -145,3 +145,21 @@ def test_global_explain_with_pandas_series_model(read_gbq_query_mock):
     generated_sql = read_gbq_query_mock.call_args[0][0]
     assert "ML.GLOBAL_EXPLAIN" in generated_sql
     assert f"MODEL `{MODEL_NAME}`" in generated_sql
+
+
+@mock.patch("bigframes.pandas.read_gbq_query")
+@mock.patch("bigframes.pandas.read_pandas")
+def test_transform_with_pandas_dataframe(read_pandas_mock, read_gbq_query_mock):
+    df = pd.DataFrame({"col1": [1, 2, 3]})
+    read_pandas_mock.return_value._to_sql_query.return_value = (
+        "SELECT * FROM `pandas_df`",
+        [],
+        [],
+    )
+    ml_ops.transform(MODEL_SERIES, input_=df)
+    read_pandas_mock.assert_called_once()
+    read_gbq_query_mock.assert_called_once()
+    generated_sql = read_gbq_query_mock.call_args[0][0]
+    assert "ML.TRANSFORM" in generated_sql
+    assert f"MODEL `{MODEL_NAME}`" in generated_sql
+    assert "(SELECT * FROM `pandas_df`)" in generated_sql
diff --git a/tests/unit/core/sql/test_ml.py b/tests/unit/core/sql/test_ml.py
index fe8c1a04d4..9721f42fee 100644
--- a/tests/unit/core/sql/test_ml.py
+++ b/tests/unit/core/sql/test_ml.py
@@ -169,3 +169,11 @@ def test_global_explain_model_with_options(snapshot):
         class_level_explain=True,
     )
     snapshot.assert_match(sql, "global_explain_model_with_options.sql")
+
+
+def test_transform_model_basic(snapshot):
+    sql = bigframes.core.sql.ml.transform(
+        model_name="my_project.my_dataset.my_model",
+        table="SELECT * FROM new_data",
+    )
+    snapshot.assert_match(sql, "transform_model_basic.sql")
From 611d0814714ffc86968d8eda7cd3eab16ca91a41 Mon Sep 17 00:00:00 2001
From: Garrett Wu
Date: Wed, 21 Jan 2026 23:14:34 +0000
Subject: [PATCH 2/2] fix: add missing transform_model_basic SQL snapshot

---
 .../test_ml/test_transform_model_basic/transform_model_basic.sql | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql

diff --git a/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql b/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql
new file mode 100644
index 0000000000..e6cedc1647
--- /dev/null
+++ b/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql
@@ -0,0 +1 @@
+SELECT * FROM ML.TRANSFORM(MODEL `my_project.my_dataset.my_model`, (SELECT * FROM new_data))