Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions bigframes/bigquery/_operations/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,41 @@ def global_explain(
return bpd.read_gbq_query(sql)
else:
return session.read_gbq_query(sql)


@log_adapter.method_logger(custom_base_name="bigquery_ml")
def transform(
    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
) -> dataframe.DataFrame:
    """
    Transforms input data using a BigQuery ML model.

    See the `BigQuery ML TRANSFORM function syntax
    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform>`_
    for additional reference.

    Args:
        model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
            The model to use for transformation. It is resolved to a model
            name (and, possibly, a session) by
            ``_get_model_name_and_session``.
        input_ (Union[pandas.DataFrame, bigframes.pandas.DataFrame, str]):
            The DataFrame or query to use for transformation. It is
            converted to SQL with ``_to_sql`` and embedded as the
            subquery argument of ``ML.TRANSFORM``.

    Returns:
        bigframes.pandas.DataFrame:
            The transformed data, read with ``read_gbq_query`` from the
            resolved session, or from ``bigframes.pandas`` when no session
            was resolved.
    """
    # Function-scope import, kept exactly where the original had it.
    import bigframes.pandas as bpd

    model_name, session = _get_model_name_and_session(model, input_)
    query = bigframes.core.sql.ml.transform(
        model_name=model_name,
        table=_to_sql(input_),
    )

    # Fall back to the module-level reader only when no session was resolved.
    reader = bpd if session is None else session
    return reader.read_gbq_query(query)
2 changes: 2 additions & 0 deletions bigframes/bigquery/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
explain_predict,
global_explain,
predict,
transform,
)

__all__ = [
Expand All @@ -33,4 +34,5 @@
"predict",
"explain_predict",
"global_explain",
"transform",
]
11 changes: 11 additions & 0 deletions bigframes/core/sql/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,14 @@ def global_explain(
sql += _build_struct_sql(struct_options)
sql += ")\n"
return sql


def transform(
    model_name: str,
    table: str,
) -> str:
    """Build the SQL text of an ML.TRANSFORM query.

    See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform for reference.

    Args:
        model_name: Name of the model; it is passed through
            ``googlesql.identifier`` before being placed after ``MODEL``.
        table: SQL text for the input rows; it is inserted verbatim inside
            parentheses as the second argument of ``ML.TRANSFORM``.

    Returns:
        A newline-terminated ``SELECT * FROM ML.TRANSFORM(...)`` statement.
    """
    model_ref = googlesql.identifier(model_name)
    return f"SELECT * FROM ML.TRANSFORM(MODEL {model_ref}, ({table}))\n"
18 changes: 18 additions & 0 deletions tests/unit/bigquery/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,21 @@ def test_global_explain_with_pandas_series_model(read_gbq_query_mock):
generated_sql = read_gbq_query_mock.call_args[0][0]
assert "ML.GLOBAL_EXPLAIN" in generated_sql
assert f"MODEL `{MODEL_NAME}`" in generated_sql


@mock.patch("bigframes.pandas.read_gbq_query")
@mock.patch("bigframes.pandas.read_pandas")
def test_transform_with_pandas_dataframe(read_pandas_mock, read_gbq_query_mock):
    """transform() with a pandas DataFrame calls read_pandas once and emits ML.TRANSFORM SQL."""
    # Arrange: the object returned by read_pandas reports a fixed SQL query.
    read_pandas_mock.return_value._to_sql_query.return_value = (
        "SELECT * FROM `pandas_df`",
        [],
        [],
    )
    local_frame = pd.DataFrame({"col1": [1, 2, 3]})

    # Act
    ml_ops.transform(MODEL_SERIES, input_=local_frame)

    # Assert: each patched entry point was hit exactly once ...
    read_pandas_mock.assert_called_once()
    read_gbq_query_mock.assert_called_once()

    # ... and the first positional argument of the query call is the SQL.
    generated_sql = read_gbq_query_mock.call_args[0][0]
    expected_fragments = (
        "ML.TRANSFORM",
        f"MODEL `{MODEL_NAME}`",
        "(SELECT * FROM `pandas_df`)",
    )
    for fragment in expected_fragments:
        assert fragment in generated_sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT * FROM ML.TRANSFORM(MODEL `my_project.my_dataset.my_model`, (SELECT * FROM new_data))
8 changes: 8 additions & 0 deletions tests/unit/core/sql/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,11 @@ def test_global_explain_model_with_options(snapshot):
class_level_explain=True,
)
snapshot.assert_match(sql, "global_explain_model_with_options.sql")


def test_transform_model_basic(snapshot):
    """The basic ML.TRANSFORM statement matches the stored snapshot."""
    model = "my_project.my_dataset.my_model"
    source_query = "SELECT * FROM new_data"

    generated = bigframes.core.sql.ml.transform(model_name=model, table=source_query)

    snapshot.assert_match(generated, "transform_model_basic.sql")
Loading