Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c2009ed
docs: link to ML.EVALUATE BQML page for score() methods
ashleyxuu Oct 24, 2023
09ad5e4
feat: label query job with bigframes-api-xx using decorator
ashleyxuu Oct 25, 2023
4f4eb9b
reorganize the commit
ashleyxuu Oct 25, 2023
9ee937c
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Oct 26, 2023
272f0af
test: Log slowest tests durations (#146)
shobsi Oct 26, 2023
0e4c49c
docs: link to ML.EVALUATE BQML page for score() methods (#137)
ashleyxuu Oct 26, 2023
aad2c1a
feat: populate ibis version in user agent (#140)
ashleyxuu Oct 26, 2023
1043d6d
fix: don't override the global logging config (#138)
tswast Oct 26, 2023
1f49ef9
fix: use indexee's session for loc listlike cases (#152)
milkshakeiii Oct 26, 2023
c4c1e6e
feat: add pandas.qcut (#104)
TrevorBergeron Oct 26, 2023
4a27f44
feat: add unstack to series, add level param (#115)
TrevorBergeron Oct 26, 2023
fface57
feat: add `DataFrame.to_pandas_batches()` to download large `DataFram…
tswast Oct 26, 2023
bbc3c69
fix: resolve plotly rendering issue by using ipython html for job pro…
orrbradford Oct 26, 2023
a99d62c
refactor: ArrayValue is now a tree that defers conversion to ibis (#110)
TrevorBergeron Oct 27, 2023
f37d0b0
fix: fix bug with column names under repeated column assignment (#150)
milkshakeiii Oct 27, 2023
aba301c
test: refactor remote function tests (#147)
shobsi Oct 27, 2023
53bb2cd
feat: add dataframe melt (#116)
TrevorBergeron Oct 28, 2023
2bf4bcc
docs: add artithmetic df sample code (#153)
ashleyxuu Oct 30, 2023
343414a
feat: Implement operator `@` for `DataFrame.dot` (#139)
shobsi Oct 30, 2023
4eac10d
fix: fix typo and address comments
ashleyxuu Oct 30, 2023
868d2ad
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Oct 30, 2023
c03a8d9
Merge branch 'main' into ashleyxu-add-api-methods
tswast Nov 2, 2023
39321e4
fix: address comments
ashleyxuu Nov 3, 2023
aebcf11
Remove utils folder and refactor it in core directory
ashleyxuu Nov 3, 2023
72217c2
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 3, 2023
ec526b5
Remove utils folder and refactor it in core directory
ashleyxuu Nov 3, 2023
9edfe31
Merge remote-tracking branch 'origin/ashleyxu-add-api-methods' into a…
ashleyxuu Nov 3, 2023
4baa373
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 3, 2023
3a94c23
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Nov 3, 2023
d84c569
fix merge conflicts
ashleyxuu Nov 3, 2023
308c9a7
Merge remote-tracking branch 'origin/ashleyxu-add-api-methods' into a…
ashleyxuu Nov 3, 2023
4618107
commit the conflicts
ashleyxuu Nov 13, 2023
a87bcb8
redesign the log adapter
ashleyxuu Nov 14, 2023
cf97f8b
resolve conflicts and merge remote-tracking branch 'origin/main' into…
ashleyxuu Nov 14, 2023
53a99f9
Make the global _api_methods and lock threads
ashleyxuu Nov 14, 2023
3cc3599
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 14, 2023
1c3deb5
Make the global _api_methods and lock threads
ashleyxuu Nov 14, 2023
99f423b
merge conflicts
ashleyxuu Nov 14, 2023
115de27
address comments
ashleyxuu Nov 14, 2023
b0adf27
address comments
ashleyxuu Nov 14, 2023
b4ea9e3
Merge remote-tracking branch 'origin/ashleyxu-add-api-methods' into a…
ashleyxuu Nov 14, 2023
df9c9c0
fix error
ashleyxuu Nov 14, 2023
00bb6de
fix None job_config error
ashleyxuu Nov 14, 2023
36fea06
address comments
ashleyxuu Nov 14, 2023
e872d18
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
test: refactor remote function tests (#147)
This change moves the tests that deploy cloud functions to the large remote function tests, and the tests that do not make calls to the BigQuery service to the unit tests.

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
  • Loading branch information
shobsi authored and ashleyxuu committed Oct 30, 2023
commit aba301c9406cff495cbdc6bdba4b51d14a1ef18b
90 changes: 90 additions & 0 deletions tests/system/large/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import pytest
import test_utils.prefixer

import bigframes
from bigframes.remote_function import (
get_cloud_function_name,
get_remote_function_locations,
Expand Down Expand Up @@ -1120,3 +1121,92 @@ def plusone(x):
)
for dir_ in dirs_to_cleanup:
shutil.rmtree(dir_)


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_via_session_context_connection_setter(
    scalars_dfs, dataset_id, bq_cf_connection
):
    """Check a remote function whose BigQuery connection comes from the session.

    The connection is assigned through the ``bq_connection`` setter on
    ``BigQueryOptions`` instead of being passed to ``remote_function``. The
    column produced by applying the remote function is then compared with the
    same computation done locally in pandas.
    """
    # Creating a session scoped only to this test as we would be setting a
    # property in it
    context = bigframes.BigQueryOptions()
    context.bq_connection = bq_cf_connection
    session = bigframes.connect(context)

    # Without an explicit bigquery connection, the one present in Session,
    # set via context setter would be used. Without an explicit `reuse` the
    # default behavior of reuse=True will take effect. Please note that the
    # udf is same as the one used in other tests in this file so the underlying
    # cloud function would be common with reuse=True. Since we are using a
    # unique dataset_id, even though the cloud function would be reused, the bq
    # remote function would still be created, making use of the bq connection
    # set in the BigQueryOptions above.
    #
    # The function is deployed before entering the `try` block on purpose: if
    # the deployment itself raises, `square` is never bound, and referencing
    # it from `finally` would raise UnboundLocalError on top of the real error.
    @session.remote_function([int], int, dataset=dataset_id)
    def square(x):
        return x * x

    try:
        scalars_df, scalars_pandas_df = scalars_dfs

        bf_int64_col = scalars_df["int64_col"]
        bf_int64_col_filter = bf_int64_col.notnull()
        bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
        bf_result_col = bf_int64_col_filtered.apply(square)
        bf_result = (
            bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
        )

        pd_int64_col = scalars_pandas_df["int64_col"]
        pd_int64_col_filter = pd_int64_col.notnull()
        pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
        pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
        # TODO(shobs): Figure out why pandas .apply() changes the dtype, i.e.
        # pd_int64_col_filtered.dtype is Int64Dtype()
        # pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
        # For this test let's force the pandas dtype to be same as bigframes' dtype.
        pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
        pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)

        assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
    finally:
        # clean up the gcp assets created for the remote function
        cleanup_remote_function_assets(
            session.bqclient, session.cloudfunctionsclient, square
        )


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_default_connection(session, scalars_dfs, dataset_id):
    """Check a remote function created without an explicit BigQuery connection.

    No connection is passed to ``remote_function`` here.
    NOTE(review): presumably the ``session`` fixture does not configure one
    either, so the default connection is exercised - confirm against the
    fixture definition.
    """

    # Deploy before entering the `try` block: if the deployment itself raises,
    # `square` is never bound, and referencing it from `finally` would raise
    # UnboundLocalError on top of the real error.
    @session.remote_function([int], int, dataset=dataset_id)
    def square(x):
        return x * x

    try:
        scalars_df, scalars_pandas_df = scalars_dfs

        bf_int64_col = scalars_df["int64_col"]
        bf_int64_col_filter = bf_int64_col.notnull()
        bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
        bf_result_col = bf_int64_col_filtered.apply(square)
        bf_result = (
            bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
        )

        pd_int64_col = scalars_pandas_df["int64_col"]
        pd_int64_col_filter = pd_int64_col.notnull()
        pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
        pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
        # TODO(shobs): Figure out why pandas .apply() changes the dtype, i.e.
        # pd_int64_col_filtered.dtype is Int64Dtype()
        # pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
        # For this test let's force the pandas dtype to be same as bigframes' dtype.
        pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
        pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)

        assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
    finally:
        # clean up the gcp assets created for the remote function
        cleanup_remote_function_assets(
            session.bqclient, session.cloudfunctionsclient, square
        )
156 changes: 30 additions & 126 deletions tests/system/small/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@
# limitations under the License.

from google.cloud import bigquery
from ibis.backends.bigquery import datatypes as bq_types
from ibis.expr import datatypes as ibis_types
import pandas as pd
import pytest

import bigframes
from bigframes import remote_function as rf
import bigframes.pandas as bpd
from tests.system.utils import assert_pandas_df_equal_ignore_ordering


Expand Down Expand Up @@ -65,45 +62,14 @@ def bq_cf_connection_location_project_mismatched() -> str:


@pytest.fixture(scope="module")
def session_with_bq_connection(bq_cf_connection) -> bigframes.Session:
return bigframes.Session(bigframes.BigQueryOptions(bq_connection=bq_cf_connection))


@pytest.fixture(scope="module")
def session_with_bq_connection_location_specified(
bq_cf_connection_location,
) -> bigframes.Session:
return bigframes.Session(
bigframes.BigQueryOptions(bq_connection=bq_cf_connection_location)
)


@pytest.fixture(scope="module")
def session_with_bq_connection_location_mistached(
bq_cf_connection_location_mistached,
) -> bigframes.Session:
return bigframes.Session(
bigframes.BigQueryOptions(bq_connection=bq_cf_connection_location_mistached)
)


@pytest.fixture(scope="module")
def session_with_bq_connection_location_project_specified(
bq_cf_connection_location_project,
def session_with_bq_connection_and_permanent_dataset(
bq_cf_connection, dataset_id_permanent
) -> bigframes.Session:
return bigframes.Session(
bigframes.BigQueryOptions(bq_connection=bq_cf_connection_location_project)
session = bigframes.Session(
bigframes.BigQueryOptions(bq_connection=bq_cf_connection)
)


def test_supported_types_correspond():
# The same types should be representable by the supported Python and BigQuery types.
ibis_types_from_python = {ibis_types.dtype(t) for t in rf.SUPPORTED_IO_PYTHON_TYPES}
ibis_types_from_bigquery = {
bq_types.BigQueryType.to_ibis(tk) for tk in rf.SUPPORTED_IO_BIGQUERY_TYPEKINDS
}

assert ibis_types_from_python == ibis_types_from_bigquery
session._session_dataset = bigquery.Dataset(dataset_id_permanent)
return session


@pytest.mark.flaky(retries=2, delay=120)
Expand Down Expand Up @@ -311,11 +277,13 @@ def square(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_direct_session_param(session_with_bq_connection, scalars_dfs):
def test_remote_function_direct_session_param(
session_with_bq_connection_and_permanent_dataset, scalars_dfs
):
@rf.remote_function(
[int],
int,
session=session_with_bq_connection,
session=session_with_bq_connection_and_permanent_dataset,
)
def square(x):
return x * x
Expand Down Expand Up @@ -345,15 +313,17 @@ def square(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_via_session_default(session_with_bq_connection, scalars_dfs):
def test_remote_function_via_session_default(
session_with_bq_connection_and_permanent_dataset, scalars_dfs
):
# Session has bigquery connection initialized via context. Without an
# explicit dataset the default dataset from the session would be used.
# Without an explicit bigquery connection, the one present in Session set
# through the explicit BigQueryOptions would be used. Without an explicit `reuse`
# the default behavior of reuse=True will take effect. Please note that the
# udf is same as the one used in other tests in this file so the underlying
# cloud function would be common and quickly reused.
@session_with_bq_connection.remote_function([int], int)
@session_with_bq_connection_and_permanent_dataset.remote_function([int], int)
def square(x):
return x * x

Expand Down Expand Up @@ -421,87 +391,15 @@ def square(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_via_session_context_connection_setter(
scalars_dfs, dataset_id, bq_cf_connection
def test_dataframe_applymap(
session_with_bq_connection_and_permanent_dataset, scalars_dfs
):
# Creating a session scoped only to this test as we would be setting a
# property in it
context = bigframes.BigQueryOptions()
context.bq_connection = bq_cf_connection
session = bigframes.connect(context)

# Without an explicit bigquery connection, the one present in Session,
# set via context setter would be used. Without an explicit `reuse` the
# default behavior of reuse=True will take effect. Please note that the
# udf is same as the one used in other tests in this file so the underlying
# cloud function would be common with reuse=True. Since we are using a
# unique dataset_id, even though the cloud function would be reused, the bq
# remote function would still be created, making use of the bq connection
# set in the BigQueryOptions above.
@session.remote_function([int], int, dataset=dataset_id)
def square(x):
return x * x

scalars_df, scalars_pandas_df = scalars_dfs

bf_int64_col = scalars_df["int64_col"]
bf_int64_col_filter = bf_int64_col.notnull()
bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
bf_result_col = bf_int64_col_filtered.apply(square)
bf_result = (
bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
)

pd_int64_col = scalars_pandas_df["int64_col"]
pd_int64_col_filter = pd_int64_col.notnull()
pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
# TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
# pd_int64_col_filtered.dtype is Int64Dtype()
# pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
# For this test let's force the pandas dtype to be same as bigframes' dtype.
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)

assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_default_connection(scalars_dfs, dataset_id):
@bpd.remote_function([int], int, dataset=dataset_id)
def square(x):
return x * x

scalars_df, scalars_pandas_df = scalars_dfs

bf_int64_col = scalars_df["int64_col"]
bf_int64_col_filter = bf_int64_col.notnull()
bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
bf_result_col = bf_int64_col_filtered.apply(square)
bf_result = (
bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
)

pd_int64_col = scalars_pandas_df["int64_col"]
pd_int64_col_filter = pd_int64_col.notnull()
pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
# TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
# pd_int64_col_filtered.dtype is Int64Dtype()
# pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
# For this test let's force the pandas dtype to be same as bigframes' dtype.
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)

assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)


@pytest.mark.flaky(retries=2, delay=120)
def test_dataframe_applymap(session_with_bq_connection, scalars_dfs):
def add_one(x):
return x + 1

remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one)
remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function(
[int], int
)(add_one)

scalars_df, scalars_pandas_df = scalars_dfs
int64_cols = ["int64_col", "int64_too"]
Expand All @@ -524,11 +422,15 @@ def add_one(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_dataframe_applymap_na_ignore(session_with_bq_connection, scalars_dfs):
def test_dataframe_applymap_na_ignore(
session_with_bq_connection_and_permanent_dataset, scalars_dfs
):
def add_one(x):
return x + 1

remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one)
remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function(
[int], int
)(add_one)

scalars_df, scalars_pandas_df = scalars_dfs
int64_cols = ["int64_col", "int64_too"]
Expand All @@ -549,11 +451,13 @@ def add_one(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_series_map(session_with_bq_connection, scalars_dfs):
def test_series_map(session_with_bq_connection_and_permanent_dataset, scalars_dfs):
def add_one(x):
return x + 1

remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one)
remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function(
[int], int
)(add_one)

scalars_df, scalars_pandas_df = scalars_dfs

Expand Down Expand Up @@ -635,7 +539,7 @@ def square1(x):


@pytest.mark.flaky(retries=2, delay=120)
def test_read_gbq_function_reads_udfs(bigquery_client, scalars_dfs, dataset_id):
def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id):
dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
arg = bigquery.RoutineArgument(
name="x",
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/test_remote_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.backends.bigquery import datatypes as bq_types
from ibis.expr import datatypes as ibis_types

from bigframes import remote_function as rf


def test_supported_types_correspond():
    """Supported Python types and BigQuery type kinds map to the same ibis types."""
    # ibis dtypes reachable from the supported Python I/O types.
    python_side = set()
    for python_type in rf.SUPPORTED_IO_PYTHON_TYPES:
        python_side.add(ibis_types.dtype(python_type))

    # ibis dtypes reachable from the supported BigQuery type kinds.
    bigquery_side = set()
    for type_kind in rf.SUPPORTED_IO_BIGQUERY_TYPEKINDS:
        bigquery_side.add(bq_types.BigQueryType.to_ibis(type_kind))

    # Both sets must describe exactly the same collection of ibis types.
    assert python_side == bigquery_side