Skip to content

Commit 014765c

Browse files
authored
feat: expose gcf memory param in remote_function (#803)
* feat: expose gcf memory param in `remote_function` * add the reported broken use case as a test * fix mypy failure * revert test that is prone to timing out during deployment
1 parent b47f32d commit 014765c

File tree

4 files changed

+89
-1
lines changed

4 files changed

+89
-1
lines changed

bigframes/functions/remote_function.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ def create_cloud_function(
331331
max_instance_count=None,
332332
is_row_processor=False,
333333
vpc_connector=None,
334+
memory_mib=1024,
334335
):
335336
"""Create a cloud function from the given user defined function.
336337
@@ -410,7 +411,8 @@ def create_cloud_function(
410411
self._cloud_function_docker_repository
411412
)
412413
function.service_config = functions_v2.ServiceConfig()
413-
function.service_config.available_memory = "1024M"
414+
if memory_mib is not None:
415+
function.service_config.available_memory = f"{memory_mib}Mi"
414416
if timeout_seconds is not None:
415417
if timeout_seconds > 1200:
416418
raise ValueError(
@@ -473,6 +475,7 @@ def provision_bq_remote_function(
473475
cloud_function_max_instance_count,
474476
is_row_processor,
475477
cloud_function_vpc_connector,
478+
cloud_function_memory_mib,
476479
):
477480
"""Provision a BigQuery remote function."""
478481
# If reuse of any existing function with the same name (indicated by the
@@ -504,6 +507,7 @@ def provision_bq_remote_function(
504507
max_instance_count=cloud_function_max_instance_count,
505508
is_row_processor=is_row_processor,
506509
vpc_connector=cloud_function_vpc_connector,
510+
memory_mib=cloud_function_memory_mib,
507511
)
508512
else:
509513
logger.info(f"Cloud function {cloud_function_name} already exists.")
@@ -667,6 +671,7 @@ def remote_function(
667671
cloud_function_timeout: Optional[int] = 600,
668672
cloud_function_max_instances: Optional[int] = None,
669673
cloud_function_vpc_connector: Optional[str] = None,
674+
cloud_function_memory_mib: Optional[int] = 1024,
670675
):
671676
"""Decorator to turn a user defined function into a BigQuery remote function.
672677
@@ -817,6 +822,15 @@ def remote_function(
817822
function. This is useful if your code needs access to data or
818823
service(s) that are on a VPC network. See for more details
819824
https://cloud.google.com/functions/docs/networking/connecting-vpc.
825+
cloud_function_memory_mib (int, Optional):
826+
The amount of memory (in mebibytes) to allocate for the cloud
827+
function (2nd gen) created. This also dictates a corresponding
828+
amount of allocated CPU for the function. By default a memory of
829+
1024 MiB is set for the cloud functions created to support
830+
BigQuery DataFrames remote function. If you want to let the
831+
default memory of cloud functions be allocated, pass `None`. See
832+
for more details
833+
https://cloud.google.com/functions/docs/configuring/memory.
820834
"""
821835
# Some defaults may be used from the session if not provided otherwise
822836
import bigframes.exceptions as bf_exceptions
@@ -1027,6 +1041,7 @@ def try_delattr(attr):
10271041
cloud_function_max_instance_count=cloud_function_max_instances,
10281042
is_row_processor=is_row_processor,
10291043
cloud_function_vpc_connector=cloud_function_vpc_connector,
1044+
cloud_function_memory_mib=cloud_function_memory_mib,
10301045
)
10311046

10321047
# TODO: Move ibis logic to compiler step

bigframes/pandas/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,7 @@ def remote_function(
666666
cloud_function_timeout: Optional[int] = 600,
667667
cloud_function_max_instances: Optional[int] = None,
668668
cloud_function_vpc_connector: Optional[str] = None,
669+
cloud_function_memory_mib: Optional[int] = 1024,
669670
):
670671
return global_session.with_default_session(
671672
bigframes.session.Session.remote_function,
@@ -683,6 +684,7 @@ def remote_function(
683684
cloud_function_timeout=cloud_function_timeout,
684685
cloud_function_max_instances=cloud_function_max_instances,
685686
cloud_function_vpc_connector=cloud_function_vpc_connector,
687+
cloud_function_memory_mib=cloud_function_memory_mib,
686688
)
687689

688690

bigframes/session/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -1537,6 +1537,7 @@ def remote_function(
15371537
cloud_function_timeout: Optional[int] = 600,
15381538
cloud_function_max_instances: Optional[int] = None,
15391539
cloud_function_vpc_connector: Optional[str] = None,
1540+
cloud_function_memory_mib: Optional[int] = 1024,
15401541
):
15411542
"""Decorator to turn a user defined function into a BigQuery remote function. Check out
15421543
the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1670,6 +1671,15 @@ def remote_function(
16701671
function. This is useful if your code needs access to data or
16711672
service(s) that are on a VPC network. See for more details
16721673
https://cloud.google.com/functions/docs/networking/connecting-vpc.
1674+
cloud_function_memory_mib (int, Optional):
1675+
The amount of memory (in mebibytes) to allocate for the cloud
1676+
function (2nd gen) created. This also dictates a corresponding
1677+
amount of allocated CPU for the function. By default a memory of
1678+
1024 MiB is set for the cloud functions created to support
1679+
BigQuery DataFrames remote function. If you want to let the
1680+
default memory of cloud functions be allocated, pass `None`. See
1681+
for more details
1682+
https://cloud.google.com/functions/docs/configuring/memory.
16731683
Returns:
16741684
callable: A remote function object pointing to the cloud assets created
16751685
in the background to support the remote execution. The cloud assets can be
@@ -1695,6 +1705,7 @@ def remote_function(
16951705
cloud_function_timeout=cloud_function_timeout,
16961706
cloud_function_max_instances=cloud_function_max_instances,
16971707
cloud_function_vpc_connector=cloud_function_vpc_connector,
1708+
cloud_function_memory_mib=cloud_function_memory_mib,
16981709
)
16991710

17001711
def read_gbq_function(

tests/system/large/test_remote_function.py

+60
Original file line numberDiff line numberDiff line change
@@ -1800,3 +1800,63 @@ def float_parser(row):
18001800
cleanup_remote_function_assets(
18011801
session.bqclient, session.cloudfunctionsclient, float_parser_remote
18021802
)
1803+
1804+
1805+
@pytest.mark.parametrize(
1806+
("memory_mib_args", "expected_memory"),
1807+
[
1808+
pytest.param({}, "1024Mi", id="no-set"),
1809+
pytest.param({"cloud_function_memory_mib": None}, "256M", id="set-None"),
1810+
pytest.param({"cloud_function_memory_mib": 128}, "128Mi", id="set-128"),
1811+
pytest.param({"cloud_function_memory_mib": 1024}, "1024Mi", id="set-1024"),
1812+
pytest.param({"cloud_function_memory_mib": 4096}, "4096Mi", id="set-4096"),
1813+
pytest.param({"cloud_function_memory_mib": 32768}, "32768Mi", id="set-32768"),
1814+
],
1815+
)
1816+
@pytest.mark.flaky(retries=2, delay=120)
1817+
def test_remote_function_gcf_memory(
1818+
session, scalars_dfs, memory_mib_args, expected_memory
1819+
):
1820+
try:
1821+
1822+
def square(x: int) -> int:
1823+
return x * x
1824+
1825+
square_remote = session.remote_function(reuse=False, **memory_mib_args)(square)
1826+
1827+
# Assert that the GCF is created with the intended memory
1828+
gcf = session.cloudfunctionsclient.get_function(
1829+
name=square_remote.bigframes_cloud_function
1830+
)
1831+
assert gcf.service_config.available_memory == expected_memory
1832+
1833+
scalars_df, scalars_pandas_df = scalars_dfs
1834+
1835+
bf_result = scalars_df["int64_too"].apply(square_remote).to_pandas()
1836+
pd_result = scalars_pandas_df["int64_too"].apply(square)
1837+
1838+
pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
1839+
finally:
1840+
# clean up the gcp assets created for the remote function
1841+
cleanup_remote_function_assets(
1842+
session.bqclient, session.cloudfunctionsclient, square_remote
1843+
)
1844+
1845+
1846+
@pytest.mark.parametrize(
1847+
("memory_mib",),
1848+
[
1849+
pytest.param(127, id="127-too-low"),
1850+
pytest.param(32769, id="set-32769-too-high"),
1851+
],
1852+
)
1853+
@pytest.mark.flaky(retries=2, delay=120)
1854+
def test_remote_function_gcf_memory_unsupported(session, memory_mib):
1855+
with pytest.raises(
1856+
google.api_core.exceptions.InvalidArgument,
1857+
match="Invalid value specified for container memory",
1858+
):
1859+
1860+
@session.remote_function(reuse=False, cloud_function_memory_mib=memory_mib)
1861+
def square(x: int) -> int:
1862+
return x * x

0 commit comments

Comments
 (0)