Skip to content

Commit d92ced2

Browse files
authored
feat: Support BYOSA in remote_function (#407)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes internal issue 328138730 🦕
1 parent 45e6229 commit d92ced2

File tree

4 files changed

+71
-4
lines changed

4 files changed

+71
-4
lines changed

bigframes/functions/remote_function.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ def __init__(
129129
bq_connection_client,
130130
bq_connection_id,
131131
cloud_resource_manager_client,
132+
cloud_function_service_account,
132133
):
133134
self._gcp_project_id = gcp_project_id
134135
self._cloud_function_region = cloud_function_region
@@ -140,6 +141,7 @@ def __init__(
140141
self._bq_connection_manager = clients.BqConnectionManager(
141142
bq_connection_client, cloud_resource_manager_client
142143
)
144+
self._cloud_function_service_account = cloud_function_service_account
143145

144146
def create_bq_remote_function(
145147
self, input_args, input_types, output_type, endpoint, bq_function_name
@@ -384,6 +386,9 @@ def create_cloud_function(self, def_, cf_name, package_requirements=None):
384386
function.service_config = functions_v2.ServiceConfig()
385387
function.service_config.available_memory = "1024M"
386388
function.service_config.timeout_seconds = 600
389+
function.service_config.service_account_email = (
390+
self._cloud_function_service_account
391+
)
387392
create_function_request.function = function
388393

389394
# Create the cloud function and wait for it to be ready to use
@@ -591,6 +596,7 @@ def remote_function(
591596
reuse: bool = True,
592597
name: Optional[str] = None,
593598
packages: Optional[Sequence[str]] = None,
599+
cloud_function_service_account: Optional[str] = None,
594600
):
595601
"""Decorator to turn a user defined function into a BigQuery remote function.
596602
@@ -646,12 +652,12 @@ def remote_function(
646652
Client to use for BigQuery operations. If this param is not provided
647653
then bigquery client from the session would be used.
648654
bigquery_connection_client (google.cloud.bigquery_connection_v1.ConnectionServiceClient, Optional):
649-
Client to use for cloud functions operations. If this param is not
650-
provided then functions client from the session would be used.
651-
cloud_functions_client (google.cloud.functions_v2.FunctionServiceClient, Optional):
652655
Client to use for BigQuery connection operations. If this param is
653656
not provided then bigquery connection client from the session would
654657
be used.
658+
cloud_functions_client (google.cloud.functions_v2.FunctionServiceClient, Optional):
659+
Client to use for cloud functions operations. If this param is not
660+
provided then the functions client from the session would be used.
655661
resource_manager_client (google.cloud.resourcemanager_v3.ProjectsClient, Optional):
656662
Client to use for cloud resource management operations, e.g. for
657663
getting and setting IAM roles on cloud resources. If this param is
@@ -686,7 +692,13 @@ def remote_function(
686692
Explicit name of the external package dependencies. Each dependency
687693
is added to the `requirements.txt` as is, and can be of the form
688694
supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
689-
695+
cloud_function_service_account (str, Optional):
696+
Service account to use for the cloud functions. If not provided then
697+
the default service account would be used. See
698+
https://cloud.google.com/functions/docs/securing/function-identity
699+
for more details. Please make sure the service account has the
700+
necessary IAM permissions configured as described in
701+
https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration.
690702
"""
691703
import bigframes.pandas as bpd
692704

@@ -787,6 +799,7 @@ def wrapper(f):
787799
bigquery_connection_client,
788800
bq_connection_id,
789801
resource_manager_client,
802+
cloud_function_service_account,
790803
)
791804

792805
rf_name, cf_name = remote_function_client.provision_bq_remote_function(

bigframes/pandas/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ def remote_function(
615615
reuse: bool = True,
616616
name: Optional[str] = None,
617617
packages: Optional[Sequence[str]] = None,
618+
cloud_function_service_account: Optional[str] = None,
618619
):
619620
return global_session.with_default_session(
620621
bigframes.session.Session.remote_function,
@@ -625,6 +626,7 @@ def remote_function(
625626
reuse=reuse,
626627
name=name,
627628
packages=packages,
629+
cloud_function_service_account=cloud_function_service_account,
628630
)
629631

630632

bigframes/session/__init__.py

+9
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@ def remote_function(
13371337
reuse: bool = True,
13381338
name: Optional[str] = None,
13391339
packages: Optional[Sequence[str]] = None,
1340+
cloud_function_service_account: Optional[str] = None,
13401341
):
13411342
"""Decorator to turn a user defined function into a BigQuery remote function. Check out
13421343
the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1410,6 +1411,13 @@ def remote_function(
14101411
Explicit name of the external package dependencies. Each dependency
14111412
is added to the `requirements.txt` as is, and can be of the form
14121413
supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
1414+
cloud_function_service_account (str, Optional):
1415+
Service account to use for the cloud functions. If not provided
1416+
then the default service account would be used. See
1417+
https://cloud.google.com/functions/docs/securing/function-identity
1418+
for more details. Please make sure the service account has the
1419+
necessary IAM permissions configured as described in
1420+
https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration.
14131421
Returns:
14141422
callable: A remote function object pointing to the cloud assets created
14151423
in the background to support the remote execution. The cloud assets can be
@@ -1428,6 +1436,7 @@ def remote_function(
14281436
reuse=reuse,
14291437
name=name,
14301438
packages=packages,
1439+
cloud_function_service_account=cloud_function_service_account,
14311440
)
14321441

14331442
def read_gbq_function(

tests/system/large/test_remote_function.py

+43
Original file line numberDiff line numberDiff line change
@@ -1279,3 +1279,46 @@ def square(x):
12791279
cleanup_remote_function_assets(
12801280
session.bqclient, session.cloudfunctionsclient, square
12811281
)
1282+
1283+
1284+
@pytest.mark.skip("This requires additional project config.")
1285+
def test_remote_function_via_session_custom_sa(scalars_dfs):
1286+
# Set these values to run the test locally
1287+
# TODO(shobs): Automate and enable this test
1288+
PROJECT = ""
1289+
GCF_SERVICE_ACCOUNT = ""
1290+
1291+
rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=PROJECT))
1292+
1293+
try:
1294+
1295+
@rf_session.remote_function(
1296+
[int], int, reuse=False, cloud_function_service_account=GCF_SERVICE_ACCOUNT
1297+
)
1298+
def square_num(x):
1299+
if x is None:
1300+
return x
1301+
return x * x
1302+
1303+
scalars_df, scalars_pandas_df = scalars_dfs
1304+
1305+
bf_int64_col = scalars_df["int64_col"]
1306+
bf_result_col = bf_int64_col.apply(square_num)
1307+
bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()
1308+
1309+
pd_int64_col = scalars_pandas_df["int64_col"]
1310+
pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
1311+
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
1312+
1313+
assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
1314+
1315+
# Assert that the GCF is created with the intended SA
1316+
gcf = rf_session.cloudfunctionsclient.get_function(
1317+
name=square_num.bigframes_cloud_function
1318+
)
1319+
assert gcf.service_config.service_account_email == GCF_SERVICE_ACCOUNT
1320+
finally:
1321+
# clean up the gcp assets created for the remote function
1322+
cleanup_remote_function_assets(
1323+
rf_session.bqclient, rf_session.cloudfunctionsclient, square_num
1324+
)

0 commit comments

Comments
 (0)