Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c2009ed
docs: link to ML.EVALUATE BQML page for score() methods
ashleyxuu Oct 24, 2023
09ad5e4
feat: label query job with bigframes-api-xx using decorator
ashleyxuu Oct 25, 2023
4f4eb9b
reorganize the commit
ashleyxuu Oct 25, 2023
9ee937c
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Oct 26, 2023
272f0af
test: Log slowest tests durations (#146)
shobsi Oct 26, 2023
0e4c49c
docs: link to ML.EVALUATE BQML page for score() methods (#137)
ashleyxuu Oct 26, 2023
aad2c1a
feat: populate ibis version in user agent (#140)
ashleyxuu Oct 26, 2023
1043d6d
fix: don't override the global logging config (#138)
tswast Oct 26, 2023
1f49ef9
fix: use indexee's session for loc listlike cases (#152)
milkshakeiii Oct 26, 2023
c4c1e6e
feat: add pandas.qcut (#104)
TrevorBergeron Oct 26, 2023
4a27f44
feat: add unstack to series, add level param (#115)
TrevorBergeron Oct 26, 2023
fface57
feat: add `DataFrame.to_pandas_batches()` to download large `DataFram…
tswast Oct 26, 2023
bbc3c69
fix: resolve plotly rendering issue by using ipython html for job pro…
orrbradford Oct 26, 2023
a99d62c
refactor: ArrayValue is now a tree that defers conversion to ibis (#110)
TrevorBergeron Oct 27, 2023
f37d0b0
fix: fix bug with column names under repeated column assignment (#150)
milkshakeiii Oct 27, 2023
aba301c
test: refactor remote function tests (#147)
shobsi Oct 27, 2023
53bb2cd
feat: add dataframe melt (#116)
TrevorBergeron Oct 28, 2023
2bf4bcc
docs: add artithmetic df sample code (#153)
ashleyxuu Oct 30, 2023
343414a
feat: Implement operator `@` for `DataFrame.dot` (#139)
shobsi Oct 30, 2023
4eac10d
fix: fix typo and address comments
ashleyxuu Oct 30, 2023
868d2ad
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Oct 30, 2023
c03a8d9
Merge branch 'main' into ashleyxu-add-api-methods
tswast Nov 2, 2023
39321e4
fix: address comments
ashleyxuu Nov 3, 2023
aebcf11
Remove utils folder and refactor it in core directory
ashleyxuu Nov 3, 2023
72217c2
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 3, 2023
ec526b5
Remove utils folder and refactor it in core directory
ashleyxuu Nov 3, 2023
9edfe31
Merge remote-tracking branch 'origin/ashleyxu-add-api-methods' into a…
ashleyxuu Nov 3, 2023
4baa373
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 3, 2023
3a94c23
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Nov 3, 2023
d84c569
fix merge conflicts
ashleyxuu Nov 3, 2023
308c9a7
Merge remote-tracking branch 'origin/ashleyxu-add-api-methods' into a…
ashleyxuu Nov 3, 2023
4618107
commit the conflicts
ashleyxuu Nov 13, 2023
a87bcb8
redesign the log adapter
ashleyxuu Nov 14, 2023
cf97f8b
resolve conflicts and merge remote-tracking branch 'origin/main' into…
ashleyxuu Nov 14, 2023
53a99f9
Make the global _api_methods and lock threads
ashleyxuu Nov 14, 2023
3cc3599
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 14, 2023
1c3deb5
Make the global _api_methods and lock threads
ashleyxuu Nov 14, 2023
99f423b
merge conflicts
ashleyxuu Nov 14, 2023
115de27
address comments
ashleyxuu Nov 14, 2023
b0adf27
address comments
ashleyxuu Nov 14, 2023
b4ea9e3
Merge remote-tracking branch 'origin/ashleyxu-add-api-methods' into a…
ashleyxuu Nov 14, 2023
df9c9c0
fix error
ashleyxuu Nov 14, 2023
00bb6de
fix None job_config error
ashleyxuu Nov 14, 2023
36fea06
address comments
ashleyxuu Nov 14, 2023
e872d18
Merge branch 'main' into ashleyxu-add-api-methods
ashleyxuu Nov 14, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops
import bigframes.series as series
from bigframes.utils import log_adapter
Comment thread
ashleyxuu marked this conversation as resolved.
Outdated
import third_party.bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby


@log_adapter.class_logger
class DataFrameGroupBy(vendored_pandas_groupby.DataFrameGroupBy):
__doc__ = vendored_pandas_groupby.GroupBy.__doc__

Expand Down Expand Up @@ -408,6 +410,7 @@ def _resolve_label(self, label: blocks.Label) -> str:
return col_ids[0]


@log_adapter.class_logger
class SeriesGroupBy(vendored_pandas_groupby.SeriesGroupBy):
__doc__ = vendored_pandas_groupby.GroupBy.__doc__

Expand Down
2 changes: 2 additions & 0 deletions bigframes/core/window/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
import bigframes.core as core
import bigframes.core.blocks as blocks
import bigframes.operations.aggregations as agg_ops
from bigframes.utils import log_adapter
import third_party.bigframes_vendored.pandas.core.window.rolling as vendored_pandas_rolling


@log_adapter.class_logger
class Window(vendored_pandas_rolling.Window):
__doc__ = vendored_pandas_rolling.Window.__doc__

Expand Down
2 changes: 2 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import bigframes.series
import bigframes.series as bf_series
import bigframes.session._io.bigquery
from bigframes.utils import log_adapter
import third_party.bigframes_vendored.pandas.core.frame as vendored_pandas_frame
import third_party.bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing

Expand All @@ -80,6 +81,7 @@


# Inherits from pandas DataFrame so that we can use the same docstrings.
@log_adapter.class_logger
class DataFrame(vendored_pandas_frame.DataFrame):
__doc__ = vendored_pandas_frame.DataFrame.__doc__

Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
import bigframes.operations as ops
import bigframes.operations.base
import bigframes.series as series
from bigframes.utils import log_adapter
import third_party.bigframes_vendored.pandas.core.indexes.accessor as vendordt


@log_adapter.class_logger
class DatetimeMethods(
bigframes.operations.base.SeriesMethods, vendordt.DatetimeProperties
):
Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import bigframes.operations as ops
import bigframes.operations.base
import bigframes.series as series
from bigframes.utils import log_adapter
import third_party.bigframes_vendored.pandas.core.strings.accessor as vendorstr

# Maps from python to re2
Expand All @@ -32,6 +33,7 @@
}


@log_adapter.class_logger
class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods):
__doc__ = vendorstr.StringMethods.__doc__

Expand Down
2 changes: 2 additions & 0 deletions bigframes/operations/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import bigframes.operations
import bigframes.operations.base
import bigframes.series
from bigframes.utils import log_adapter
import third_party.bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors


Expand All @@ -38,6 +39,7 @@ def _as_ibis(self, x: ibis_types.Value):
return struct_value[name].name(name)


@log_adapter.class_logger
class StructAccessor(
bigframes.operations.base.SeriesMethods, vendoracessors.StructAccessor
):
Expand Down
2 changes: 2 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@
import bigframes.operations.datetimes as dt
import bigframes.operations.strings as strings
import bigframes.operations.structs as structs
from bigframes.utils import log_adapter
import third_party.bigframes_vendored.pandas.core.series as vendored_pandas_series

LevelType = typing.Union[str, int]
LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]


@log_adapter.class_logger
class Series(bigframes.operations.base.SeriesMethods, vendored_pandas_series.Series):
def __init__(self, *args, **kwargs):
self._query_job: Optional[bigquery.QueryJob] = None
Expand Down
15 changes: 13 additions & 2 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from bigframes.remote_function import remote_function as bigframes_rf
import bigframes.session._io.bigquery as bigframes_io
import bigframes.session.clients
from bigframes.utils import log_adapter
import bigframes.version

# Even though the ibis.backends.bigquery.registry import is unused, it's needed
Expand Down Expand Up @@ -110,6 +111,7 @@ def _is_query(query_or_table: str) -> bool:
return re.search(r"\s", query_or_table.strip(), re.MULTILINE) is not None


@log_adapter.class_logger
class Session(
third_party_pandas_gbq.GBQIOMixin,
third_party_pandas_parquet.ParquetIOMixin,
Expand Down Expand Up @@ -1496,10 +1498,19 @@ def _start_query(
"""
Starts query job and waits for results
"""
api_methods = log_adapter._api_methods
if job_config is not None:
query_job = self.bqclient.query(sql, job_config=job_config)
job_config.labels = bigframes_io.create_job_configs_labels(
job_configs_labels=job_config.labels, api_methods=api_methods
)
else:
query_job = self.bqclient.query(sql)
job_config = bigquery.QueryJobConfig()
job_config.labels = bigframes_io.create_job_configs_labels(
job_configs_labels=None, api_methods=api_methods
)
query_job = self.bqclient.query(sql, job_config=job_config)
# Clear out the global api logger
log_adapter._api_methods = []

opts = bigframes.options.display
if opts.progress_bar is not None and not query_job.configuration.dry_run:
Expand Down
32 changes: 31 additions & 1 deletion bigframes/session/_io/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,41 @@
import datetime
import textwrap
import types
from typing import Dict, Iterable, Union
from typing import Dict, Iterable, Optional, Sequence, Union

import google.cloud.bigquery as bigquery

IO_ORDERING_ID = "bqdf_row_nums"
MAX_LABELS_COUNT = 64


def create_job_configs_labels(
    job_configs_labels: Optional[Dict[str, str]],
    api_methods: Sequence[str],
) -> Dict[str, str]:
    """Merge recorded API method names into a job's label dictionary.

    Each API method becomes one ``bigframes-api-<n>`` label. Existing labels
    are always preserved; API methods only fill the slots that remain below
    ``MAX_LABELS_COUNT``. When there are more methods than free slots, the
    most recent ones (the tail of ``api_methods``) are kept.

    Args:
        job_configs_labels:
            Labels already set on the job config, or None if there are none.
            The mapping is not modified.
        api_methods:
            API method names in call order, oldest first.

    Returns:
        A new dictionary holding the existing labels plus the API method
        labels, never growing beyond ``MAX_LABELS_COUNT`` entries because of
        the added methods.
    """
    labels = dict(job_configs_labels) if job_configs_labels is not None else {}

    # Number of labels that can still be added without exceeding the limit.
    available = MAX_LABELS_COUNT - len(labels)
    if available <= 0:
        # Guard explicitly: a slice of [-0:] would select *every* method.
        return labels

    # Keep only the most recent calls when not everything fits.
    recent_methods = list(api_methods)[-available:]

    # Numbering continues after the existing labels, as before.
    index = len(labels)
    for method_name in recent_methods:
        label_key = f"bigframes-api-{index}"
        # Never overwrite a label supplied by the caller.
        while label_key in labels:
            index += 1
            label_key = f"bigframes-api-{index}"
        labels[label_key] = method_name
        index += 1
    return labels
Comment thread
ashleyxuu marked this conversation as resolved.
Outdated


def create_export_csv_statement(
Expand Down
17 changes: 17 additions & 0 deletions bigframes/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from bigframes.utils.log_adapter import class_logger, method_logger

__all__ = ["class_logger", "method_logger"]
32 changes: 32 additions & 0 deletions bigframes/utils/log_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import functools
Comment thread
ashleyxuu marked this conversation as resolved.
Outdated

from loguru import logger

# Log file location; initialised to None and not referenced by the functions
# visible in this module.
_log_file_path = None
# Module-level alias for the loguru logger imported above.
_logger = logger
# Names of the API methods called since the list was last reset, oldest first.
_api_methods = []


def class_logger(decorated_cls):
    """Decorator that adds logging functionality to each method of the class.

    Every callable defined directly on ``decorated_cls`` is replaced with a
    ``method_logger`` wrapper. ``staticmethod``/``classmethod`` objects and
    nested classes are left untouched: re-wrapping them in a plain function
    would change how they bind and break them.

    Args:
        decorated_cls: The class whose methods should be instrumented.

    Returns:
        The same class object, modified in place.
    """
    # Iterate over a snapshot because setattr() mutates the class namespace.
    for attr_name, attr_value in list(decorated_cls.__dict__.items()):
        if not callable(attr_value):
            continue
        if isinstance(attr_value, (staticmethod, classmethod, type)):
            continue
        setattr(decorated_cls, attr_name, method_logger(attr_value))
    return decorated_cls


def method_logger(method):
    """Decorator that adds logging functionality to a method.

    The wrapper records the method's name via ``add_api_method`` (names that
    start with a double underscore are not recorded) and then delegates to the
    wrapped method.

    Args:
        method: The callable to instrument.

    Returns:
        A wrapper with the same metadata as ``method`` that returns whatever
        ``method`` returns.
    """

    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        api_method_name = str(method.__name__)
        if not api_method_name.startswith("__"):
            add_api_method(api_method_name)
        # Delegate to the real implementation; without this every decorated
        # method (including __init__) would silently do nothing.
        return method(*args, **kwargs)

    return wrapper


def add_api_method(method: str) -> None:
    """Append ``method`` to the module-level list of called API methods."""
    # Resolved through the module global at call time, so a list that has
    # been reassigned on the module is picked up.
    _api_methods.append(method)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"sqlalchemy >=1.4,<3.0dev",
"ipywidgets >=7.7.1",
"humanize >= 4.6.0",
"loguru >=0.6.0",
]
extras = {
# Optional test dependencies packages. If they're missed, may skip some tests.
Expand Down
115 changes: 111 additions & 4 deletions tests/unit/session/test_io_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,122 @@
import google.cloud.bigquery as bigquery
import pytest

import bigframes.session._io.bigquery
import bigframes.pandas as bpd
import bigframes.session._io.bigquery as io_bq
from bigframes.utils import log_adapter


def test_create_job_configs_labels_is_none():
    """Without pre-existing labels, API methods are numbered from zero."""
    result = io_bq.create_job_configs_labels(
        job_configs_labels=None,
        api_methods=["df-agg", "series-mode"],
    )

    assert result is not None
    assert result == {
        "bigframes-api-0": "df-agg",
        "bigframes-api-1": "series-mode",
    }


def test_create_job_configs_labels_length_limit_not_met():
    """Below the label limit, every API method is appended after the existing labels."""
    existing = {
        "bigframes-api": "read_pandas",
        "source": "bigquery-dataframes-temp",
    }
    # Built before the call so the expectation does not depend on the result.
    wanted = {
        **existing,
        "bigframes-api-2": "df-agg",
        "bigframes-api-3": "series-mode",
    }

    result = io_bq.create_job_configs_labels(
        job_configs_labels=existing,
        api_methods=["df-agg", "series-mode"],
    )

    assert result is not None
    assert len(result) == 4
    assert result == wanted


def test_create_job_configs_labels_log_adaptor_under_length_limit():
    """Method names held by the log adapter are appended when under the limit."""
    log_adapter._api_methods = ["df-agg", "series-mode"]
    existing = {
        "bigframes-api": "read_pandas",
        "source": "bigquery-dataframes-temp",
    }
    wanted = {
        **existing,
        "bigframes-api-2": "df-agg",
        "bigframes-api-3": "series-mode",
    }

    result = io_bq.create_job_configs_labels(
        job_configs_labels=existing,
        api_methods=log_adapter._api_methods,
    )

    assert result is not None
    assert len(result) == 4
    assert result == wanted


def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit():
    """Methods recorded from real DataFrame calls become labels when under the limit."""
    existing = {
        "bigframes-api": "read_pandas",
        "source": "bigquery-dataframes-temp",
    }
    wanted = {
        **existing,
        "bigframes-api-2": "head",
        "bigframes-api-3": "max",
    }

    # Start from an empty record, then invoke two API methods.
    log_adapter._api_methods = []
    frame = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    frame.head()
    frame.max()

    result = io_bq.create_job_configs_labels(
        job_configs_labels=existing,
        api_methods=log_adapter._api_methods,
    )

    assert result is not None
    assert len(result) == 4
    assert result == wanted


def test_create_job_configs_labels_length_limit_met():
    """At the label limit, only the most recent API method is kept."""
    existing = {
        "bigframes-api": "read_pandas",
        "source": "bigquery-dataframes-temp",
    }
    existing.update({f"bigframes-api-{n}": f"test{n}" for n in range(61)})
    # With 63 labels already present there is room for exactly one more.

    log_adapter._api_methods = []
    frame = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    # Invoke two API methods; only the later one should survive truncation.
    frame.head()
    frame.max()

    result = io_bq.create_job_configs_labels(
        job_configs_labels=existing,
        api_methods=log_adapter._api_methods,
    )

    assert result is not None
    assert len(result) == 64
    label_values = result.values()
    assert "head" not in label_values
    assert "max" in label_values
    label_keys = result.keys()
    assert "bigframes-api" in label_keys
    assert "source" in label_keys


def test_create_snapshot_sql_doesnt_timetravel_anonymous_datasets():
table_ref = bigquery.TableReference.from_string(
"my-test-project._e8166e0cdb.anonbb92cd"
)

sql = bigframes.session._io.bigquery.create_snapshot_sql(
sql = io_bq.create_snapshot_sql(
table_ref, datetime.datetime.now(datetime.timezone.utc)
)

Expand All @@ -40,7 +147,7 @@ def test_create_snapshot_sql_doesnt_timetravel_anonymous_datasets():
def test_create_snapshot_sql_doesnt_timetravel_session_datasets():
table_ref = bigquery.TableReference.from_string("my-test-project._session.abcdefg")

sql = bigframes.session._io.bigquery.create_snapshot_sql(
sql = io_bq.create_snapshot_sql(
table_ref, datetime.datetime.now(datetime.timezone.utc)
)

Expand Down Expand Up @@ -101,5 +208,5 @@ def test_create_snapshot_sql_doesnt_timetravel_session_datasets():
),
)
def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str):
sql = bigframes.session._io.bigquery.bq_schema_to_sql(schema)
sql = io_bq.bq_schema_to_sql(schema)
assert sql == expected
1 change: 0 additions & 1 deletion third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# License: BSD 3 clause

from abc import ABC
from typing import List, Optional

from bigframes import constants
from third_party.bigframes_vendored.sklearn.base import BaseEstimator
Expand Down