Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Previous commit
Next commit
more tests, prettier sql
  • Loading branch information
TrevorBergeron committed Oct 28, 2023
commit 7b382804bd02a2fc48b044d124776931e4838dcb
10 changes: 5 additions & 5 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -1277,11 +1277,11 @@ def to_sql(
)
if sorted:
sql = textwrap.dedent(
f"""
SELECT * EXCEPT (`{offsets_id}`)
FROM ({sql})
ORDER BY `{offsets_id}`
"""
f"SELECT * EXCEPT (`{offsets_id}`)\n"
"FROM (\n"
f"{sql}\n"
")\n"
f"ORDER BY `{offsets_id}`\n"
)
return typing.cast(str, sql)

Expand Down
4 changes: 2 additions & 2 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@ def to_pandas(
take longer to execute and require more computation. If set to a value other than
None, this will supersede the global config.
ordered (bool, default True):
Determines whether the resulting pandas series will be ordered. In some cases,
unordered may result in a faster-executing query.
Determines whether the resulting pandas series will be deterministically ordered.
In some cases, unordered may result in a faster-executing query.


Returns:
Expand Down
2 changes: 1 addition & 1 deletion tests/system/large/ml/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_cluster_configure_fit_score_predict(
index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"),
)
expected.index.name = "observation"
assert_pandas_df_equal(result, expected)
assert_pandas_df_equal(result, expected, ignore_order=True)

# save, load, check n_clusters to ensure configuration was kept
reloaded_model = model.to_gbq(
Expand Down
35 changes: 26 additions & 9 deletions tests/system/small/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pytest

import bigframes.pandas as bpd
from tests.system.utils import assert_pandas_df_equal


@pytest.mark.parametrize(
Expand Down Expand Up @@ -88,16 +89,23 @@ def test_dataframe_groupby_aggregate(
pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False)


def test_dataframe_groupby_agg_string(scalars_df_index, scalars_pandas_df_index):
@pytest.mark.parametrize(
("ordered"),
[
(True),
(False),
],
)
def test_dataframe_groupby_agg_string(
scalars_df_index, scalars_pandas_df_index, ordered
):
col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"]
bf_result = scalars_df_index[col_names].groupby("string_col").agg("count")
pd_result = scalars_pandas_df_index[col_names].groupby("string_col").agg("count")
bf_result_computed = bf_result.to_pandas()
bf_result_computed = bf_result.to_pandas(ordered=ordered)

pd.testing.assert_frame_equal(
pd_result,
bf_result_computed,
check_dtype=False,
assert_pandas_df_equal(
pd_result, bf_result_computed, check_dtype=False, ignore_order=not ordered
)


Expand Down Expand Up @@ -270,13 +278,22 @@ def test_dataframe_groupby_kurt(scalars_df_index, scalars_pandas_df_index):
pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)


def test_dataframe_groupby_diff(scalars_df_index, scalars_pandas_df_index):
@pytest.mark.parametrize(
("ordered"),
[
(True),
(False),
],
)
def test_dataframe_groupby_diff(scalars_df_index, scalars_pandas_df_index, ordered):
col_names = ["float64_col", "int64_col", "string_col"]
bf_result = scalars_df_index[col_names].groupby("string_col").diff(-1)
pd_result = scalars_pandas_df_index[col_names].groupby("string_col").diff(-1)
bf_result_computed = bf_result.to_pandas()
bf_result_computed = bf_result.to_pandas(ordered=ordered)

pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False)
assert_pandas_df_equal(
pd_result, bf_result_computed, check_dtype=False, ignore_order=not ordered
)


def test_dataframe_groupby_getitem(
Expand Down