84
84
import bigframes .core .ordering as order
85
85
import bigframes .core .traversal as traversals
86
86
import bigframes .core .utils as utils
87
- import bigframes .dataframe as dataframe
88
87
import bigframes .dtypes
89
88
import bigframes .formatting_helpers as formatting_helpers
90
89
from bigframes .functions .remote_function import read_gbq_function as bigframes_rgf
93
92
import bigframes .session .clients
94
93
import bigframes .version
95
94
95
+ # Avoid circular imports.
96
+ if typing .TYPE_CHECKING :
97
+ import bigframes .dataframe as dataframe
98
+
96
99
_BIGFRAMES_DEFAULT_CONNECTION_ID = "bigframes-default-connection"
97
100
98
101
_MAX_CLUSTER_COLUMNS = 4
@@ -557,6 +560,8 @@ def _read_gbq_query(
557
560
api_name : str = "read_gbq_query" ,
558
561
use_cache : Optional [bool ] = None ,
559
562
) -> dataframe .DataFrame :
563
+ import bigframes .dataframe as dataframe
564
+
560
565
configuration = _transform_read_gbq_configuration (configuration )
561
566
562
567
if "query" not in configuration :
@@ -754,6 +759,8 @@ def _read_gbq_table(
754
759
api_name : str ,
755
760
use_cache : bool = True ,
756
761
) -> dataframe .DataFrame :
762
+ import bigframes .dataframe as dataframe
763
+
757
764
if max_results and max_results <= 0 :
758
765
raise ValueError ("`max_results` should be a positive number." )
759
766
@@ -989,6 +996,8 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame
989
996
def _read_pandas (
990
997
self , pandas_dataframe : pandas .DataFrame , api_name : str
991
998
) -> dataframe .DataFrame :
999
+ import bigframes .dataframe as dataframe
1000
+
992
1001
if isinstance (pandas_dataframe , dataframe .DataFrame ):
993
1002
raise ValueError (
994
1003
"read_pandas() expects a pandas.DataFrame, but got a "
@@ -1003,6 +1012,8 @@ def _read_pandas(
1003
1012
def _read_pandas_inline (
1004
1013
self , pandas_dataframe : pandas .DataFrame
1005
1014
) -> Optional [dataframe .DataFrame ]:
1015
+ import bigframes .dataframe as dataframe
1016
+
1006
1017
if pandas_dataframe .size > MAX_INLINE_DF_SIZE :
1007
1018
return None
1008
1019
@@ -1024,11 +1035,20 @@ def _read_pandas_inline(
1024
1035
def _read_pandas_load_job (
1025
1036
self , pandas_dataframe : pandas .DataFrame , api_name : str
1026
1037
) -> dataframe .DataFrame :
1038
+ import bigframes .dataframe as dataframe
1039
+
1040
+ col_index = pandas_dataframe .columns .copy ()
1027
1041
col_labels , idx_labels = (
1028
- pandas_dataframe . columns .to_list (),
1042
+ col_index .to_list (),
1029
1043
pandas_dataframe .index .names ,
1030
1044
)
1031
- new_col_ids , new_idx_ids = utils .get_standardized_ids (col_labels , idx_labels )
1045
+ new_col_ids , new_idx_ids = utils .get_standardized_ids (
1046
+ col_labels ,
1047
+ idx_labels ,
1048
+ # Loading parquet files into BigQuery with special column names
1049
+ # is only supported under an allowlist.
1050
+ strict = True ,
1051
+ )
1032
1052
1033
1053
# Add order column to pandas DataFrame to preserve order in BigQuery
1034
1054
ordering_col = "rowid"
@@ -1047,7 +1067,7 @@ def _read_pandas_load_job(
1047
1067
1048
1068
# Specify the datetime dtypes, which are auto-detected as timestamp types.
1049
1069
schema : list [bigquery .SchemaField ] = []
1050
- for column , dtype in zip (pandas_dataframe . columns , pandas_dataframe .dtypes ):
1070
+ for column , dtype in zip (new_col_ids , pandas_dataframe .dtypes ):
1051
1071
if dtype == "timestamp[us][pyarrow]" :
1052
1072
schema .append (
1053
1073
bigquery .SchemaField (column , bigquery .enums .SqlTypeNames .DATETIME )
@@ -1101,7 +1121,7 @@ def _read_pandas_load_job(
1101
1121
block = blocks .Block (
1102
1122
array_value ,
1103
1123
index_columns = new_idx_ids ,
1104
- column_labels = col_labels ,
1124
+ column_labels = col_index ,
1105
1125
index_labels = idx_labels ,
1106
1126
)
1107
1127
return dataframe .DataFrame (block )
0 commit comments