Skip to content

Commit 45421e7

Browse files
author
Jim Fulton
authored
feat: Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types (#673)
* parse parameterized schema info * Fixed SchemaField repr/key * Fix code duplication between _parse_schema_resource and from_api_repr Move new parameterized-type code from _parse_schema_resource to from_api_repr and implement _parse_schema_resource in terms of from_api_repr. * empty schemas are lists now, just like non-empty schemas. * changed new parameterized-type tests to use from_api_repr Because that's more direct and it uncovered duplicate code. * parameterized the from_api_repr tests and added to_api_repr tests * Test BYTES and _key (repr) too. * Added a round-trip parameterized types schema tests * handle BYTES in _key/repr * blacken * Move _get_int close to use * Updated documentation. * Oops, forgot BIGNUMERIC * Improve argument doc and better argument name to __get_int * doom tables before creating them. * Use max_length in the Python for the REST api maxLength
1 parent 591cdd8 commit 45421e7

File tree

4 files changed

+209
-18
lines changed

4 files changed

+209
-18
lines changed

google/cloud/bigquery/schema.py

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,15 @@ class SchemaField(object):
6767
6868
policy_tags (Optional[PolicyTagList]): The policy tag list for the field.
6969
70+
precision (Optional[int]):
71+
Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
72+
73+
scale (Optional[int]):
74+
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
75+
76+
max_length (Optional[int]):
77+
Maximum length of fields with STRING or BYTES type.
78+
7079
"""
7180

7281
def __init__(
@@ -77,6 +86,9 @@ def __init__(
7786
description=_DEFAULT_VALUE,
7887
fields=(),
7988
policy_tags=None,
89+
precision=_DEFAULT_VALUE,
90+
scale=_DEFAULT_VALUE,
91+
max_length=_DEFAULT_VALUE,
8092
):
8193
self._properties = {
8294
"name": name,
@@ -86,9 +98,22 @@ def __init__(
8698
self._properties["mode"] = mode.upper()
8799
if description is not _DEFAULT_VALUE:
88100
self._properties["description"] = description
101+
if precision is not _DEFAULT_VALUE:
102+
self._properties["precision"] = precision
103+
if scale is not _DEFAULT_VALUE:
104+
self._properties["scale"] = scale
105+
if max_length is not _DEFAULT_VALUE:
106+
self._properties["maxLength"] = max_length
89107
self._fields = tuple(fields)
90108
self._policy_tags = policy_tags
91109

110+
@staticmethod
111+
def __get_int(api_repr, name):
112+
v = api_repr.get(name, _DEFAULT_VALUE)
113+
if v is not _DEFAULT_VALUE:
114+
v = int(v)
115+
return v
116+
92117
@classmethod
93118
def from_api_repr(cls, api_repr: dict) -> "SchemaField":
94119
"""Return a ``SchemaField`` object deserialized from a dictionary.
@@ -113,6 +138,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
113138
description=description,
114139
name=api_repr["name"],
115140
policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")),
141+
precision=cls.__get_int(api_repr, "precision"),
142+
scale=cls.__get_int(api_repr, "scale"),
143+
max_length=cls.__get_int(api_repr, "maxLength"),
116144
)
117145

118146
@property
@@ -148,6 +176,21 @@ def description(self):
148176
"""Optional[str]: description for the field."""
149177
return self._properties.get("description")
150178

179+
@property
180+
def precision(self):
181+
"""Optional[int]: Precision (number of digits) for the NUMERIC or BIGNUMERIC field."""
182+
return self._properties.get("precision")
183+
184+
@property
185+
def scale(self):
186+
"""Optional[int]: Scale (digits after decimal) for the NUMERIC or BIGNUMERIC field."""
187+
return self._properties.get("scale")
188+
189+
@property
190+
def max_length(self):
191+
"""Optional[int]: Maximum length for the STRING or BYTES field."""
192+
return self._properties.get("maxLength")
193+
151194
@property
152195
def fields(self):
153196
"""Optional[tuple]: Subfields contained in this field.
@@ -191,9 +234,19 @@ def _key(self):
191234
Returns:
192235
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
193236
"""
237+
field_type = self.field_type.upper()
238+
if field_type == "STRING" or field_type == "BYTES":
239+
if self.max_length is not None:
240+
field_type = f"{field_type}({self.max_length})"
241+
elif field_type.endswith("NUMERIC"):
242+
if self.precision is not None:
243+
if self.scale is not None:
244+
field_type = f"{field_type}({self.precision}, {self.scale})"
245+
else:
246+
field_type = f"{field_type}({self.precision})"
194247
return (
195248
self.name,
196-
self.field_type.upper(),
249+
field_type,
197250
# Mode is always str, if not given it defaults to a str value
198251
self.mode.upper(), # pytype: disable=attribute-error
199252
self.description,
@@ -269,21 +322,7 @@ def _parse_schema_resource(info):
269322
List[google.cloud.bigquery.schema.SchemaField]:
270323
A list of parsed fields; empty if no "fields" key is found.
271324
"""
272-
if "fields" not in info:
273-
return ()
274-
275-
schema = []
276-
for r_field in info["fields"]:
277-
name = r_field["name"]
278-
field_type = r_field["type"]
279-
mode = r_field.get("mode", "NULLABLE")
280-
description = r_field.get("description")
281-
sub_fields = _parse_schema_resource(r_field)
282-
policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags"))
283-
schema.append(
284-
SchemaField(name, field_type, mode, description, sub_fields, policy_tags)
285-
)
286-
return schema
325+
return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
287326

288327

289328
def _build_schema_resource(fields):

tests/system/test_client.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2173,6 +2173,35 @@ def test_list_rows_page_size(self):
21732173
page = next(pages)
21742174
self.assertEqual(page.num_items, num_last_page)
21752175

2176+
def test_parameterized_types_round_trip(self):
2177+
client = Config.CLIENT
2178+
table_id = f"{Config.DATASET}.test_parameterized_types_round_trip"
2179+
fields = (
2180+
("n", "NUMERIC"),
2181+
("n9", "NUMERIC(9)"),
2182+
("n92", "NUMERIC(9, 2)"),
2183+
("bn", "BIGNUMERIC"),
2184+
("bn9", "BIGNUMERIC(38)"),
2185+
("bn92", "BIGNUMERIC(38, 22)"),
2186+
("s", "STRING"),
2187+
("s9", "STRING(9)"),
2188+
("b", "BYTES"),
2189+
("b9", "BYTES(9)"),
2190+
)
2191+
self.to_delete.insert(0, Table(f"{client.project}.{table_id}"))
2192+
client.query(
2193+
"create table {} ({})".format(
2194+
table_id, ", ".join(" ".join(f) for f in fields)
2195+
)
2196+
).result()
2197+
table = client.get_table(table_id)
2198+
table_id2 = table_id + "2"
2199+
self.to_delete.insert(0, Table(f"{client.project}.{table_id2}"))
2200+
client.create_table(Table(f"{client.project}.{table_id2}", table.schema))
2201+
table2 = client.get_table(table_id2)
2202+
2203+
self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields)
2204+
21762205
def temp_dataset(self, dataset_id, location=None):
21772206
project = Config.CLIENT.project
21782207
dataset_ref = bigquery.DatasetReference(project, dataset_id)

tests/unit/test_query.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,7 +1302,7 @@ def _verifySchema(self, query, resource):
13021302
self.assertEqual(found.description, expected.get("description"))
13031303
self.assertEqual(found.fields, expected.get("fields", ()))
13041304
else:
1305-
self.assertEqual(query.schema, ())
1305+
self.assertEqual(query.schema, [])
13061306

13071307
def test_ctor_defaults(self):
13081308
query = self._make_one(self._make_resource())
@@ -1312,7 +1312,7 @@ def test_ctor_defaults(self):
13121312
self.assertIsNone(query.page_token)
13131313
self.assertEqual(query.project, self.PROJECT)
13141314
self.assertEqual(query.rows, [])
1315-
self.assertEqual(query.schema, ())
1315+
self.assertEqual(query.schema, [])
13161316
self.assertIsNone(query.total_rows)
13171317
self.assertIsNone(query.total_bytes_processed)
13181318

tests/unit/test_schema.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import unittest
1616

1717
import mock
18+
import pytest
1819

1920

2021
class TestSchemaField(unittest.TestCase):
@@ -715,3 +716,125 @@ def test___hash__not_equals(self):
715716
set_one = {policy1}
716717
set_two = {policy2}
717718
self.assertNotEqual(set_one, set_two)
719+
720+
721+
@pytest.mark.parametrize(
722+
"api,expect,key2",
723+
[
724+
(
725+
dict(name="n", type="NUMERIC"),
726+
("n", "NUMERIC", None, None, None),
727+
("n", "NUMERIC"),
728+
),
729+
(
730+
dict(name="n", type="NUMERIC", precision=9),
731+
("n", "NUMERIC", 9, None, None),
732+
("n", "NUMERIC(9)"),
733+
),
734+
(
735+
dict(name="n", type="NUMERIC", precision=9, scale=2),
736+
("n", "NUMERIC", 9, 2, None),
737+
("n", "NUMERIC(9, 2)"),
738+
),
739+
(
740+
dict(name="n", type="BIGNUMERIC"),
741+
("n", "BIGNUMERIC", None, None, None),
742+
("n", "BIGNUMERIC"),
743+
),
744+
(
745+
dict(name="n", type="BIGNUMERIC", precision=40),
746+
("n", "BIGNUMERIC", 40, None, None),
747+
("n", "BIGNUMERIC(40)"),
748+
),
749+
(
750+
dict(name="n", type="BIGNUMERIC", precision=40, scale=2),
751+
("n", "BIGNUMERIC", 40, 2, None),
752+
("n", "BIGNUMERIC(40, 2)"),
753+
),
754+
(
755+
dict(name="n", type="STRING"),
756+
("n", "STRING", None, None, None),
757+
("n", "STRING"),
758+
),
759+
(
760+
dict(name="n", type="STRING", maxLength=9),
761+
("n", "STRING", None, None, 9),
762+
("n", "STRING(9)"),
763+
),
764+
(
765+
dict(name="n", type="BYTES"),
766+
("n", "BYTES", None, None, None),
767+
("n", "BYTES"),
768+
),
769+
(
770+
dict(name="n", type="BYTES", maxLength=9),
771+
("n", "BYTES", None, None, 9),
772+
("n", "BYTES(9)"),
773+
),
774+
],
775+
)
776+
def test_from_api_repr_parameterized(api, expect, key2):
777+
from google.cloud.bigquery.schema import SchemaField
778+
779+
field = SchemaField.from_api_repr(api)
780+
781+
assert (
782+
field.name,
783+
field.field_type,
784+
field.precision,
785+
field.scale,
786+
field.max_length,
787+
) == expect
788+
789+
assert field._key()[:2] == key2
790+
791+
792+
@pytest.mark.parametrize(
793+
"field,api",
794+
[
795+
(
796+
dict(name="n", field_type="NUMERIC"),
797+
dict(name="n", type="NUMERIC", mode="NULLABLE"),
798+
),
799+
(
800+
dict(name="n", field_type="NUMERIC", precision=9),
801+
dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9),
802+
),
803+
(
804+
dict(name="n", field_type="NUMERIC", precision=9, scale=2),
805+
dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2),
806+
),
807+
(
808+
dict(name="n", field_type="BIGNUMERIC"),
809+
dict(name="n", type="BIGNUMERIC", mode="NULLABLE"),
810+
),
811+
(
812+
dict(name="n", field_type="BIGNUMERIC", precision=40),
813+
dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40),
814+
),
815+
(
816+
dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2),
817+
dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2),
818+
),
819+
(
820+
dict(name="n", field_type="STRING"),
821+
dict(name="n", type="STRING", mode="NULLABLE"),
822+
),
823+
(
824+
dict(name="n", field_type="STRING", max_length=9),
825+
dict(name="n", type="STRING", mode="NULLABLE", maxLength=9),
826+
),
827+
(
828+
dict(name="n", field_type="BYTES"),
829+
dict(name="n", type="BYTES", mode="NULLABLE"),
830+
),
831+
(
832+
dict(name="n", field_type="BYTES", max_length=9),
833+
dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9),
834+
),
835+
],
836+
)
837+
def test_to_api_repr_parameterized(field, api):
838+
from google.cloud.bigquery.schema import SchemaField
839+
840+
assert SchemaField(**field).to_api_repr() == api

0 commit comments

Comments
 (0)