@@ -23,15 +23,21 @@ class OneHotEncoder(BaseEstimator):
23
23
Given a dataset with two features, we let the encoder find the unique
24
24
values per feature and transform the data to a binary one-hot encoding.
25
25
26
- .. code-block::
27
-
28
- from bigframes.ml.preprocessing import OneHotEncoder
29
- import bigframes.pandas as bpd
30
-
31
- enc = OneHotEncoder()
32
- X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})
33
- enc.fit(X)
34
- print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]})))
26
+ >>> from bigframes.ml.preprocessing import OneHotEncoder
27
+ >>> import bigframes.pandas as bpd
28
+ >>> bpd.options.display.progress_bar = None
29
+
30
+ >>> enc = OneHotEncoder()
31
+ >>> X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})
32
+ >>> enc.fit(X)
33
+ OneHotEncoder()
34
+
35
+ >>> print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]})))
36
+ onehotencoded_a onehotencoded_b
37
+ 0 [{'index': 1, 'value': 1.0}] [{'index': 1, 'value': 1.0}]
38
+ 1 [{'index': 2, 'value': 1.0}] [{'index': 0, 'value': 1.0}]
39
+ <BLANKLINE>
40
+ [2 rows x 2 columns]
35
41
36
42
Args:
37
43
drop (Optional[Literal["most_frequent"]], default None):
@@ -52,7 +58,7 @@ class OneHotEncoder(BaseEstimator):
52
58
Specifies an upper limit to the number of output features for each input feature
53
59
when considering infrequent categories. If there are infrequent categories,
54
60
max_categories includes the category representing the infrequent categories along with the frequent categories.
55
- Default None, set limit to 1,000,000.
61
+ Default None, which sets the limit to 1,000,000.
56
62
"""
57
63
58
64
def fit (self , X , y = None ):
0 commit comments