Skip to content

Commit 9f8f181

Browse files
authored
fix: address technical writers fb (#611)
* fix: address technical writers fb
1 parent 34f9f61 commit 9f8f181

File tree

7 files changed

+28
-21
lines changed

7 files changed

+28
-21
lines changed

bigframes/ml/model_selection.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def train_test_split(
3535
Args:
3636
*arrays (bigframes.dataframe.DataFrame or bigframes.series.Series):
3737
A sequence of BigQuery DataFrames or Series that can be joined on
38-
their indexes
38+
their indexes.
3939
test_size (default None):
4040
The proportion of the dataset to include in the test split. If
4141
None, this will default to the complement of train_size. If both

third_party/bigframes_vendored/pandas/core/indexes/accessor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def dayofweek(self):
3737
"""The day of the week with Monday=0, Sunday=6.
3838
3939
Return the day of the week. It is assumed the week starts on
40-
Monday, which is denoted by 0 and ends on Sunday which is denoted
40+
Monday, which is denoted by 0 and ends on Sunday, which is denoted
4141
by 6.
4242
4343
**Examples:**

third_party/bigframes_vendored/sklearn/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def fit_transform(self, X, y=None):
153153
Target values (None for unsupervised transformations).
154154
155155
Returns:
156-
bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_features_new)
156+
bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_features_new).
157157
Transformed DataFrame.
158158
"""
159159
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

third_party/bigframes_vendored/sklearn/metrics/_classification.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def recall_score(
122122
):
123123
"""Compute the recall.
124124
125-
The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
125+
The recall is the ratio ``tp / (tp + fn)``, where ``tp`` is the number of
126126
true positives and ``fn`` the number of false negatives. The recall is
127127
intuitively the ability of the classifier to find all the positive samples.
128128
@@ -170,7 +170,7 @@ def precision_score(
170170
):
171171
"""Compute the precision.
172172
173-
The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
173+
The precision is the ratio ``tp / (tp + fp)``, where ``tp`` is the number of
174174
true positives and ``fp`` the number of false positives. The precision is
175175
intuitively the ability of the classifier not to label as positive a sample
176176
that is negative.
@@ -244,9 +244,9 @@ def f1_score(
244244
dtype: float64
245245
246246
Args:
247-
y_true: Series or DataFrame of shape (n_samples,)
247+
y_true: Series or DataFrame of shape (n_samples,).
248248
Ground truth (correct) target values.
249-
y_pred: Series or DataFrame of shape (n_samples,)
249+
y_pred: Series or DataFrame of shape (n_samples,).
250250
Estimated targets as returned by a classifier.
251251
average: {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
252252
default='binary'

third_party/bigframes_vendored/sklearn/pipeline.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@ class Pipeline(BaseEstimator, metaclass=ABCMeta):
2020
"""Pipeline of transforms with a final estimator.
2121
2222
Sequentially apply a list of transforms and a final estimator.
23-
Intermediate steps of the pipeline must be `transforms`, that is, they
23+
Intermediate steps of the pipeline must be `transforms`. That is, they
2424
must implement `fit` and `transform` methods.
2525
The final estimator only needs to implement `fit`.
2626
2727
The purpose of the pipeline is to assemble several steps that can be
28-
cross-validated together while setting different parameters. This simplifies code, and allows deploying an estimator
29-
and peprocessing together, e.g. with `Pipeline.to_gbq(...).`
28+
cross-validated together while setting different parameters. This
29+
simplifies code and allows for deploying an estimator and preprocessing
30+
together, e.g. with `Pipeline.to_gbq(...)`.
3031
"""
3132

3233
def fit(

third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,21 @@ class OneHotEncoder(BaseEstimator):
2323
Given a dataset with two features, we let the encoder find the unique
2424
values per feature and transform the data to a binary one-hot encoding.
2525
26-
.. code-block::
27-
28-
from bigframes.ml.preprocessing import OneHotEncoder
29-
import bigframes.pandas as bpd
30-
31-
enc = OneHotEncoder()
32-
X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})
33-
enc.fit(X)
34-
print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]})))
26+
>>> from bigframes.ml.preprocessing import OneHotEncoder
27+
>>> import bigframes.pandas as bpd
28+
>>> bpd.options.display.progress_bar = None
29+
30+
>>> enc = OneHotEncoder()
31+
>>> X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})
32+
>>> enc.fit(X)
33+
OneHotEncoder()
34+
35+
>>> print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]})))
36+
onehotencoded_a onehotencoded_b
37+
0 [{'index': 1, 'value': 1.0}] [{'index': 1, 'value': 1.0}]
38+
1 [{'index': 2, 'value': 1.0}] [{'index': 0, 'value': 1.0}]
39+
<BLANKLINE>
40+
[2 rows x 2 columns]
3541
3642
Args:
3743
drop (Optional[Literal["most_frequent"]], default None):
@@ -52,7 +58,7 @@ class OneHotEncoder(BaseEstimator):
5258
Specifies an upper limit to the number of output features for each input feature
5359
when considering infrequent categories. If there are infrequent categories,
5460
max_categories includes the category representing the infrequent categories along with the frequent categories.
55-
Default None, set limit to 1,000,000.
61+
Default None, which sets the limit to 1,000,000.
5662
"""
5763

5864
def fit(self, X, y=None):

third_party/bigframes_vendored/sklearn/preprocessing/_label.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class LabelEncoder(BaseEstimator):
2626
Specifies an upper limit to the number of output features for each input feature
2727
when considering infrequent categories. If there are infrequent categories,
2828
max_categories includes the category representing the infrequent categories along with the frequent categories.
29-
Default None, set limit to 1,000,000.
29+
Default None, which sets the limit to 1,000,000.
3030
"""
3131

3232
def fit(self, y):

0 commit comments

Comments
 (0)