diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index d4c34bbd0d..8da073802d 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -47,10 +47,10 @@ class VertexAIModel(base.BaseEstimator): Args: endpoint (str): Vertex AI https endpoint. - input ({column_name: column_type}): - Input schema. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>". - output ({column_name: column_type}): - Output label schema. Supported the same types as the input. + input (Mapping): + Input schema: `{column_name: column_type}`. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>". + output (Mapping): + Output label schema: `{column_name: column_type}`. Supports the same types as the input. session (bigframes.Session or None): BQ session to create the model. If None, use the global default session. connection_name (str or None): diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 58ac1c0efe..b680a5fc1a 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -108,12 +108,6 @@ - name: PaLM2TextEmbeddingGenerator uid: bigframes.ml.llm.PaLM2TextEmbeddingGenerator name: llm - - items: - - name: Overview - uid: bigframes.ml.remote - - name: VertexAIModel - uid: bigframes.ml.remote.VertexAIModel - name: remote - items: - name: metrics uid: bigframes.ml.metrics @@ -144,6 +138,12 @@ - name: OneHotEncoder uid: bigframes.ml.preprocessing.OneHotEncoder name: preprocessing + - items: + - name: Overview + uid: bigframes.ml.remote + - name: VertexAIModel + uid: bigframes.ml.remote.VertexAIModel + name: remote name: bigframes.ml name: BigQuery DataFrames status: beta diff --git a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py index 6be41bf9aa..63c62274fd 100644 --- a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py +++ b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py @@ -47,16 
+47,16 @@ def fit(self, X, y): """Build a forest of trees from the training set (X, y). Args: - X: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): Series or DataFrame of shape (n_samples, n_features). Training data. - y: + y (bigframes.dataframe.DataFrame or bigframes.series.Series): Series or DataFrame of shape (n_samples,) or (n_samples, n_targets). Target values. Will be cast to X's dtype if necessary. Returns: - Fitted Estimator. + ForestModel: Fitted Estimator. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -73,12 +73,12 @@ def predict(self, X): mean predicted regression targets of the trees in the forest. Args: - X: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): Series or DataFrame of shape (n_samples, n_features). The data matrix for which we want to get the predictions. Returns: - The predicted values. + bigframes.dataframe.DataFrame: The predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -91,38 +91,38 @@ class RandomForestRegressor(ForestRegressor): to improve the predictive accuracy and control over-fitting. Args: - num_parallel_tree: Optional[int] + num_parallel_tree (Optional[int]): Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2. - tree_method: Optional[str] + tree_method (Optional[str]): Specify which tree method to use. Default to "auto". If this parameter is set to default, XGBoost will choose the most conservative option available. Possible values: ""exact", "approx", "hist". - min_child_weight : Optional[float] + min_child_weight (Optional[float]): Minimum sum of instance weight(hessian) needed in a child. Default to 1. - colsample_bytree : Optional[float] + colsample_bytree (Optional[float]): Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1. 
- colsample_bylevel : Optional[float] + colsample_bylevel (Optional[float]): Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1. - colsample_bynode : Optional[float] + colsample_bynode (Optional[float]): Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1. - gamma : Optional[float] + gamma (Optional[float]): (min_split_loss) Minimum loss reduction required to make a further partition on a leaf node of the tree. Default to 0.0. - max_depth : Optional[int] + max_depth (Optional[int]): Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1. - subsample : Optional[float] + subsample (Optional[float]): Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1. - reg_alpha : Optional[float] + reg_alpha (Optional[float]): L1 regularization term on weights (xgb's alpha). Default to 0.0. - reg_lambda : Optional[float] + reg_lambda (Optional[float]): L2 regularization term on weights (xgb's lambda). Default to 1.0. - early_stop: Optional[bool] + early_stop (Optional[bool]): Whether training should stop after the first iteration. Default to True. - min_rel_progress: Optional[float] + min_rel_progress (Optional[float]): Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01. - enable_global_explain: Optional[bool] + enable_global_explain (Optional[bool]): Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. - xgboost_version: Optional[str] + xgboost_version (Optional[str]): Specifies the Xgboost version for model training. Default to "0.9". Possible values: "0.9", "1.1". """ @@ -144,7 +144,7 @@ def predict(self, X): which we want to get the predictions. Returns: - The predicted values. + bigframes.dataframe.DataFrame: The predicted values. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -158,37 +158,37 @@ class RandomForestClassifier(ForestClassifier): improve the predictive accuracy and control over-fitting. Args: - num_parallel_tree: Optional[int] + num_parallel_tree (Optional[int]): Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2. - tree_method: Optional[str] + tree_method (Optional[str]): Specify which tree method to use. Default to "auto". If this parameter is set to default, XGBoost will choose the most conservative option available. Possible values: ""exact", "approx", "hist". - min_child_weight : Optional[float] + min_child_weight (Optional[float]): Minimum sum of instance weight(hessian) needed in a child. Default to 1. - colsample_bytree : Optional[float] + colsample_bytree (Optional[float]): Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1. - colsample_bylevel : Optional[float] + colsample_bylevel (Optional[float]): Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1. - colsample_bynode : Optional[float] + colsample_bynode (Optional[float]): Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1. - gamma : Optional[float] + gamma (Optional[float]): (min_split_loss) Minimum loss reduction required to make a further partition on a leaf node of the tree. Default to 0.0. - max_depth : Optional[int] + max_depth (Optional[int]): Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1. - subsample : Optional[float] + subsample (Optional[float]): Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1. - reg_alpha : Optional[float] + reg_alpha (Optional[float]): L1 regularization term on weights (xgb's alpha). Default to 0.0. 
- reg_lambda : Optional[float] + reg_lambda (Optional[float]): L2 regularization term on weights (xgb's lambda). Default to 1.0. - early_stop: Optional[bool] + early_stop (Optional[bool]): Whether training should stop after the first iteration. Default to True. - min_rel_progress: Optional[float] + min_rel_progress (Optional[float]): Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01. - enable_global_explain: Optional[bool] + enable_global_explain (Optional[bool]): Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False. - xgboost_version: Optional[str] + xgboost_version (Optional[str]): Specifies the Xgboost version for model training. Default to "0.9". Possible values: "0.9", "1.1". """