Skip to content

Commit 9d31865

Browse files
authored
feat: Add ml.metrics.pairwise.manhattan_distance (#392)
1 parent 81ece46 commit 9d31865

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

bigframes/ml/metrics/pairwise.py

+16
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,19 @@ def paired_cosine_distances(
3434
paired_cosine_distances.__doc__ = inspect.getdoc(
3535
vendored_metrics_pairwise.paired_cosine_distances
3636
)
37+
38+
39+
def paired_manhattan_distance(
    X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series]
) -> bpd.DataFrame:
    # No inline docstring on purpose: the public docstring is attached right
    # after the definition from the vendored sklearn stub.
    X, Y = utils.convert_to_dataframe(X, Y)

    # Each side must be exactly one column; reject anything else up front.
    single_column_inputs = len(X.columns) == 1 and len(Y.columns) == 1
    if not single_column_inputs:
        raise ValueError("Inputs X and Y can only contain 1 column.")

    # The distance computation is delegated to BQML through the session
    # that X is bound to.
    bqml = core.BaseBqml(session=X._session)
    return bqml.distance(X, Y, type="MANHATTAN", name="manhattan_distance")


paired_manhattan_distance.__doc__ = inspect.getdoc(
    vendored_metrics_pairwise.paired_manhattan_distance
)

tests/system/small/ml/test_metrics_pairwise.py

+14
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,17 @@ def test_paired_cosine_distances():
3333
pd.testing.assert_frame_equal(
3434
result.to_pandas(), expected_pd_df, check_dtype=False, check_index_type=False
3535
)
36+
37+
38+
def test_paired_manhattan_distance():
    # One row per frame, each cell holding a 3-element vector.
    # Expected L1 distance: |4.1 - 3.0| + |0.5 - 0.0| + |1.0 - 2.5| = 3.1
    x_col = [np.array([4.1, 0.5, 1.0])]
    y_col = [np.array([3.0, 0.0, 2.5])]
    left = bpd.read_pandas(pd.DataFrame({"X": x_col}))
    right = bpd.read_pandas(pd.DataFrame({"Y": y_col}))

    distances = metrics.pairwise.paired_manhattan_distance(left, right)

    # The result carries both input columns plus the computed distance.
    expected = pd.DataFrame(
        {"X": x_col, "Y": y_col, "manhattan_distance": [3.1]}
    )
    pd.testing.assert_frame_equal(
        distances.to_pandas(),
        expected,
        check_dtype=False,
        check_index_type=False,
    )

third_party/bigframes_vendored/sklearn/metrics/pairwise.py

+15
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,18 @@ def paired_cosine_distances(X, Y) -> bpd.DataFrame:
2424
bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_distance
2525
"""
2626
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
27+
28+
29+
def paired_manhattan_distance(X, Y) -> bpd.DataFrame:
    """Compute the L1 (Manhattan) distances between the vectors in X and Y.

    Args:
        X (Series or single column DataFrame of array of numeric type):
            Input data.
        Y (Series or single column DataFrame of array of numeric type):
            Input data. X and Y are mapped by indexes, so both must have
            the same index.

    Returns:
        bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and manhattan_distance
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)