-
Notifications
You must be signed in to change notification settings - Fork 61
feat: Add plumbing PR for client side metrics to support the open telemetry instruments #1569
Changes from all commits
62a4b8b
7c4f414
83f53ae
610eec0
a2b5951
5f67cad
8f20c78
9b1ba9d
88e96c3
ed39628
76b1249
8d60cb1
19fef92
1ecfb1c
d8a3960
1d6b645
acb1d3a
c30b057
9ef079b
d5a0368
ae532d8
b2bced9
e1dd61c
cd97f35
9ec98df
2457dbe
5a1a3aa
1bd2d2b
c2be338
cd0d774
e7caf36
7fd86d2
db05ff3
9cc4b15
0142329
15d6e4a
865529e
28fbfd8
5995789
a62b124
bfc5883
0996d3c
ef8e3fe
c68a76f
47a24b1
b2600f2
d4d3f6c
b8dff1c
d50384f
b5fc1f2
1e5dc82
c2ffbc6
1200b3c
9320149
4023361
ef91733
52b570c
6a6774f
10b6d30
33c17c6
fbf2314
39fe861
8f13100
48e0e95
66c4ab1
e7c5b5f
98be351
efdfcea
4a6a476
edfcf8a
bc4998f
7e6374d
10b72ec
a72b51a
47fd9d0
2a42162
9073f07
32d3983
9716c4a
d2b93ee
99f9577
c82e72d
759e829
76b6f5a
1e840a4
7bf62e9
fca55b7
51afdce
57b1dc1
0f850b7
6c1e01b
2910408
2781561
0b4d93e
1b6681b
b6f1302
1ae82ff
3ee5604
124ed30
ef36a6f
6829224
dd603f1
b493c0d
2f19f31
dfe7d57
02d752a
19d1d81
48ff706
883ea1a
ad9c85b
5a3bac2
4740c62
ee404f1
c997f0f
3719257
a6498f2
e7d631d
382ebef
8ba3347
13382d2
948a3a3
c8bb0a8
78ec2e8
e5ec89f
5403a1b
176ed02
08c1c1b
be7673f
e6d6606
285454f
12a5cc7
54239f3
86890aa
a0fa7e4
f5267c1
887d988
075bf9b
94f422c
1c3c290
b118bc1
8aefe11
a30d3ec
c3f2963
67b6478
8544410
96dbc1c
416e18c
991f5c8
09389ee
87d5592
9ad2ef8
c59fcab
5848588
9c35dfb
07ec90d
bf54c8c
f226b5f
b42b4f4
54ac764
eb8f14b
6ecb1a6
fa0a56e
fcef83d
cd2efac
eba027c
5929a9d
3b48c8e
8edc4ab
8c9d23f
7b49f01
2ec12b6
b4f7705
2a32459
e5caa9e
fc114ff
6fb5944
ca6f05e
4bec216
bd4b0ac
c191614
86be1ea
3b0f081
3ebb9ff
cf32131
78a20d4
105b58b
d4022fd
7ea28d2
7848643
7229174
7f4e167
c86196a
41c9b2d
d76fa14
2f3b4e5
7a4b33e
5a09c33
aa18c1e
791e70d
2e21503
e936aaa
ab07a58
7fe9a46
26b9ca7
29ef6b0
8fd59f4
183bdb7
92059aa
af3aa73
422060e
f93a721
ea230aa
5ecfb70
303c57c
a822961
31fe184
30152d6
9584c69
082c049
f2e46a4
dfcf56e
35da5c6
b5ae964
b575010
822c14e
561889c
f423c57
9076be1
ead3f5e
fa6c3fd
a587d0c
311d555
117473b
e4154af
0ccec5b
9f94b9e
7e76c39
bb64fbe
27f5bcd
d1a292f
23a4d39
55dbd8f
3b4f54c
1566d1f
cab2a95
29c78cc
28c81e0
84fcc86
2022022
d7b37db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,320 @@ | ||
| // Copyright 2025 Google LLC | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // https://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
| import {MetricExporter} from '@google-cloud/opentelemetry-cloud-monitoring-exporter'; | ||
| import { | ||
| DataPoint, | ||
| ExponentialHistogram, | ||
| Histogram, | ||
| ResourceMetrics, | ||
| } from '@opentelemetry/sdk-metrics'; | ||
| import {ServiceError} from 'google-gax'; | ||
| import {MetricServiceClient} from '@google-cloud/monitoring'; | ||
| import {google} from '@google-cloud/monitoring/build/protos/protos'; | ||
| import ICreateTimeSeriesRequest = google.monitoring.v3.ICreateTimeSeriesRequest; | ||
|
|
||
| export interface ExportResult { | ||
| code: number; | ||
| } | ||
|
|
||
| /** | ||
| * Type guard function to determine if a given value is a counter value (a number). | ||
| * | ||
| * This function checks if a value, which could be either a `DistributionValue` | ||
| * object or a `number`, is specifically a `number`. This is used to differentiate | ||
| * between counter metrics (which have numeric values) and distribution metrics | ||
| * (which have more complex, object-based values). | ||
| * | ||
| */ | ||
| function isCounterValue( | ||
| dataPoint: | ||
| | DataPoint<number> | ||
| | DataPoint<Histogram> | ||
| | DataPoint<ExponentialHistogram> | ||
| ): dataPoint is DataPoint<number> { | ||
| return typeof dataPoint.value === 'number'; | ||
| } | ||
|
|
||
| function getInterval( | ||
| dataPoint: | ||
| | DataPoint<number> | ||
| | DataPoint<Histogram> | ||
| | DataPoint<ExponentialHistogram> | ||
| ) { | ||
| return { | ||
| endTime: { | ||
| seconds: dataPoint.endTime[0], | ||
| }, | ||
| startTime: { | ||
| seconds: dataPoint.startTime[0], | ||
| }, | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * This function gets the timeseries data points for metrics that are | ||
| * represented as distributions on the backend. These data points are part of a | ||
| * timeseries object that is recorded to Google Cloud Monitoring. | ||
| * | ||
| * @param {DataPoint} dataPoint The datapoint containing the data we wish to | ||
| * send to the Google Cloud Monitoring dashboard | ||
| */ | ||
| function getDistributionPoints( | ||
| dataPoint: DataPoint<Histogram> | DataPoint<ExponentialHistogram> | ||
| ) { | ||
| const value = dataPoint.value; | ||
| return [ | ||
| { | ||
| interval: getInterval(dataPoint), | ||
| value: { | ||
| distributionValue: { | ||
| count: String(value.count), | ||
| mean: value.count && value.sum ? value.sum / value.count : 0, | ||
| bucketOptions: { | ||
| explicitBuckets: { | ||
| bounds: (value as Histogram).buckets.boundaries, | ||
| }, | ||
| }, | ||
| bucketCounts: (value as Histogram).buckets.counts.map(String), | ||
| }, | ||
| }, | ||
| }, | ||
| ]; | ||
| } | ||
|
|
||
| /** | ||
| * This function gets the timeseries data points for metrics that are | ||
| * represented as integers on the backend. These data points are part of a | ||
| * timeseries object that is recorded to Google Cloud Monitoring. | ||
| * | ||
| * @param {DataPoint} dataPoint The datapoint containing the data we wish to | ||
| * send to the Google Cloud Monitoring dashboard | ||
| */ | ||
| function getIntegerPoints(dataPoint: DataPoint<number>) { | ||
| return [ | ||
| { | ||
| interval: getInterval(dataPoint), | ||
| value: { | ||
| int64Value: dataPoint.value, | ||
| }, | ||
| }, | ||
| ]; | ||
| } | ||
|
|
||
| /** | ||
| * getResource gets the resource object which is used for building the timeseries | ||
| * object that will be sent to Google Cloud Monitoring dashboard | ||
| * | ||
| * @param {string} metricName The backend name of the metric that we want to record | ||
| * @param {DataPoint} dataPoint The datapoint containing the data we wish to | ||
| * send to the Google Cloud Monitoring dashboard | ||
| */ | ||
| function getResource( | ||
| projectId: string, | ||
| dataPoint: | ||
| | DataPoint<number> | ||
| | DataPoint<Histogram> | ||
| | DataPoint<ExponentialHistogram> | ||
| ) { | ||
| const resourceLabels = { | ||
| cluster: dataPoint.attributes.cluster, | ||
| instance: dataPoint.attributes.instanceId, | ||
| project_id: projectId, | ||
| table: dataPoint.attributes.table, | ||
| zone: dataPoint.attributes.zone, | ||
| }; | ||
| return { | ||
| type: 'bigtable_client_raw', | ||
| labels: resourceLabels, | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * getMetric gets the metric object which is used for building the timeseries | ||
| * object that will be sent to Google Cloud Monitoring dashboard | ||
| * | ||
| * @param {string} metricName The backend name of the metric that we want to record | ||
| * @param {DataPoint} dataPoint The datapoint containing the data we wish to | ||
| * send to the Google Cloud Monitoring dashboard | ||
| */ | ||
| function getMetric( | ||
| metricName: string, | ||
| dataPoint: | ||
| | DataPoint<number> | ||
| | DataPoint<Histogram> | ||
| | DataPoint<ExponentialHistogram> | ||
| ) { | ||
| const streaming = dataPoint.attributes.streaming; | ||
| const app_profile = dataPoint.attributes.app_profile; | ||
| return { | ||
| type: metricName, | ||
| labels: Object.assign( | ||
| { | ||
| method: dataPoint.attributes.method, | ||
| client_uid: dataPoint.attributes.client_uid, | ||
| status: dataPoint.attributes.status, | ||
| client_name: dataPoint.attributes.client_name, | ||
| }, | ||
| streaming ? {streaming} : null, | ||
| app_profile ? {app_profile} : null | ||
| ), | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * Converts OpenTelemetry metrics data into a format suitable for the Google Cloud | ||
| * Monitoring API's `createTimeSeries` method. | ||
| * | ||
| * This function transforms the structured metrics data, including resource and | ||
| * metric attributes, data points, and aggregation information, into an object | ||
| * that conforms to the expected request format of the Cloud Monitoring API. | ||
| * | ||
| * @param {ResourceMetrics} exportArgs - The OpenTelemetry metrics data to be converted. This | ||
| * object contains resource attributes, scope information, and a list of | ||
| * metrics with their associated data points. | ||
| * | ||
| * @returns An object representing a `CreateTimeSeriesRequest`, ready for sending | ||
| * to the Google Cloud Monitoring API. This object contains the project name | ||
| * and an array of time series data points, formatted for ingestion by | ||
| * Cloud Monitoring. | ||
| * | ||
| * @throws Will throw an error if there are issues converting the data. | ||
| * | ||
| * @remarks | ||
| * The output format is specific to the Cloud Monitoring API and involves | ||
| * mapping OpenTelemetry concepts to Cloud Monitoring's data model, including: | ||
| * - Mapping resource attributes to resource labels. | ||
| * - Mapping metric attributes to metric labels. | ||
| * - Handling different metric types (counter, distribution). | ||
| * - Converting data points to the correct structure, including start and end | ||
| * times, values, and bucket information for distributions. | ||
| * | ||
| * @example | ||
| * const exportInput: ExportInput = { ... }; // Example ExportInput object | ||
| * const monitoringRequest = metricsToRequest(exportInput); | ||
| * // monitoringRequest can now be used in monitoringClient.createTimeSeries(monitoringRequest) | ||
| * | ||
| * | ||
| */ | ||
| export function metricsToRequest(exportArgs: ResourceMetrics) { | ||
| type WithSyncAttributes = {_syncAttributes: {[index: string]: string}}; | ||
| const resourcesWithSyncAttributes = | ||
| exportArgs.resource as unknown as WithSyncAttributes; | ||
| const projectId = | ||
| resourcesWithSyncAttributes._syncAttributes[ | ||
| 'monitored_resource.project_id' | ||
| ]; | ||
| const timeSeriesArray = []; | ||
| for (const scopeMetrics of exportArgs.scopeMetrics) { | ||
| for (const scopeMetric of scopeMetrics.metrics) { | ||
| for (const dataPoint of scopeMetric.dataPoints) { | ||
| const metric = getMetric(scopeMetric.descriptor.name, dataPoint); | ||
| const resource = getResource(projectId, dataPoint); | ||
| if (isCounterValue(dataPoint)) { | ||
| timeSeriesArray.push({ | ||
| metric, | ||
| resource, | ||
| valueType: 'INT64', | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure it's always int64? Are there some fields that are doubles?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, this is just for isCounterValue, makes sense! I'd still recommend refactoring this function a bit. Either breaking out counter/distribution into two helper methods, or a single helper if there are enough shared fields to combine them But that's just a style suggestion
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I like breaking this stuff down because to the code reader it makes the variable interelationship clear. In particular, helper functions let us know which variables aren't related which makes it simpler to look at. I added four helpers: So this function is much shorter now. |
||
| points: getIntegerPoints(dataPoint), | ||
| }); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this one need a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The tests that export results to Google Cloud Monitoring succeed at exporting the metrics so I don't think so. If we did have a unit for |
||
| } else { | ||
| timeSeriesArray.push({ | ||
| metric, | ||
| resource, | ||
| metricKind: 'CUMULATIVE', | ||
| valueType: 'DISTRIBUTION', | ||
| points: getDistributionPoints(dataPoint), | ||
| unit: scopeMetric.descriptor.unit || 'ms', // Default to 'ms' if no unit is specified | ||
| }); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| return { | ||
| name: `projects/${projectId}`, | ||
| timeSeries: timeSeriesArray, | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * A custom OpenTelemetry `MetricExporter` that sends metrics data to Google Cloud | ||
| * Monitoring. | ||
| * | ||
| * This class extends the base `MetricExporter` from `@google-cloud/opentelemetry-cloud-monitoring-exporter` | ||
| * and handles the process of converting OpenTelemetry metrics data into the | ||
| * format required by the Google Cloud Monitoring API. It uses the | ||
| * `MetricServiceClient` to send the data to Google Cloud Monitoring's | ||
| * `createTimeSeries` method. | ||
| * | ||
| * @remarks | ||
| * This exporter relies on the `metricsToRequest` function to perform the | ||
| * necessary transformation of OpenTelemetry metrics into Cloud Monitoring | ||
| * `TimeSeries` data. | ||
| * | ||
| * The exporter is asynchronous and will not block the calling thread while | ||
| * sending metrics. It manages the Google Cloud Monitoring client and handles | ||
| * potential errors during the export process. | ||
| * | ||
| * The class expects the `ResourceMetrics` to have been correctly configured | ||
| * and populated with the required resource attributes to correctly identify | ||
| * the monitored resource in Cloud Monitoring. | ||
| * | ||
| * @example | ||
| * // Create an instance of the CloudMonitoringExporter | ||
| * const exporter = new CloudMonitoringExporter(); | ||
| * | ||
| * // Use the exporter with a MeterProvider | ||
| * const meterProvider = new MeterProvider({ | ||
| * resource: new Resource({ | ||
| * 'service.name': 'my-service', | ||
| * // ... other resource attributes | ||
| * }), | ||
| * readers: [new PeriodicExportingMetricReader({ | ||
| * exporter: exporter, | ||
| * exportIntervalMillis: 10000 // Export every 10 seconds | ||
| * })] | ||
| * }); | ||
| * | ||
| * // Now start instrumenting your application using the meter | ||
| * const meter = meterProvider.getMeter('my-meter'); | ||
| * // ... create counters, histograms, etc. | ||
| * | ||
| * @beta | ||
| */ | ||
| export class CloudMonitoringExporter extends MetricExporter { | ||
| private monitoringClient = new MetricServiceClient(); | ||
|
|
||
| export( | ||
| metrics: ResourceMetrics, | ||
| resultCallback: (result: ExportResult) => void | ||
| ): void { | ||
| (async () => { | ||
| try { | ||
| const request = metricsToRequest(metrics); | ||
| await this.monitoringClient.createTimeSeries( | ||
| request as ICreateTimeSeriesRequest | ||
| ); | ||
| // The resultCallback typically accepts a value equal to {code: x} | ||
| // for some value x along with other info. When the code is equal to 0 | ||
| // then the operation completed successfully. When the code is not equal | ||
| // to 0 then the operation failed. Open telemetry logs errors to the | ||
| // console when the resultCallback passes in non-zero code values and | ||
| // logs nothing when the code is 0. | ||
| resultCallback({code: 0}); | ||
| } catch (error) { | ||
| resultCallback(error as ServiceError); | ||
| } | ||
| })(); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need
StandardData,OnOperationAttribute, andOnOperationCompleteData? The data model is hard to follow, with duplicated fields in different placesThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OnOperationAttributeis used in five different places in this file. Most importantly, it is used to define aDistributionMetricand aCounterMetricwhich are interfaces that identify the shape of the data we expect to be passed into the export function of the exporter. ie:OnOperationCompleteDatais the shape of the data when it is passed into the metrics handler before it is transformed and stored in the open telemetry instruments. You'll notice that this data contains attributes and metrics unlike OnOperationAttribute.The
StandardDatainterface was added because of a discussion we had here, but we can remove it again and flatten the hierarchy we want. It's just that this will reintroduce the data duplication you had pointed out before.With that information, what do you think we should do?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you misunderstood what I was saying in that thread. By deduplicating the data models, I mean each piece of data should live in one place. Instead of storing the projectId and clientName on each attempt, you can just add a reference to the operation to the attempt, and keep those fields there
My feedback has always been to cut down on the number of subclasses and interfaces wherever possible, because it can get complicated fast
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, if you absolutely need OnOperationAttribute and OnOperationCompleteData, I'd suggest making their names more distinct to reduce confusion
Looking at how they're used now though, can't you combine OnOperationAttribute and OnAttemptAttribute into a shared
DataPointAttributes? Make them both recordstatusinstead ofattemptStatusandfinalOperationStatus, and they will be the same structure, and you can simplify this logicUh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By eliminating
ExportInputI was able to eliminate the need forOnOperationAttributeandOnAttemptAttributealong with a number of interfaces theExportInputwas counting on.ExportInputwas useful when I was trying to write themetricsToRequestfunction, but with some careful casting we don't need it anymore.