Skip to content
This repository was archived by the owner on Nov 14, 2023. It is now read-only.

Commit 9c8ce64

Browse files
yoshi-automation authored and Benjamin E. Coe committed
feat: introduces configuration, e.g., dimensions, for doc prediction
1 parent 01aa9d6 commit 9c8ce64

9 files changed

Lines changed: 589 additions & 93 deletions

File tree

protos/google/cloud/automl/v1beta1/data_items.proto

Lines changed: 122 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@ syntax = "proto3";
1717

1818
package google.cloud.automl.v1beta1;
1919

20+
import "google/api/annotations.proto";
21+
import "google/cloud/automl/v1beta1/geometry.proto";
2022
import "google/cloud/automl/v1beta1/io.proto";
23+
import "google/cloud/automl/v1beta1/text_segment.proto";
2124
import "google/protobuf/any.proto";
2225
import "google/protobuf/duration.proto";
2326
import "google/protobuf/struct.proto";
24-
import "google/api/annotations.proto";
2527

2628
option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
2729
option java_multiple_files = true;
@@ -56,19 +58,135 @@ message TextSnippet {
5658
// characters long.
5759
string content = 1;
5860

59-
// Optional. The format of [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the only two allowed
60-
// values are "text/html" and "text/plain". If left blank, the format is
61-
// automatically determined from the type of the uploaded [content][google.cloud.automl.v1beta1.TextSnippet.content].
61+
// Optional. The format of
62+
// [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the
63+
// only two allowed values are "text/html" and "text/plain". If left blank,
64+
// the format is automatically determined from the type of the uploaded
65+
// [content][google.cloud.automl.v1beta1.TextSnippet.content].
6266
string mime_type = 2;
6367

6468
// Output only. HTTP URI where you can download the content.
6569
string content_uri = 4;
6670
}
6771

72+
// Message that describes the dimensions of a document.
73+
message DocumentDimensions {
74+
// Unit of the document dimension.
75+
enum DocumentDimensionUnit {
76+
// Should not be used.
77+
DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0;
78+
79+
// Document dimension is measured in inches.
80+
INCH = 1;
81+
82+
// Document dimension is measured in centimeters.
83+
CENTIMETER = 2;
84+
85+
// Document dimension is measured in points. 72 points = 1 inch.
86+
POINT = 3;
87+
}
88+
89+
// Unit of the dimension.
90+
DocumentDimensionUnit unit = 1;
91+
92+
// Width value of the document, works together with the unit.
93+
float width = 2;
94+
95+
// Height value of the document, works together with the unit.
96+
float height = 3;
97+
}
98+
6899
// A structured text document e.g. a PDF.
69100
message Document {
101+
// Describes the layout information of a
102+
// [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment] in
103+
// the document.
104+
message Layout {
105+
// The type of TextSegment in the context of the original document.
106+
enum TextSegmentType {
107+
// Should not be used.
108+
TEXT_SEGMENT_TYPE_UNSPECIFIED = 0;
109+
110+
// The text segment is a token, e.g. a word.
111+
TOKEN = 1;
112+
113+
// The text segment is a paragraph.
114+
PARAGRAPH = 2;
115+
116+
// The text segment is a form field.
117+
FORM_FIELD = 3;
118+
119+
// The text segment is the name part of a form field. It will be treated
120+
// as child of another FORM_FIELD TextSegment if its span is subspan of
121+
// another TextSegment with type FORM_FIELD.
122+
FORM_FIELD_NAME = 4;
123+
124+
// The text segment is the text content part of a form field. It will be
125+
// treated as child of another FORM_FIELD TextSegment if its span is
126+
// subspan of another TextSegment with type FORM_FIELD.
127+
FORM_FIELD_CONTENTS = 5;
128+
129+
// The text segment is a whole table, including headers, and all rows.
130+
TABLE = 6;
131+
132+
// The text segment is a table's headers. It will be treated as child of
133+
// another TABLE TextSegment if its span is subspan of another TextSegment
134+
// with type TABLE.
135+
TABLE_HEADER = 7;
136+
137+
// The text segment is a row in a table. It will be treated as child of
138+
// another TABLE TextSegment if its span is subspan of another TextSegment
139+
// with type TABLE.
140+
TABLE_ROW = 8;
141+
142+
// The text segment is a cell in a table. It will be treated as child of
143+
// another TABLE_ROW TextSegment if its span is subspan of another
144+
// TextSegment with type TABLE_ROW.
145+
TABLE_CELL = 9;
146+
}
147+
148+
// Text Segment that represents a segment in
149+
// [document_text][google.cloud.automl.v1beta1.Document.document_text].
150+
TextSegment text_segment = 1;
151+
152+
// Page number of the
153+
// [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
154+
// in the original document, starts from 1.
155+
int32 page_number = 2;
156+
157+
// The position of the
158+
// [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
159+
// in the page. Contains exactly 4
160+
//
161+
// [normalized_vertices][google.cloud.automl.v1beta1.BoundingPoly.normalized_vertices]
162+
// and they are connected by edges in the order provided, which will
163+
// represent a rectangle parallel to the frame. The
164+
// [NormalizedVertex-s][google.cloud.automl.v1beta1.NormalizedVertex] are
165+
// relative to the page.
166+
// Coordinates are based on top-left as point (0,0).
167+
BoundingPoly bounding_poly = 3;
168+
169+
// The type of the
170+
// [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
171+
// in document.
172+
TextSegmentType text_segment_type = 4;
173+
}
174+
70175
// An input config specifying the content of the document.
71176
DocumentInputConfig input_config = 1;
177+
178+
// The plain text version of this document.
179+
TextSnippet document_text = 2;
180+
181+
// Describes the layout of the document.
182+
// Sorted by [page_number][google.cloud.automl.v1beta1.Document.Layout.page_number].
183+
repeated Layout layout = 3;
184+
185+
// The dimensions of the page in the document.
186+
DocumentDimensions document_dimensions = 4;
187+
188+
// Number of pages in the document.
189+
int32 page_count = 5;
72190
}
73191

74192
// A representation of a row in a relational table.

protos/google/cloud/automl/v1beta1/io.proto

Lines changed: 62 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -140,20 +140,25 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
140140
// CSV file(s) with each line in format:
141141
// ML_USE,GCS_FILE_PATH
142142
// GCS_FILE_PATH leads to a .JSONL (that is, JSON Lines) file which
143-
// either imports text in-line or as documents.
143+
// either imports text in-line or as documents. Any given
144+
// .JSONL file must be 100MB or smaller.
144145
// The in-line .JSONL file contains, per line, a proto that wraps a
145146
// TextSnippet proto (in json representation) followed by one or more
146147
// AnnotationPayload protos (called annotations), which have
147148
// display_name and text_extraction detail populated. The given text
148149
// is expected to be annotated exhaustively, for example, if you look
149150
// for animals and text contains "dolphin" that is not labeled, then
150151
// "dolphin" is assumed to not be an animal. Any given text snippet
151-
// content must have 30,000 characters or less, and also be UTF-8 NFC
152-
// encoded (ASCII already is). The document .JSONL file contains, per line, a proto that wraps a
153-
// Document proto with input_config set. Only PDF documents are
154-
// supported now, and each document may be up to 2MB large. Currently
155-
// annotations on documents cannot be specified at import. Any given
156-
// .JSONL file must be 100MB or smaller.
152+
// content must be 10KB or smaller, and also be UTF-8 NFC encoded
153+
// (ASCII already is).
154+
// The document .JSONL file contains, per line, a proto that wraps a
155+
// Document proto. The Document proto must have either document_text
156+
// or input_config set. In document_text case, the Document proto may
157+
// also contain the spatial information of the document, including
158+
// layout, document dimension and page number. In input_config case,
159+
// only PDF documents are supported now, and each document may be up
160+
// to 2MB large. Currently, annotations on documents cannot be
161+
// specified at import.
157162
// Three sample CSV rows:
158163
// TRAIN,gs://folder/file1.jsonl
159164
// VALIDATE,gs://folder/file2.jsonl
@@ -162,27 +167,61 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
162167
// with artificial line breaks, but the only actual line break is
163168
// denoted by \n).:
164169
// {
165-
// "text_snippet": {
166-
// "content": "dog car cat"
167-
// } "annotations": [
168-
// {
169-
// "display_name": "animal",
170-
// "text_extraction": {
171-
// "text_segment": {"start_offset": 0, "end_offset": 3}
170+
// "document": {
171+
// "document_text": {"content": "dog cat"},
172+
// "layout": [
173+
// {
174+
// "text_segment": {
175+
// "start_offset": 0,
176+
// "end_offset": 3,
177+
// },
178+
// "page_number": 1,
179+
// "bounding_poly": {
180+
// "normalized_vertices": [
181+
// {"x": 0.1, "y": 0.1},
182+
// {"x": 0.1, "y": 0.3},
183+
// {"x": 0.3, "y": 0.3},
184+
// {"x": 0.3, "y": 0.1},
185+
// ],
186+
// },
187+
// "text_segment_type": TOKEN,
188+
// },
189+
// {
190+
// "text_segment": {
191+
// "start_offset": 4,
192+
// "end_offset": 7,
193+
// },
194+
// "page_number": 1,
195+
// "bounding_poly": {
196+
// "normalized_vertices": [
197+
// {"x": 0.4, "y": 0.1},
198+
// {"x": 0.4, "y": 0.3},
199+
// {"x": 0.8, "y": 0.3},
200+
// {"x": 0.8, "y": 0.1},
201+
// ],
202+
// },
203+
// "text_segment_type": TOKEN,
172204
// }
173-
// },
205+
//
206+
// ],
207+
// "document_dimensions": {
208+
// "width": 8.27,
209+
// "height": 11.69,
210+
// "unit": INCH,
211+
// },
212+
// "page_count": 1,
213+
// },
214+
// "annotations": [
174215
// {
175-
// "display_name": "vehicle",
176-
// "text_extraction": {
177-
// "text_segment": {"start_offset": 4, "end_offset": 7}
178-
// }
216+
// "display_name": "animal",
217+
// "text_extraction": {"text_segment": {"start_offset": 0,
218+
// "end_offset": 3}}
179219
// },
180220
// {
181221
// "display_name": "animal",
182-
// "text_extraction": {
183-
// "text_segment": {"start_offset": 8, "end_offset": 11}
184-
// }
185-
// },
222+
// "text_extraction": {"text_segment": {"start_offset": 4,
223+
// "end_offset": 7}}
224+
// }
186225
// ],
187226
// }\n
188227
// {

protos/google/cloud/automl/v1beta1/prediction_service.proto

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ syntax = "proto3";
1818
package google.cloud.automl.v1beta1;
1919

2020
import "google/api/annotations.proto";
21+
import "google/api/client.proto";
2122
import "google/cloud/automl/v1beta1/annotation_payload.proto";
2223
import "google/cloud/automl/v1beta1/data_items.proto";
2324
import "google/cloud/automl/v1beta1/io.proto";
2425
import "google/cloud/automl/v1beta1/operations.proto";
2526
import "google/longrunning/operations.proto";
26-
import "google/api/client.proto";
2727

2828
option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
2929
option java_multiple_files = true;
@@ -38,7 +38,8 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
3838
// snake_case or kebab-case, either of those cases is accepted.
3939
service PredictionService {
4040
option (google.api.default_host) = "automl.googleapis.com";
41-
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
41+
option (google.api.oauth_scopes) =
42+
"https://www.googleapis.com/auth/cloud-platform";
4243

4344
// Perform an online prediction. The prediction result will be directly
4445
// returned in the response.
@@ -66,12 +67,14 @@ service PredictionService {
6667
};
6768
}
6869

69-
// Perform a batch prediction. Unlike the online [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
70+
// Perform a batch prediction. Unlike the online
71+
// [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
7072
// prediction result won't be immediately available in the response. Instead,
7173
// a long running operation object is returned. User can poll the operation
7274
// result via [GetOperation][google.longrunning.Operations.GetOperation]
73-
// method. Once the operation is done, [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is returned in
74-
// the [response][google.longrunning.Operation.response] field.
75+
// method. Once the operation is done,
76+
// [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is
77+
// returned in the [response][google.longrunning.Operation.response] field.
7578
// Available for the following ML problems:
7679
// * Image Classification
7780
// * Image Object Detection
@@ -86,7 +89,8 @@ service PredictionService {
8689
}
8790
}
8891

89-
// Request message for [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
92+
// Request message for
93+
// [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
9094
message PredictRequest {
9195
// Name of the model requested to serve the prediction.
9296
string name = 1;
@@ -122,12 +126,20 @@ message PredictRequest {
122126
map<string, string> params = 3;
123127
}
124128

125-
// Response message for [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
129+
// Response message for
130+
// [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
126131
message PredictResponse {
127132
// Prediction result.
128133
// Translation and Text Sentiment will return precisely one payload.
129134
repeated AnnotationPayload payload = 1;
130135

136+
// The preprocessed example that AutoML actually makes prediction on.
137+
// Empty if AutoML does not preprocess the input example.
138+
// * For Text Extraction:
139+
// If the input is a .pdf file, the OCR'ed text will be provided in
140+
// [document_text][google.cloud.automl.v1beta1.Document.document_text].
141+
ExamplePayload preprocessed_input = 3;
142+
131143
// Additional domain-specific prediction response metadata.
132144
//
133145
// * For Image Object Detection:
@@ -146,7 +158,8 @@ message PredictResponse {
146158
map<string, string> metadata = 2;
147159
}
148160

149-
// Request message for [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
161+
// Request message for
162+
// [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
150163
message BatchPredictRequest {
151164
// Name of the model requested to serve the batch prediction.
152165
string name = 1;
@@ -226,7 +239,8 @@ message BatchPredictRequest {
226239

227240
// Result of the Batch Predict. This message is returned in
228241
// [response][google.longrunning.Operation.response] of the operation returned
229-
// by the [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
242+
// by the
243+
// [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
230244
message BatchPredictResult {
231245
// Additional domain-specific prediction response metadata.
232246
//

0 commit comments

Comments
 (0)