@@ -17,11 +17,13 @@ syntax = "proto3";
1717
1818package google.cloud.automl.v1beta1 ;
1919
20+ import "google/api/annotations.proto" ;
21+ import "google/cloud/automl/v1beta1/geometry.proto" ;
2022import "google/cloud/automl/v1beta1/io.proto" ;
23+ import "google/cloud/automl/v1beta1/text_segment.proto" ;
2124import "google/protobuf/any.proto" ;
2225import "google/protobuf/duration.proto" ;
2326import "google/protobuf/struct.proto" ;
24- import "google/api/annotations.proto" ;
2527
2628option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl" ;
2729option java_multiple_files = true ;
@@ -56,19 +58,135 @@ message TextSnippet {
5658 // characters long.
5759 string content = 1 ;
5860
59- // Optional. The format of [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the only two allowed
60- // values are "text/html" and "text/plain". If left blank, the format is
61- // automatically determined from the type of the uploaded [content][google.cloud.automl.v1beta1.TextSnippet.content].
61+ // Optional. The format of
62+ // [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the
63+ // only two allowed values are "text/html" and "text/plain". If left blank,
64+ // the format is automatically determined from the type of the uploaded
65+ // [content][google.cloud.automl.v1beta1.TextSnippet.content].
6266 string mime_type = 2 ;
6367
6468 // Output only. HTTP URI where you can download the content.
6569 string content_uri = 4 ;
6670}
6771
72+ // Message that describes the dimensions of a document.
73+ message DocumentDimensions {
74+ // Unit in which the document dimensions are measured.
75+ enum DocumentDimensionUnit {
76+ // Should not be used.
77+ DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0 ;
78+
79+ // Document dimension is measured in inches.
80+ INCH = 1 ;
81+
82+ // Document dimension is measured in centimeters.
83+ CENTIMETER = 2 ;
84+
85+ // Document dimension is measured in points. 72 points = 1 inch.
86+ POINT = 3 ;
87+ }
88+
89+ // Unit in which the width and height of the document are expressed.
90+ DocumentDimensionUnit unit = 1 ;
91+
92+ // Width value of the document; interpreted together with the unit field.
93+ float width = 2 ;
94+
95+ // Height value of the document; interpreted together with the unit field.
96+ float height = 3 ;
97+ }
98+
6899// A structured text document, e.g. a PDF.
69100message Document {
101+ // Describes the layout information of a
102+ // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment] in
103+ // the document.
104+ message Layout {
105+ // The type of a TextSegment in the context of the original document.
106+ enum TextSegmentType {
107+ // Should not be used.
108+ TEXT_SEGMENT_TYPE_UNSPECIFIED = 0 ;
109+
110+ // The text segment is a token, e.g. a word.
111+ TOKEN = 1 ;
112+
113+ // The text segment is a paragraph.
114+ PARAGRAPH = 2 ;
115+
116+ // The text segment is a form field.
117+ FORM_FIELD = 3 ;
118+
119+ // The text segment is the name part of a form field. It will be treated
120+ // as a child of another FORM_FIELD TextSegment if its span is a subspan of
121+ // another TextSegment with type FORM_FIELD.
122+ FORM_FIELD_NAME = 4 ;
123+
124+ // The text segment is the text content part of a form field. It will be
125+ // treated as a child of another FORM_FIELD TextSegment if its span is a
126+ // subspan of another TextSegment with type FORM_FIELD.
127+ FORM_FIELD_CONTENTS = 5 ;
128+
129+ // The text segment is a whole table, including headers and all rows.
130+ TABLE = 6 ;
131+
132+ // The text segment is a table's headers. It will be treated as a child of
133+ // another TABLE TextSegment if its span is a subspan of another TextSegment
134+ // with type TABLE.
135+ TABLE_HEADER = 7 ;
136+
137+ // The text segment is a row in a table. It will be treated as a child of
138+ // another TABLE TextSegment if its span is a subspan of another TextSegment
139+ // with type TABLE.
140+ TABLE_ROW = 8 ;
141+
142+ // The text segment is a cell in a table. It will be treated as a child of
143+ // another TABLE_ROW TextSegment if its span is a subspan of another
144+ // TextSegment with type TABLE_ROW.
145+ TABLE_CELL = 9 ;
146+ }
147+
148+ // Text segment that represents a segment in
149+ // [document_text][google.cloud.automl.v1beta1.Document.document_text].
150+ TextSegment text_segment = 1 ;
151+
152+ // Page number of the
153+ // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
154+ // in the original document, starts from 1.
155+ int32 page_number = 2 ;
156+
157+ // The position of the
158+ // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
159+ // in the page. Contains exactly 4
160+ // vertices in
161+ // [normalized_vertices][google.cloud.automl.v1beta1.BoundingPoly.normalized_vertices]
162+ // and they are connected by edges in the order provided, which will
163+ // represent a rectangle parallel to the frame. The
164+ // [NormalizedVertex-s][google.cloud.automl.v1beta1.NormalizedVertex] are
165+ // relative to the page.
166+ // Coordinates use the top-left corner as point (0,0).
167+ BoundingPoly bounding_poly = 3 ;
168+
169+ // The type of the
170+ // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
171+ // in the document.
172+ TextSegmentType text_segment_type = 4 ;
173+ }
174+
70175 // An input config specifying the content of the document.
71176 DocumentInputConfig input_config = 1 ;
177+
178+ // The plain text version of this document.
179+ TextSnippet document_text = 2 ;
180+
181+ // Describes the layout of the document.
182+ // Sorted by [page_number][google.cloud.automl.v1beta1.Document.Layout.page_number].
183+ repeated Layout layout = 3 ;
184+
185+ // The dimensions of the page in the document.
186+ DocumentDimensions document_dimensions = 4 ;
187+
188+ // Number of pages in the document.
189+ int32 page_count = 5 ;
72190}
73191
74192// A representation of a row in a relational table.
0 commit comments