23
23
import re
24
24
from contextlib import ExitStack
25
25
from enum import Enum
26
- from typing import List , Optional
26
+ from typing import Any , Dict , List , Optional
27
27
28
28
from airflow .models import BaseOperator
29
29
from airflow .providers .google .cloud .hooks .dataflow import DEFAULT_DATAFLOW_LOCATION , DataflowHook
@@ -277,6 +277,14 @@ class DataflowTemplatedJobStartOperator(BaseOperator):
277
277
:type template: str
278
278
:param job_name: The 'jobName' to use when executing the DataFlow template
279
279
(templated).
280
+ :param options: Map of job runtime environment options.
281
+
282
+ .. seealso::
283
+ For more information on possible configurations, look at the API documentation
284
+ `https://cloud.google.com/dataflow/pipelines/specifying-exec-params
285
+ <https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment>`__
286
+
287
+ :type options: dict
280
288
:param dataflow_default_options: Map of default job environment options.
281
289
:type dataflow_default_options: dict
282
290
:param parameters: Map of job specific parameters for the template.
@@ -344,16 +352,25 @@ class DataflowTemplatedJobStartOperator(BaseOperator):
344
352
For more detail on job template execution have a look at the reference:
345
353
https://cloud.google.com/dataflow/docs/templates/executing-templates
346
354
"""
347
- template_fields = ['parameters' , 'dataflow_default_options' , 'template' , 'job_name' ]
355
+ template_fields = [
356
+ 'template' ,
357
+ 'job_name' ,
358
+ 'options' ,
359
+ 'parameters' ,
360
+ 'project_id' ,
361
+ 'location' ,
362
+ 'gcp_conn_id'
363
+ ]
348
364
ui_color = '#0273d4'
349
365
350
366
@apply_defaults
351
- def __init__ (
367
+ def __init__ ( # pylint: disable=too-many-arguments
352
368
self ,
353
369
template : str ,
354
370
job_name : str = '{{task.task_id}}' ,
355
- dataflow_default_options : Optional [dict ] = None ,
356
- parameters : Optional [dict ] = None ,
371
+ options : Optional [Dict [str , Any ]] = None ,
372
+ dataflow_default_options : Optional [Dict [str , Any ]] = None ,
373
+ parameters : Optional [Dict [str , str ]] = None ,
357
374
project_id : Optional [str ] = None ,
358
375
location : str = DEFAULT_DATAFLOW_LOCATION ,
359
376
gcp_conn_id : str = 'google_cloud_default' ,
@@ -362,14 +379,11 @@ def __init__(
362
379
* args ,
363
380
** kwargs ) -> None :
364
381
super ().__init__ (* args , ** kwargs )
365
-
366
- dataflow_default_options = dataflow_default_options or {}
367
- parameters = parameters or {}
368
-
369
382
self .template = template
370
383
self .job_name = job_name
371
- self .dataflow_default_options = dataflow_default_options
372
- self .parameters = parameters
384
+ self .options = options or {}
385
+ self .dataflow_default_options = dataflow_default_options or {}
386
+ self .parameters = parameters or {}
373
387
self .project_id = project_id
374
388
self .location = location
375
389
self .gcp_conn_id = gcp_conn_id
@@ -387,10 +401,12 @@ def execute(self, context):
387
401
388
402
def set_current_job_id (job_id ):
389
403
self .job_id = job_id
404
+ options = self .dataflow_default_options
405
+ options .update (self .options )
390
406
391
407
job = self .hook .start_template_dataflow (
392
408
job_name = self .job_name ,
393
- variables = self . dataflow_default_options ,
409
+ variables = options ,
394
410
parameters = self .parameters ,
395
411
dataflow_template = self .template ,
396
412
on_new_job_id_callback = set_current_job_id ,
0 commit comments