· 6 years ago · Oct 24, 2019, 01:44 AM
# Create new AWS Glue jobs from a text-file template.
# Assumes the text file contains no double-quote (") characters (e.g. Name = SLFGlueJob#1).
3
# Import the boto3 AWS SDK, pprint (pretty-printing of JSON-like data), and defaultdict (used to build the dict parsed from the text file).
5import boto3, pprint
6import awsKeys #Authentication to Create keys for all environments (Prod, NonProd, Lab, Stage)
7
8from collections import defaultdict
# Glue API client; the job-creation call at the bottom of this script uses it.
client = boto3.client('glue')

# Path of the text file holding the Glue job parameters as "key = value" lines.
# gluedict() opens this file and groups the pairs into one dict per job.
# NOTE(review): a raw string with a doubled backslash keeps BOTH backslashes in
# the path - confirm this is the intended location.
filepath = r'C:\\UsersTest.txt'
15
16
def gluedict(glueDict=None, path=None):
    """Parse the Glue job parameter text file into numbered parameter groups.

    Every line that contains ``=`` is split at the FIRST ``=`` into a stripped
    key and a stripped value, so values that themselves contain ``=`` (for
    example URLs with a query string) are kept intact. Pairs accumulate in the
    current group; as soon as a key shows up that the current group already
    holds, a new group is started. A file that lists several jobs one after
    another therefore yields one dict per job. Lines without ``=`` are
    ignored.

    Args:
        glueDict: Unused. Retained (now optional) so existing callers that
            pass a positional argument keep working.
        path: File to parse. When omitted, the module-level ``filepath`` is
            used.

    Returns:
        A ``defaultdict(dict)`` mapping a 1-based group number to that
        group's ``{key: value}`` dict of strings. It is empty when the file
        has no ``key = value`` lines.

    Raises:
        OSError: If the file cannot be opened.
    """
    source = filepath if path is None else path
    groups = defaultdict(dict)
    current = 1
    # The context manager closes the file; no explicit close() is needed.
    with open(source) as gluePromo:
        file_contents = gluePromo.read()
    for line in file_contents.splitlines():
        if "=" not in line:
            continue
        # partition() splits only at the first "=", unlike split("="), which
        # broke the two-name unpacking whenever the value contained "=".
        key, _, value = line.partition("=")
        key = key.strip()
        value = value.strip()
        # A repeated key marks the start of the next job's parameters.
        if key in groups[current]:
            current += 1
        groups[current][key] = value
    return groups
30
# Create one Glue job per parameter group parsed from the text file.
# gluedict() must be CALLED to obtain the parsed groups; its positional
# argument is not used by the parser, so None is passed as a placeholder.
for job_params in gluedict(None).values():
    # Pull the parameters for this job out of its parsed group. A missing key
    # raises KeyError before any API call is made for the job.
    glueJobName = job_params['Name']
    glueJobDesc = job_params['Description']
    glueJobRole = job_params['Role']
    glueScript = job_params['ScriptLocation']
    glueTempDir = job_params['TempDir']
    gluePythonLib = job_params['PythonLibraryPath']
    glueEnvDir = job_params['envFilePath']
    glueCfgDir = job_params['cfgFilePath']
    glueWorkerT = job_params['WorkerType']
    glueWorkerCount = job_params['NumberOfWorkers']
    # The text file yields strings, but the Glue API only accepts an int for
    # the number of workers.
    glueWorkersC = int(glueWorkerCount)

    print(glueJobName)
    print("Creating new Glue Job ......." + glueJobName)
    # Feed the values above into the Glue CreateJob call. This runs inside the
    # loop so that every job in the file is created, not only the last one.
    response = client.create_job(
        Name=glueJobName,
        Description=glueJobDesc,
        Role=glueJobRole,
        Command={
            'Name': 'glueetl',
            'ScriptLocation': glueScript,
            'PythonVersion': '2'
        },
        DefaultArguments={
            '--job-language': 'python',
            '--enable-metrics': " ",
            "--TempDir": glueTempDir,
            '--job-bookmark-option': 'job-bookmark-disable',
            '--extra-py-files': gluePythonLib,
            '--env_file_path': glueEnvDir,
            '--cfg_dir_path': glueCfgDir,
        },
        # Timeout is left at the service default. MaxCapacity is deliberately
        # not set: capacity is specified through WorkerType/NumberOfWorkers.
        NumberOfWorkers=glueWorkersC,
        WorkerType=glueWorkerT,
    )
    print("Glue Job Created......." + glueJobName)