import csv
import logging
import os
import pickle
import shutil
from datetime import datetime, timezone

import httpx
import pandas as pd
import pytz


from study_automation import configSettings

logger = logging.getLogger(__name__)
startTimeStamp = datetime.now(timezone.utc)
startTime = datetime.strftime(startTimeStamp, '%Y-%m-%d_%H%M%S')
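# startTime looks like e.g. '2020-04-22_234600' (illustrative) and is embedded in output file names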
localStartTime = datetime.now()

# Maps the role names from the project API (home page) to the naming
# convention used by the assignment API (tasks tab in the UI).
KI_ROLE_LU = {'Lead Clinical Data Manager': 'DM',
              'Backup Clinical Data Manager': 'DM',
              'Lead Clinical Data Analyst': 'DM',
              'Lead Clinical Programmer': 'CP',
              'Medical Coder': 'DM',
              'IRT Project Manager': 'IRT',
              'Lead Biostatistician': 'BS-BIOSTAT',
              'Lead Statistical Programmer': 'SAS-PRG',
              'Biostatistics QC': 'BS-QC',
              'Reviewing Biostatistician': 'BS-BIOSTAT',
              'Lead Validating Statistical Programmer': 'SAS-PRG-VAL',
              'Unmasked Biostatistician': 'BS-BIOSTAT',
              'Project Manager': 'PM',
              'Data Manager Co-Lead': 'DM',
              'IRT Project Manager Co-Lead': 'IRT',
              'Biostatistician Co-Lead': 'BS-BIOSTAT'
              }

# Roles from the task page, normalized slightly. Used for the assignment API.
KI_TASK_ROLE_LU = {'BS-AS-DIR': 'BS-BIOSTAT',
                   'BS-BIOSTAT': 'BS-BIOSTAT',
                   'BS-BIOSTAT2': 'BS-BIOSTAT',
                   'BS-DIR': 'BS-BIOSTAT',
                   'BS-MGR': 'BS-BIOSTAT',
                   'BS-PRIN-BIOSTAT': 'BS-BIOSTAT',
                   'BS-PRIN-RS': 'BS-BIOSTAT',
                   'BS-QC': 'BS-QC',
                   'CP': 'CP',
                   'CP-MGR': 'CP',
                   'DM-CDA': 'DM',
                   'DM-CDM': 'DM',
                   'DM-DIR': 'DM',
                   'DM-MEDCOD': 'DM',
                   'DM-MEDCOD-MGR': 'DM',
                   'DM-MGR': 'DM',
                   'IRT-PM': 'IRT',
                   'IRT-SERV-MGR': 'IRT',
                   'PM': 'PM',
                   'SAS-PRG': 'SAS-PRG',
                   'SAS-PRG-MGR': 'SAS-PRG',
                   'SSC-PRG-VAL': 'SAS-PRG-VAL',
                   }

# Custom-field keys to extract from the project API; they correspond to the roles above.
role_key = [218, 282, 293, 294, 295, 296, 283, 284, 297, 298, 299, 300, 301, 326, 327, 328]
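
# Illustrative customFieldValues entry (hypothetical key/value pairing) and how
# it is consumed in _get_project_data():
#   {'key': 282, 'name': 'Lead Clinical Data Manager', 'value': 'EMP001'}
#   -> code_data[KI_ROLE_LU['Lead Clinical Data Manager']] == code_data['DM'] == {'EMP001'}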

# For the project data, these keys are mostly ignored when diffing.
ignoredFields = ['startDate', 'Last Edit Date', 'newStudy',
                 'description', 'clientDescription', 'duplicates', 'employees']


class StudyAutomation:
    """
    Monitors the KeyedIn API and automates study setup processes
    (folder creation and permissions changes).
    """

    def __init__(self):
        """
        Init records the locations of the data files and loads the most recent
        versions of resourceDict and projectData.
        """
        logger.info("Study Automation process begun.")

        self.DATA_LOCATION = configSettings['DATA_LOC']
        logger.info('Previous data located at {0}'.format(self.DATA_LOCATION))

        self.PROJ_DATA_LOCATION = configSettings['DATA_LOC'] + 'projectData\\'
        logger.info('Previous project data located at {0}'.format(self.PROJ_DATA_LOCATION))

        self.RES_DATA_LOCATION = configSettings['DATA_LOC'] + 'resourceDict\\'
        logger.info('Previous resource data located at {0}'.format(self.RES_DATA_LOCATION))

        self.STATS_LOCATION = configSettings['STATS_LOC']
        logger.info('Stats drive root located at {0}'.format(self.STATS_LOCATION))

        self.DM_LOCATION = configSettings['DM_LOC']
        logger.info('DM drive root located at {0}'.format(self.DM_LOCATION))

        self.STATS_COPY_LOCATION = configSettings['STATS_COPY_LOC']
        logger.info('Stats copy drive root located at {0}'.format(self.STATS_COPY_LOCATION))

        self.DM_COPY_LOCATION = configSettings['DM_COPY_LOC']
        logger.info('DM copy drive root located at {0}'.format(self.DM_COPY_LOCATION))

        self.OUTPUT_LOCATION = configSettings['OUTPUT_LOC']
        logger.info('Output location located at {0}'.format(self.OUTPUT_LOCATION))

        self.project_mappings = pd.read_csv('project_mappings.csv', index_col='project')
        logger.info('Project mappings located at {}'.format(os.getcwd() + '\\project_mappings.csv'))

        self.permission_mappings = pd.read_csv('permission_mappings.csv')
        logger.info('Permission mappings located at {}'.format(os.getcwd() + '\\permission_mappings.csv'))
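
        # Expected CSV shapes (inferred from usage elsewhere in this class; illustrative):
        #   project_mappings.csv: index column 'project' plus the folder columns
        #     dmDrive, statsDrive, primaryProgram, validationProgram,
        #     randomization, dmdata (see _create_folders)
        #   permission_mappings.csv: columns role, action, location,
        #     permissionChange (see _create_permissions_row)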

        self.newProjectsDesc = []

        self._load_previous_resource_dict()
        self._load_previous_project_data()
        self._get_excel_serial_date(self.previousProjectTimestamp)

    def call_api_get_data(self, refresh_data=False):
        """
        Collects the data needed to determine differences.

        Parameters:
            refresh_data (bool): Whether to completely refresh data from the API
                or reuse the previously saved values.
        """
        logger.info("Refresh data set to {0}".format(str(refresh_data)))

        # if the resource dict is more than a day old, force a refresh
        if (startTimeStamp.date() - self.previousResourceTimestamp.date()).days > 1:
            self._create_resource_dict(True)
            logger.info('Resource Dict refresh over a day old. Forcing refresh.')
        else:
            self._create_resource_dict(refresh_data)

        self._get_project_data()

    def _load_previous_resource_dict(self):
        """
        Loads the previous resource dict.

        Creates:
            self.previousResourceTimestamp (datetime): Last time a resource dict was created, in UTC.
            self.previousResourceDictLoc (str): Where the file was located.
            self.previousResourceDict (dict): Keys are employee codes from KI. Fields include name, role, department.
        """

        previousLoc, self.previousResourceTimestamp = self._find_most_recent_file('resourceDict')
        self.previousResourceDictLoc = self.RES_DATA_LOCATION + previousLoc
        with open(self.previousResourceDictLoc, 'rb') as handle:
            self.previousResourceDict = pickle.load(handle)
        logger.info("Previous Resource dictionary loaded from {0}".format(self.previousResourceDictLoc))

    def _load_previous_project_data(self):
        """
        Loads the previous project data.

        Creates:
            self.previousProjectTimestamp (datetime): Last time project data was created, in UTC.
            self.previousProjectDataLoc (str): Where the file was located.
            self.previousProjectData (dict): Project data; see _get_project_data for the structure.
        """
        previousLoc, self.previousProjectTimestamp = self._find_most_recent_file('projectData')
        self.previousProjectDataLoc = self.PROJ_DATA_LOCATION + previousLoc
        with open(self.previousProjectDataLoc, 'rb') as handle:
            self.previousProjectData = pickle.load(handle)
        logger.info("Previous Project Data loaded from: {0}".format(self.previousProjectDataLoc))

    def _create_resource_dict(self, refresh_data=True):
        """
        Creates the resource dict if refresh_data is True. If it is False, or if
        there is an error, the most recent saved dictionary is used instead.

        Parameters:
            refresh_data (bool): Whether to completely refresh data from the API
                or reuse the previously saved values.

        Creates:
            self.resourceJson (dict): JSON returned from the API. See the URL definition below for filters and fields included.
            self.resourceDict (dict): Keys are employee codes from KI. Fields include name, role, department.
        """

        if refresh_data:
            try:
                self.resourceJson = self._return_json_from_api('resource')
                self.resourceDict = {}
                for item in self.resourceJson:
                    self.resourceDict[item['code']] = {'name': item['name'], 'role': item['primaryRole'],
                                                       'dept': item['departmentDescription'], 'manager': item['lineManagerDescription']}
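
                # Illustrative entry (hypothetical employee code and values):
                #   self.resourceDict['EMP001'] == {'name': 'Jane Doe', 'role': 'SAS-PRG',
                #       'dept': 'Biostatistics', 'manager': 'John Smith'}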

            except Exception:
                logger.error("Resource connection failed. Using previous resource dict.")
                self.resourceDict = self.previousResourceDict

        else:
            self.resourceDict = self.previousResourceDict

        logger.info("Resource Dict: {0}".format(self.resourceDict))

    def _get_project_data(self):
        """
        Calls the project API, determines which projects have changed, and then calls the assignment API.

        Note that the attached employees come through two different calls: the project call
        (where role names are normalized via KI_ROLE_LU) and the assignment call
        (where task roles are normalized via KI_TASK_ROLE_LU).

        Creates:
            self.projectJson (dict): JSON returned from the API. See the URL definition below for filters and fields included.
            self.changedProjects (list): Project codes that have changed since the last check or are new.
            self.projectData (dict): Has project codes, sponsors, projects, start date, last edit date and attached employees.
            self.assignmentJson: JSON returned from the API. See the URL definition below for filters and fields included.
        """
        self.projectJson = self._return_json_from_api('project')

        self.projectData = {}
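
        # Illustrative shape of one entry (hypothetical project code and employee ids):
        #   self.projectData['PRJ001'] == {
        #       'description': 'Sponsor_Study', 'clientDescription': 'Sponsor',
        #       'startDate': '...', 'PM': {'EMP002'}, 'DM': {'EMP001'},
        #       'employees': ['EMP002', 'EMP001']}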

        for item in self.projectJson:
            project_code = item['code']
            code_data = {}
            # emp_match tracks whether one employee holds two different roles
            emp_match = {}
            code_data['description'] = item['description']
            code_data['clientDescription'] = item['clientDescription']
            code_data['startDate'] = item['startDate']

            # don't include None, only valid references
            if item.get('manager') is not None:
                code_data['PM'] = {item['manager']}
                emp_match[item['manager']] = {'PM'}

            for field in item['customFieldValues']:
                if field['key'] in role_key and len(field['value']) > 0:
                    # key 218 is the edit date
                    if field['key'] == 218:
                        code_data[field['name']] = field['value']
                    else:
                        ki_role = KI_ROLE_LU[field['name']]
                        emp_id = field['value']
                        if ki_role in code_data.keys():
                            code_data[ki_role].add(emp_id)
                        else:
                            code_data[ki_role] = {emp_id}

                        if emp_id in emp_match.keys():
                            emp_match[emp_id].add(ki_role)
                        else:
                            emp_match[emp_id] = {ki_role}

            # check if any employees hold two different roles; alert and remove
            employees = list(emp_match.keys())
            if len(emp_match) > 0:
                employees_with_mult_roles = [k for k, v in emp_match.items() if len(v) > 1]
                projRoles = [k for k, v in code_data.items() if k not in ignoredFields]

                if len(employees_with_mult_roles) > 0:
                    for e in employees_with_mult_roles:
                        for role in projRoles:
                            assignedEmployees = code_data[role]
                            if e in assignedEmployees:
                                code_data[role].remove(e)
                                logger.warning("Employee {0} with multiple roles in project {1}. Removing {2} role.".format(
                                    e, project_code, role))

            else:
                logger.info("No employees found for project {0}".format(project_code))

            code_data['employees'] = employees
            self.projectData[project_code] = code_data

        self.assignmentJson = self._return_json_from_api('assignment')

        self.changedProjects = []

        # Put the assignment data back into projectData. However, if the employee
        # already has a home page role, do not add another one.
        for item in self.assignmentJson:
            mappedRole = KI_TASK_ROLE_LU.get(item['role'])

            # skip task roles that have no mapping rather than storing them under a None key
            if mappedRole is None:
                logger.warning("Unmapped task role {0} on project {1}. Skipping.".format(item['role'], item['project']))
                continue

            if item['resource'] in self.projectData[item['project']]['employees']:
                # only log if the employee is in a different role from the home page
                if mappedRole not in self.projectData[item['project']].keys() or item['resource'] not in self.projectData[item['project']][mappedRole]:
                    logger.info("Employee {0} already assigned to a role in project {1} from the home screen. Ignoring addl role {2} via {3}."
                                .format(item['resource'], item['project'], mappedRole, item['role']))

            else:
                if mappedRole in self.projectData[item['project']].keys():
                    self.projectData[item['project']][mappedRole].add(item['resource'])
                else:
                    self.projectData[item['project']][mappedRole] = {item['resource']}

        for key in self.projectData.keys():
            if key not in self.previousProjectData.keys():
                self.newProjectsDesc.append([self.projectData[key]['description']])
                self.changedProjects.append(key)
                self._create_folders(self.projectData[key]['description'], key)
                logger.info("Project new since last check: {0}".format(key))
            # have to subset the object for the keys we care about
            elif ({i: self.projectData[key][i] for i in self.projectData[key].keys() if i not in ignoredFields} !=
                  {i: self.previousProjectData[key][i] for i in self.previousProjectData[key].keys() if i not in ignoredFields}):
                self.changedProjects.append(key)
                logger.info("Project changed since last check: {0}".format(key))

        logger.info("List of changed projects: {0}".format(str(self.changedProjects)))

    # Sometimes people have multiple roles within a project; the logic above
    # dedupes them. For instance, the validating programmer will also be a
    # SAS-PRG, but they should not have the same permissions as a standard
    # SAS-PRG.

    def find_differences(self):
        """
        Finds differences between the current project data and the previous version.

        Creates:
            self.permissionChanges (list): Project, person, role and action to take for permissions changes.
        """
        self.permissionChanges = []

        if len(self.changedProjects) == 0:
            logger.info("There are no changes between the new and most recent project data files.")

        else:
            logger.info("There are {0} changes between the new and most recent project data files.".format(
                str(len(self.changedProjects))))

            for code in self.changedProjects:
                logger.info("Starting {0}".format(code))
                newData = self.projectData[code]
                previousData = self.previousProjectData.get(code, None)

                # this means it is a new project; all permissions need to be added after the folders are created
                if previousData is None:

                    roles = newData.keys()

                    roles = [r for r in roles if r not in ignoredFields]
                    for role in roles:
                        for resource in newData[role]:
                            self._create_permissions_row(code, role, resource, 'allow')

                else:
                    # check to see if the sponsor or project names have changed
                    if newData['clientDescription'] != previousData['clientDescription']:
                        logger.warning("Sponsor names have changed. Previously {0}, now {1}".format(
                            previousData['clientDescription'], newData['clientDescription']))
                    if newData['description'] != previousData['description']:
                        logger.warning("Study names have changed. Previously {0}, now {1}".format(
                            previousData['description'], newData['description']))

                    roles = list(set(list(newData.keys()) + list(previousData.keys())))
                    roles = [f for f in roles if f not in ignoredFields]

                    for role in roles:
                        if role not in previousData.keys():
                            for resource in newData[role]:
                                self._create_permissions_row(code, role, resource, 'allow')
                        elif role not in newData.keys():
                            for resource in previousData[role]:
                                self._create_permissions_row(code, role, resource, 'deny')
                        elif previousData[role] == newData[role]:
                            pass
                        else:
                            # set arithmetic: anyone only in the new data gains access,
                            # anyone only in the old data loses it
                            adds = newData[role] - previousData[role]
                            for resource in adds:
                                self._create_permissions_row(code, role, resource, 'allow')
                            rems = previousData[role] - newData[role]
                            for resource in rems:
                                self._create_permissions_row(code, role, resource, 'deny')

    def _find_most_recent_file(self, typ):
        """
        Finds the most recent file for a given type. Since files of a type are all
        in the same folder, the newest one has to be determined by name.

        Parameters:
            typ (str): Either 'projectData' or 'resourceDict'.

        Returns:
            (str, datetime): The most recent file name and its timestamp.
        """

        if typ == 'projectData':
            eligible_files = [f for f in os.listdir(self.PROJ_DATA_LOCATION)
                              if os.path.isfile(os.path.join(self.PROJ_DATA_LOCATION, f))]
        elif typ == 'resourceDict':
            eligible_files = [f for f in os.listdir(self.RES_DATA_LOCATION)
                              if os.path.isfile(os.path.join(self.RES_DATA_LOCATION, f))]
        else:
            raise ValueError("{0} not a valid type".format(typ))

        # file names embed a UTC timestamp (e.g. projectData_2020-04-22_234600.pkl),
        # so a reverse lexicographic sort puts the newest file first
        most_recent_file = sorted(eligible_files, reverse=True)[0]
        previous_timestamp = datetime.strptime(most_recent_file.replace(
            typ + '_', '').replace('.pkl', ' +0000'), '%Y-%m-%d_%H%M%S %z')
        logger.info("Most recent {1} timestamp is {0}".format(previous_timestamp, typ))

        logger.info("Most recent file of type {0} is {1}".format(typ, most_recent_file))

        return most_recent_file, previous_timestamp

    def _return_json_from_api(self, api_type, additional_info=False):
        """
        Returns the JSON from the API specified, paging until all results are collected.

        Parameters:
            api_type (str): One of the three APIs to call: 'resource', 'project' or 'assignment'.
            additional_info (bool): Currently unused.

        Returns:
            api_json (list): The desired JSON records.
        """

        if api_type == 'resource':
            url = 'https://api.keyedinprojects.com/V3/api/search/resource?fields=code,name,department,primaryRole,lineManager&resultsPerPage=1000&criteria=active=True&pageNumber='
        elif api_type == 'project':
            url = 'https://api.keyedinprojects.com/V3/api/search/project?resultsPerPage=1000&criteria=code contains(PRJ) and active=-1&pageNumber='
        # this is a custom API call based on a report; edits to the report in the UI would be needed to change it
        elif api_type == 'assignment':
            url = 'https://api.keyedinprojects.com/V3/api/report?resultsPerPage=1000&key=277&pageNumber='
        else:
            logger.error("Invalid api type: {0}".format(api_type))
            raise ValueError("Invalid api type: {0}".format(api_type))

        logger.info("API type of {0} with url {1} called.".format(api_type, url))

        complete = False
        api_json = []
        page_number = 1
        attempts = 1

        # Each page is attempted up to three times before the run gives up on the
        # API. A wait between attempts, in case transient load is the issue, may
        # be worth adding (see the sketch below); needs further testing.
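        # Sketch only (hypothetical; not enabled): a short exponential backoff
        # before each retry would need "import time" at the top of the module:
        #   time.sleep(2 ** attempts)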

        while not complete:
            attempts = 1
            success = False
            while attempts <= 3 and not success:
                try:
                    call = url + str(page_number)
                    self.connection = httpx.get(call, auth=(configSettings['KI_USER'], configSettings['KI_PW']))
                    if self.connection.status_code == 200:
                        logger.info("{0} Connection {1} successful: {2}".format(api_type, str(page_number), call))
                        payload = self.connection.json()
                        api_json += payload['Data']
                        success = True
                        if payload['PageNumber'] >= payload['TotalPages']:
                            complete = True
                            return api_json
                        else:
                            page_number += 1
                    else:
                        logger.error("Connection failed with status code {0}: {1}".format(
                            str(self.connection.status_code), call))
                except Exception:
                    logger.error("API return failure on attempt {0}.".format(attempts))
                attempts += 1
            if not success:
                logger.error("Page {0} of the {1} API failed after 3 attempts. Aborting.".format(page_number, api_type))
                return api_json

    def _create_permissions_row(self, code, role, resource, typ):
        """
        Creates a row for a csv that the permissions script will act on.

        Parameters:
            code (str): Project ID.
            role (str): The role of the person.
            resource (str): Person's name.
            typ (str): Either 'allow' or 'deny'.

        Creates:
            self.permissionChanges (list): List of changes to be made.
        """

        # Based on the role of the person, determine the location of the permissions.
        # Directors and some departments have full access; they are filtered out here.

        try:
            resourceInfo = self.resourceDict[resource]

        except KeyError:
            logger.warning("Resource not found in dict. {0}".format(resource))
            return
        # people with the following primary roles are out of scope for this

        if resourceInfo['role'] in ['AD-EXEC', 'AD-FIN-HR', 'AD-IT', 'AD-IT-INT', 'AD-QA', 'BD', 'BS-AS-DIR', 'BS-DIR',
                                    'BS-PRIN-RS', 'DM-DIR', 'SSC-AI', 'SSC-CSO']:
            logger.info('{0} with primary role {1} out of scope.'.format(resourceInfo['name'], resourceInfo['role']))
        else:

            perChanges = self.permission_mappings[(self.permission_mappings['role'] == role) &
                                                  (self.permission_mappings['action'] == typ)]
            for index, row in perChanges.iterrows():
                if code not in self.project_mappings.index:
                    logger.warning('Project Code {0} not in project mappings'.format(code))
                    return

                try:
                    folder = self.project_mappings.at[code, row['location']]
                except KeyError:
                    logger.warning("Folder not found. {0}".format(str([code, role, row['location']])))
                    continue

                if pd.isnull(folder):
                    logger.warning("Folder not found. {0}".format(str([code, role, row['location']])))
                else:
                    # row shape matches the header written in output_csv(): folder, role, person, action
                    newRow = [folder, row['role'], resourceInfo['name'], row['permissionChange']]
                    self.permissionChanges.append(newRow)
                    logger.info("Permission change recorded: {0}".format(str(newRow)))

    def _create_folders(self, project_name, project_key):
        """
        Creates new folders for a new project.

        Parameters:
            project_name (str): Name of the new project that needs folders created.
            project_key (str): Project ID.
        """
        # needs changing so there is a real sponsor/study nomenclature; dummy split for now
        try:
            sponsor, study = project_name.split('_', 1)
        except ValueError:
            logger.error("Project Name not in correct format: {0}".format(project_name))
            return
        stats_loc = self.STATS_LOCATION + sponsor + '\\' + study
        dm_loc = self.DM_LOCATION + sponsor + '\\' + study

        try:
            shutil.copytree(self.STATS_COPY_LOCATION, stats_loc)
            logger.info('Stats directory created at: {0}'.format(stats_loc))
        except FileExistsError:
            logger.warning('Stats directory already exists at: {0}'.format(stats_loc))
        try:
            shutil.copytree(self.DM_COPY_LOCATION, dm_loc)
            logger.info('DM directory created at: {0}'.format(dm_loc))
        except FileExistsError:
            logger.warning('DM directory already exists at: {0}'.format(dm_loc))

        # separate checks because, if STATS_COPY_LOCATION changes, it's possible these end up in different places
        if os.path.isdir(stats_loc + '\\Statistics\\Programs\\Primary Programs'):
            primProg = stats_loc + '\\Statistics\\Programs\\Primary Programs'
        else:
            primProg = ''
            logger.warning("Project Mappings primary program not found for {0} {1}".format(project_name, project_key))

        if os.path.isdir(stats_loc + '\\Statistics\\Programs\\Validation Programs'):
            valProg = stats_loc + '\\Statistics\\Programs\\Validation Programs'
        else:
            valProg = ''
            logger.warning("Project Mappings validation program not found for {0} {1}".format(project_name, project_key))

        if os.path.isdir(stats_loc + '\\Statistics\\Randomization'):
            rand = stats_loc + '\\Statistics\\Randomization'
        else:
            rand = ''
            logger.warning("Project Mappings randomization not found for {0} {1}".format(project_name, project_key))

        if os.path.isdir(stats_loc + '\\DMData'):
            dmd = stats_loc + '\\DMData'
        else:
            dmd = ''
            logger.warning("Project Mappings dmdata not found for {0} {1}".format(project_name, project_key))

        newProject = pd.DataFrame([[dm_loc, stats_loc, primProg, valProg, rand, dmd]], columns=[
            'dmDrive', 'statsDrive', 'primaryProgram', 'validationProgram', 'randomization', 'dmdata'], index=[project_key])
        self.project_mappings = pd.concat([self.project_mappings, newProject])
        logger.info("Project mappings created for {0} {1}".format(project_name, project_key))

    def output_csv(self):
        """
        Outputs CSVs for the powershell script to act on, and for auditing.
        """
        with open(self.OUTPUT_LOCATION + 'permissionsChanges_' + startTime + '.csv', 'w', newline="") as handle:
            writer = csv.writer(handle)
            writer.writerow(['folder', 'role', 'person', 'action'])
            writer.writerows(self.permissionChanges)
        logger.info("CSV successfully output: {0}".format(self.OUTPUT_LOCATION + 'permissionsChanges_' + startTime + '.csv'))

        with open(self.OUTPUT_LOCATION + 'newProjects_' + startTime + '.csv', 'w', newline="") as handle:
            writer = csv.writer(handle)
            writer.writerow(['study'])
            writer.writerows(self.newProjectsDesc)
        logger.info("CSV successfully output: {0}".format(self.OUTPUT_LOCATION + 'newProjects_' + startTime + '.csv'))

    def _get_excel_serial_date(self, dttm):
        """
        The KeyedIn assignment API uses the Excel serial date format in the Eastern
        timezone. This converts a datetime to that format.

        Parameters:
            dttm (datetime): datetime to calculate from.

        Creates:
            self.excelSerialDate (float): The float date to be used.
        """
        self.eastern = pytz.timezone('America/New_York')

        # pytz timezones must be attached with localize(), not tzinfo=, which would
        # apply the pre-1900 LMT offset. Note the Excel epoch is 30 Dec 1899, not the 31st!
        temp = self.eastern.localize(datetime(1899, 12, 30))

        dttm = dttm.astimezone(self.eastern)

        delta = dttm - temp
        self.excelSerialDate = round(float(delta.days) + (float(delta.seconds) / 86400), 3)
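        # Illustrative check (hypothetical value): 2020-04-22 18:00 Eastern is
        # 43943 whole days plus 0.75 of a day past the epoch, i.e. 43943.75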
        logger.info("Excel Serial Date is: {0}".format(self.excelSerialDate))

    def wrap_up(self):
        """
        Compresses older files for space purposes. Logs and old data are compressed every month.

        Outputs the current resourceDict and projectData to serve as the previous versions next run.
        """

        # archive when the (year, month) has rolled over since the last run;
        # tuple comparison also covers the Dec -> Jan transition
        if (startTimeStamp.year, startTimeStamp.month) > (self.previousProjectTimestamp.year, self.previousProjectTimestamp.month):
            logger.info("New month detected, zipping files: {0} to {1}".format(datetime.strftime(self.previousProjectTimestamp, '%Y-%m-%d'),
                                                                               datetime.strftime(startTimeStamp, '%Y-%m-%d')))
            self._zip_files(self.PROJ_DATA_LOCATION)
            self._zip_files(self.RES_DATA_LOCATION)
            self._zip_files(self.DATA_LOCATION + 'logs\\')
            self._zip_files(self.DATA_LOCATION + 'json\\')

        # save the most recent versions so future runs can diff against them
        with open(self.PROJ_DATA_LOCATION + 'projectData_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.projectData, handle)
        logger.info("Project data saved as pickle file: {0}".format(
            self.PROJ_DATA_LOCATION + 'projectData_' + startTime + '.pkl'))

        with open(self.RES_DATA_LOCATION + 'resourceDict_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.resourceDict, handle)
        logger.info("Resource dict saved as pickle file: {0}".format(
            self.RES_DATA_LOCATION + 'resourceDict_' + startTime + '.pkl'))

        with open(self.DATA_LOCATION + 'json\\projectJson_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.projectJson, handle)
        logger.info("Project json saved as pickle file: {0}".format(
            self.DATA_LOCATION + 'json\\projectJson_' + startTime + '.pkl'))

        with open(self.DATA_LOCATION + 'json\\assignmentJson_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.assignmentJson, handle)
        logger.info("Assignment json saved as pickle file: {0}".format(
            self.DATA_LOCATION + 'json\\assignmentJson_' + startTime + '.pkl'))

        # resourceJson only exists when the resource data was refreshed this run
        try:
            if len(self.resourceJson) > 0:
                with open(self.DATA_LOCATION + 'json\\resourceJson_' + startTime + '.pkl', 'wb') as handle:
                    pickle.dump(self.resourceJson, handle)
                logger.info("Resource json saved as pickle file: {0}".format(
                    self.DATA_LOCATION + 'json\\resourceJson_' + startTime + '.pkl'))
        except AttributeError:
            logger.info("Resource json does not exist")

        # make sure mappings added for new projects are available for future runs
        self.project_mappings.to_csv('project_mappings.csv')

    def _zip_files(self, loc):
        """
        Creates an archive, then deletes the files in the given directory.

        Parameters:
            loc (str): The file path of the directory to modify.
        """
        logger.info("Starting archive process for {0}".format(loc))
        loc_name = loc.split('\\')[-2]

        fls = os.listdir(loc)
        fls = [f for f in fls if f != 'mylog.log']
        newest = sorted(fls, reverse=True)[0]
        oldest = sorted(fls, reverse=False)[0]

        # data files embed a full timestamp; rotated log files only embed a date
        if loc_name != 'logs':
            newest_ts = datetime.strptime(newest.replace(loc_name + '_', '').replace('.pkl', ''), '%Y-%m-%d_%H%M%S')
            oldest_ts = datetime.strptime(oldest.replace(loc_name + '_', '').replace('.pkl', ''), '%Y-%m-%d_%H%M%S')
        else:
            newest_ts = datetime.strptime(newest.replace('mylog.log.', ''), '%Y-%m-%d')
            oldest_ts = datetime.strptime(oldest.replace('mylog.log.', ''), '%Y-%m-%d')

        newest_ts_str = datetime.strftime(newest_ts, '%Y-%m-%d')
        oldest_ts_str = datetime.strftime(oldest_ts, '%Y-%m-%d')

        logger.info("{0} archive contains {1} files from dates {2} to {3}.".format(
            loc, str(len(fls)), oldest_ts_str, newest_ts_str))

        shutil.make_archive(self.DATA_LOCATION + 'archive\\' + loc_name + '_' + oldest_ts_str +
                            '_' + newest_ts_str, root_dir=loc, format='tar', logger=logger)
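        # e.g. (hypothetical dates) projectData_2020-03-01_2020-04-22.tar under DATA_LOC\archive\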

        for f in fls:
            os.remove(loc + f)
            logger.info("File removed at {0}".format(loc + f))
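

# Hypothetical driver sketch (not part of the original module): the method names
# suggest this call order, but the real entry point may live elsewhere, e.g. in
# a scheduled-task wrapper.
if __name__ == '__main__':
    sa = StudyAutomation()
    sa.call_api_get_data(refresh_data=True)   # pull fresh data from KeyedIn
    sa.find_differences()                     # diff against the previous run
    sa.output_csv()                           # emit CSVs for the PowerShell step
    sa.wrap_up()                              # persist state and archive old files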