import csv
import logging
import os
import pickle
import shutil
from datetime import datetime, timezone

import httpx
import pandas as pd
import pytz


from study_automation import configSettings

logger = logging.getLogger(__name__)
startTimeStamp = datetime.now(timezone.utc)
startTime = datetime.strftime(startTimeStamp, '%Y-%m-%d_%H%M%S')
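# startTime looks like e.g. '2020-04-22_234600' (illustrative) and is embedded in output file names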
localStartTime = datetime.now()

# Maps the role names from the project API (home page) to the naming
# convention used by the assignment API (tasks tab in the UI).
KI_ROLE_LU = {'Lead Clinical Data Manager': 'DM',
              'Backup Clinical Data Manager': 'DM',
              'Lead Clinical Data Analyst': 'DM',
              'Lead Clinical Programmer': 'CP',
              'Medical Coder': 'DM',
              'IRT Project Manager': 'IRT',
              'Lead Biostatistician': 'BS-BIOSTAT',
              'Lead Statistical Programmer': 'SAS-PRG',
              'Biostatistics QC': 'BS-QC',
              'Reviewing Biostatistician': 'BS-BIOSTAT',
              'Lead Validating Statistical Programmer': 'SAS-PRG-VAL',
              'Unmasked Biostatistician': 'BS-BIOSTAT',
              'Project Manager': 'PM',
              'Data Manager Co-Lead': 'DM',
              'IRT Project Manager Co-Lead': 'IRT',
              'Biostatistician Co-Lead': 'BS-BIOSTAT'
              }

# Roles from the task page, normalized slightly. Used for the assignment API.
KI_TASK_ROLE_LU = {'BS-AS-DIR': 'BS-BIOSTAT',
                   'BS-BIOSTAT': 'BS-BIOSTAT',
                   'BS-BIOSTAT2': 'BS-BIOSTAT',
                   'BS-DIR': 'BS-BIOSTAT',
                   'BS-MGR': 'BS-BIOSTAT',
                   'BS-PRIN-BIOSTAT': 'BS-BIOSTAT',
                   'BS-PRIN-RS': 'BS-BIOSTAT',
                   'BS-QC': 'BS-QC',
                   'CP': 'CP',
                   'CP-MGR': 'CP',
                   'DM-CDA': 'DM',
                   'DM-CDM': 'DM',
                   'DM-DIR': 'DM',
                   'DM-MEDCOD': 'DM',
                   'DM-MEDCOD-MGR': 'DM',
                   'DM-MGR': 'DM',
                   'IRT-PM': 'IRT',
                   'IRT-SERV-MGR': 'IRT',
                   'PM': 'PM',
                   'SAS-PRG': 'SAS-PRG',
                   'SAS-PRG-MGR': 'SAS-PRG',
                   'SSC-PRG-VAL': 'SAS-PRG-VAL',
                   }

# Custom-field keys to extract from the project API; they correspond to the roles above.
role_key = [218, 282, 293, 294, 295, 296, 283, 284, 297, 298, 299, 300, 301, 326, 327, 328]
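
# Illustrative customFieldValues entry (hypothetical key/value pairing) and how
# it is consumed in _get_project_data():
#   {'key': 282, 'name': 'Lead Clinical Data Manager', 'value': 'EMP001'}
#   -> code_data[KI_ROLE_LU['Lead Clinical Data Manager']] == code_data['DM'] == {'EMP001'}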

# For the project data, these keys are mostly ignored when diffing.
ignoredFields = ['startDate', 'Last Edit Date', 'newStudy',
                 'description', 'clientDescription', 'duplicates', 'employees']


class StudyAutomation:
    """
    Monitors the KeyedIn API and automates study setup processes
    (folder creation and permissions changes).
    """

    def __init__(self):
        """
        Init records the locations of the data files and loads the most recent
        versions of resourceDict and projectData.
        """
        logger.info("Study Automation process begun.")

        self.DATA_LOCATION = configSettings['DATA_LOC']
        logger.info('Previous data located at {0}'.format(self.DATA_LOCATION))

        self.PROJ_DATA_LOCATION = configSettings['DATA_LOC'] + 'projectData\\'
        logger.info('Previous project data located at {0}'.format(self.PROJ_DATA_LOCATION))

        self.RES_DATA_LOCATION = configSettings['DATA_LOC'] + 'resourceDict\\'
        logger.info('Previous resource data located at {0}'.format(self.RES_DATA_LOCATION))

        self.STATS_LOCATION = configSettings['STATS_LOC']
        logger.info('Stats drive root located at {0}'.format(self.STATS_LOCATION))

        self.DM_LOCATION = configSettings['DM_LOC']
        logger.info('DM drive root located at {0}'.format(self.DM_LOCATION))

        self.STATS_COPY_LOCATION = configSettings['STATS_COPY_LOC']
        logger.info('Stats copy drive root located at {0}'.format(self.STATS_COPY_LOCATION))

        self.DM_COPY_LOCATION = configSettings['DM_COPY_LOC']
        logger.info('DM copy drive root located at {0}'.format(self.DM_COPY_LOCATION))

        self.OUTPUT_LOCATION = configSettings['OUTPUT_LOC']
        logger.info('Output location located at {0}'.format(self.OUTPUT_LOCATION))

        self.project_mappings = pd.read_csv('project_mappings.csv', index_col='project')
        logger.info('Project mappings located at {}'.format(os.getcwd() + '\\project_mappings.csv'))

        self.permission_mappings = pd.read_csv('permission_mappings.csv')
        logger.info('Permission mappings located at {}'.format(os.getcwd() + '\\permission_mappings.csv'))
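
        # Expected CSV shapes (inferred from usage elsewhere in this class; illustrative):
        #   project_mappings.csv: index column 'project' plus the folder columns
        #     dmDrive, statsDrive, primaryProgram, validationProgram,
        #     randomization, dmdata (see _create_folders)
        #   permission_mappings.csv: columns role, action, location,
        #     permissionChange (see _create_permissions_row)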

        self.newProjectsDesc = []

        self._load_previous_resource_dict()
        self._load_previous_project_data()
        self._get_excel_serial_date(self.previousProjectTimestamp)

    def call_api_get_data(self, refresh_data=False):
        """
        Collects the data needed to determine differences.

        Parameters:
            refresh_data (bool): Whether to completely refresh data from the API
                or reuse the previously saved values.
        """
        logger.info("Refresh data set to {0}".format(str(refresh_data)))

        # if the resource dict is more than a day old, force a refresh
        if (startTimeStamp.date() - self.previousResourceTimestamp.date()).days > 1:
            self._create_resource_dict(True)
            logger.info('Resource Dict refresh over a day old. Forcing refresh.')
        else:
            self._create_resource_dict(refresh_data)

        self._get_project_data()

    def _load_previous_resource_dict(self):
        """
        Loads the previous resource dict.

        Creates:
            self.previousResourceTimestamp (datetime): Last time a resource dict was created, in UTC.
            self.previousResourceDictLoc (str): Where the file was located.
            self.previousResourceDict (dict): Keys are employee codes from KI. Fields include name, role, department.
        """

        previousLoc, self.previousResourceTimestamp = self._find_most_recent_file('resourceDict')
        self.previousResourceDictLoc = self.RES_DATA_LOCATION + previousLoc
        with open(self.previousResourceDictLoc, 'rb') as handle:
            self.previousResourceDict = pickle.load(handle)
        logger.info("Previous Resource dictionary loaded from {0}".format(self.previousResourceDictLoc))

    def _load_previous_project_data(self):
        """
        Loads the previous project data.

        Creates:
            self.previousProjectTimestamp (datetime): Last time project data was created, in UTC.
            self.previousProjectDataLoc (str): Where the file was located.
            self.previousProjectData (dict): Project data; see _get_project_data for the structure.
        """
        previousLoc, self.previousProjectTimestamp = self._find_most_recent_file('projectData')
        self.previousProjectDataLoc = self.PROJ_DATA_LOCATION + previousLoc
        with open(self.previousProjectDataLoc, 'rb') as handle:
            self.previousProjectData = pickle.load(handle)
        logger.info("Previous Project Data loaded from: {0}".format(self.previousProjectDataLoc))

    def _create_resource_dict(self, refresh_data=True):
        """
        Creates the resource dict if refresh_data is True. If it is False, or if
        there is an error, the most recent saved dictionary is used instead.

        Parameters:
            refresh_data (bool): Whether to completely refresh data from the API
                or reuse the previously saved values.

        Creates:
            self.resourceJson (dict): JSON returned from the API. See the URL definition below for filters and fields included.
            self.resourceDict (dict): Keys are employee codes from KI. Fields include name, role, department.
        """

        if refresh_data:
            try:
                self.resourceJson = self._return_json_from_api('resource')
                self.resourceDict = {}
                for item in self.resourceJson:
                    self.resourceDict[item['code']] = {'name': item['name'], 'role': item['primaryRole'],
                                                       'dept': item['departmentDescription'], 'manager': item['lineManagerDescription']}
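
                # Illustrative entry (hypothetical employee code and values):
                #   self.resourceDict['EMP001'] == {'name': 'Jane Doe', 'role': 'SAS-PRG',
                #       'dept': 'Biostatistics', 'manager': 'John Smith'}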

            except Exception:
                logger.error("Resource connection failed. Using previous resource dict.")
                self.resourceDict = self.previousResourceDict

        else:
            self.resourceDict = self.previousResourceDict

        logger.info("Resource Dict: {0}".format(self.resourceDict))

    def _get_project_data(self):
        """
        Calls the project API, determines which projects have changed, and then calls the assignment API.

        Note that the attached employees come through two different calls: the project call
        (where role names are normalized via KI_ROLE_LU) and the assignment call
        (where task roles are normalized via KI_TASK_ROLE_LU).

        Creates:
            self.projectJson (dict): JSON returned from the API. See the URL definition below for filters and fields included.
            self.changedProjects (list): Project codes that have changed since the last check or are new.
            self.projectData (dict): Has project codes, sponsors, projects, start date, last edit date and attached employees.
            self.assignmentJson: JSON returned from the API. See the URL definition below for filters and fields included.
        """
        self.projectJson = self._return_json_from_api('project')

        self.projectData = {}
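
        # Illustrative shape of one entry (hypothetical project code and employee ids):
        #   self.projectData['PRJ001'] == {
        #       'description': 'Sponsor_Study', 'clientDescription': 'Sponsor',
        #       'startDate': '...', 'PM': {'EMP002'}, 'DM': {'EMP001'},
        #       'employees': ['EMP002', 'EMP001']}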

        for item in self.projectJson:
            project_code = item['code']
            code_data = {}
            # emp_match tracks whether one employee holds two different roles
            emp_match = {}
            code_data['description'] = item['description']
            code_data['clientDescription'] = item['clientDescription']
            code_data['startDate'] = item['startDate']

            # don't include None, only valid references
            if item.get('manager') is not None:
                code_data['PM'] = {item['manager']}
                emp_match[item['manager']] = {'PM'}

            for field in item['customFieldValues']:
                if field['key'] in role_key and len(field['value']) > 0:
                    # key 218 is the edit date
                    if field['key'] == 218:
                        code_data[field['name']] = field['value']
                    else:
                        ki_role = KI_ROLE_LU[field['name']]
                        emp_id = field['value']
                        if ki_role in code_data.keys():
                            code_data[ki_role].add(emp_id)
                        else:
                            code_data[ki_role] = {emp_id}

                        if emp_id in emp_match.keys():
                            emp_match[emp_id].add(ki_role)
                        else:
                            emp_match[emp_id] = {ki_role}

            # check if any employees hold two different roles; alert and remove
            employees = list(emp_match.keys())
            if len(emp_match) > 0:
                employees_with_mult_roles = [k for k, v in emp_match.items() if len(v) > 1]
                projRoles = [k for k, v in code_data.items() if k not in ignoredFields]

                if len(employees_with_mult_roles) > 0:
                    for e in employees_with_mult_roles:
                        for role in projRoles:
                            assignedEmployees = code_data[role]
                            if e in assignedEmployees:
                                code_data[role].remove(e)
                                logger.warning("Employee {0} with multiple roles in project {1}. Removing {2} role.".format(
                                    e, project_code, role))

            else:
                logger.info("No employees found for project {0}".format(project_code))

            code_data['employees'] = employees
            self.projectData[project_code] = code_data

        self.assignmentJson = self._return_json_from_api('assignment')

        self.changedProjects = []

        # Put the assignment data back into projectData. However, if the employee
        # already has a home page role, do not add another one.
        for item in self.assignmentJson:
            mappedRole = KI_TASK_ROLE_LU.get(item['role'])

            # skip task roles that have no mapping rather than storing them under a None key
            if mappedRole is None:
                logger.warning("Unmapped task role {0} on project {1}. Skipping.".format(item['role'], item['project']))
                continue

            if item['resource'] in self.projectData[item['project']]['employees']:
                # only log if the employee is in a different role from the home page
                if mappedRole not in self.projectData[item['project']].keys() or item['resource'] not in self.projectData[item['project']][mappedRole]:
                    logger.info("Employee {0} already assigned to a role in project {1} from the home screen. Ignoring addl role {2} via {3}."
                                .format(item['resource'], item['project'], mappedRole, item['role']))

            else:
                if mappedRole in self.projectData[item['project']].keys():
                    self.projectData[item['project']][mappedRole].add(item['resource'])
                else:
                    self.projectData[item['project']][mappedRole] = {item['resource']}

        for key in self.projectData.keys():
            if key not in self.previousProjectData.keys():
                self.newProjectsDesc.append([self.projectData[key]['description']])
                self.changedProjects.append(key)
                self._create_folders(self.projectData[key]['description'], key)
                logger.info("Project new since last check: {0}".format(key))
            # have to subset the object for the keys we care about
            elif ({i: self.projectData[key][i] for i in self.projectData[key].keys() if i not in ignoredFields} !=
                  {i: self.previousProjectData[key][i] for i in self.previousProjectData[key].keys() if i not in ignoredFields}):
                self.changedProjects.append(key)
                logger.info("Project changed since last check: {0}".format(key))

        logger.info("List of changed projects: {0}".format(str(self.changedProjects)))

    # Sometimes people have multiple roles within a project; the logic above
    # dedupes them. For instance, the validating programmer will also be a
    # SAS-PRG, but they should not have the same permissions as a standard
    # SAS-PRG.

    def find_differences(self):
        """
        Finds differences between the current project data and the previous version.

        Creates:
            self.permissionChanges (list): Project, person, role and action to take for permissions changes.
        """
        self.permissionChanges = []

        if len(self.changedProjects) == 0:
            logger.info("There are no changes between the new and most recent project data files.")

        else:
            logger.info("There are {0} changes between the new and most recent project data files.".format(
                str(len(self.changedProjects))))

            for code in self.changedProjects:
                logger.info("Starting {0}".format(code))
                newData = self.projectData[code]
                previousData = self.previousProjectData.get(code, None)

                # this means it is a new project; all permissions need to be added after the folders are created
                if previousData is None:

                    roles = newData.keys()

                    roles = [r for r in roles if r not in ignoredFields]
                    for role in roles:
                        for resource in newData[role]:
                            self._create_permissions_row(code, role, resource, 'allow')

                else:
                    # check to see if the sponsor or project names have changed
                    if newData['clientDescription'] != previousData['clientDescription']:
                        logger.warning("Sponsor names have changed. Previously {0}, now {1}".format(
                            previousData['clientDescription'], newData['clientDescription']))
                    if newData['description'] != previousData['description']:
                        logger.warning("Study names have changed. Previously {0}, now {1}".format(
                            previousData['description'], newData['description']))

                    roles = list(set(list(newData.keys()) + list(previousData.keys())))
                    roles = [f for f in roles if f not in ignoredFields]

                    for role in roles:
                        if role not in previousData.keys():
                            for resource in newData[role]:
                                self._create_permissions_row(code, role, resource, 'allow')
                        elif role not in newData.keys():
                            for resource in previousData[role]:
                                self._create_permissions_row(code, role, resource, 'deny')
                        elif previousData[role] == newData[role]:
                            pass
                        else:
                            # set arithmetic: anyone only in the new data gains access,
                            # anyone only in the old data loses it
                            adds = newData[role] - previousData[role]
                            for resource in adds:
                                self._create_permissions_row(code, role, resource, 'allow')
                            rems = previousData[role] - newData[role]
                            for resource in rems:
                                self._create_permissions_row(code, role, resource, 'deny')

    def _find_most_recent_file(self, typ):
        """
        Finds the most recent file for a given type. Since files of a type are all
        in the same folder, the newest one has to be determined by name.

        Parameters:
            typ (str): Either 'projectData' or 'resourceDict'.

        Returns:
            (str, datetime): The most recent file name and its timestamp.
        """

        if typ == 'projectData':
            eligible_files = [f for f in os.listdir(self.PROJ_DATA_LOCATION)
                              if os.path.isfile(os.path.join(self.PROJ_DATA_LOCATION, f))]
        elif typ == 'resourceDict':
            eligible_files = [f for f in os.listdir(self.RES_DATA_LOCATION)
                              if os.path.isfile(os.path.join(self.RES_DATA_LOCATION, f))]
        else:
            raise ValueError("{0} not a valid type".format(typ))

        # file names embed a UTC timestamp (e.g. projectData_2020-04-22_234600.pkl),
        # so a reverse lexicographic sort puts the newest file first
        most_recent_file = sorted(eligible_files, reverse=True)[0]
        previous_timestamp = datetime.strptime(most_recent_file.replace(
            typ + '_', '').replace('.pkl', ' +0000'), '%Y-%m-%d_%H%M%S %z')
        logger.info("Most recent {1} timestamp is {0}".format(previous_timestamp, typ))

        logger.info("Most recent file of type {0} is {1}".format(typ, most_recent_file))

        return most_recent_file, previous_timestamp

    def _return_json_from_api(self, api_type, additional_info=False):
        """
        Returns the JSON from the API specified, paging until all results are collected.

        Parameters:
            api_type (str): One of the three APIs to call: 'resource', 'project' or 'assignment'.
            additional_info (bool): Currently unused.

        Returns:
            api_json (list): The desired JSON records.
        """

        if api_type == 'resource':
            url = 'https://api.keyedinprojects.com/V3/api/search/resource?fields=code,name,department,primaryRole,lineManager&resultsPerPage=1000&criteria=active=True&pageNumber='
        elif api_type == 'project':
            url = 'https://api.keyedinprojects.com/V3/api/search/project?resultsPerPage=1000&criteria=code contains(PRJ) and active=-1&pageNumber='
        # this is a custom API call based on a report; edits to the report in the UI would be needed to change it
        elif api_type == 'assignment':
            url = 'https://api.keyedinprojects.com/V3/api/report?resultsPerPage=1000&key=277&pageNumber='
        else:
            logger.error("Invalid api type: {0}".format(api_type))
            raise ValueError("Invalid api type: {0}".format(api_type))

        logger.info("API type of {0} with url {1} called.".format(api_type, url))

        complete = False
        api_json = []
        page_number = 1
        attempts = 1

        # Each page is attempted up to three times before the run gives up on the
        # API. A wait between attempts, in case transient load is the issue, may
        # be worth adding (see the sketch below); needs further testing.
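        # Sketch only (hypothetical; not enabled): a short exponential backoff
        # before each retry would need "import time" at the top of the module:
        #   time.sleep(2 ** attempts)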

        while not complete:
            attempts = 1
            success = False
            while attempts <= 3 and not success:
                try:
                    call = url + str(page_number)
                    self.connection = httpx.get(call, auth=(configSettings['KI_USER'], configSettings['KI_PW']))
                    if self.connection.status_code == 200:
                        logger.info("{0} Connection {1} successful: {2}".format(api_type, str(page_number), call))
                        payload = self.connection.json()
                        api_json += payload['Data']
                        success = True
                        if payload['PageNumber'] >= payload['TotalPages']:
                            complete = True
                            return api_json
                        else:
                            page_number += 1
                    else:
                        logger.error("Connection failed with status code {0}: {1}".format(
                            str(self.connection.status_code), call))
                except Exception:
                    logger.error("API return failure on attempt {0}.".format(attempts))
                attempts += 1
            if not success:
                logger.error("Page {0} of the {1} API failed after 3 attempts. Aborting.".format(page_number, api_type))
                return api_json

    def _create_permissions_row(self, code, role, resource, typ):
        """
        Creates a row for a csv that the permissions script will act on.

        Parameters:
            code (str): Project ID.
            role (str): The role of the person.
            resource (str): Person's name.
            typ (str): Either 'allow' or 'deny'.

        Creates:
            self.permissionChanges (list): List of changes to be made.
        """

        # Based on the role of the person, determine the location of the permissions.
        # Directors and some departments have full access; they are filtered out here.

        try:
            resourceInfo = self.resourceDict[resource]

        except KeyError:
            logger.warning("Resource not found in dict. {0}".format(resource))
            return
        # people with the following primary roles are out of scope for this

        if resourceInfo['role'] in ['AD-EXEC', 'AD-FIN-HR', 'AD-IT', 'AD-IT-INT', 'AD-QA', 'BD', 'BS-AS-DIR', 'BS-DIR',
                                    'BS-PRIN-RS', 'DM-DIR', 'SSC-AI', 'SSC-CSO']:
            logger.info('{0} with primary role {1} out of scope.'.format(resourceInfo['name'], resourceInfo['role']))
        else:

            perChanges = self.permission_mappings[(self.permission_mappings['role'] == role) &
                                                  (self.permission_mappings['action'] == typ)]
            for index, row in perChanges.iterrows():
                if code not in self.project_mappings.index:
                    logger.warning('Project Code {0} not in project mappings'.format(code))
                    return

                try:
                    folder = self.project_mappings.at[code, row['location']]
                except KeyError:
                    logger.warning("Folder not found. {0}".format(str([code, role, row['location']])))
                    continue

                if pd.isnull(folder):
                    logger.warning("Folder not found. {0}".format(str([code, role, row['location']])))
                else:
                    # row shape matches the header written in output_csv(): folder, role, person, action
                    newRow = [folder, row['role'], resourceInfo['name'], row['permissionChange']]
                    self.permissionChanges.append(newRow)
                    logger.info("Permission change recorded: {0}".format(str(newRow)))

    def _create_folders(self, project_name, project_key):
        """
        Creates new folders for a new project.

        Parameters:
            project_name (str): Name of the new project that needs folders created.
            project_key (str): Project ID.
        """
        # needs changing so there is a real sponsor/study nomenclature; dummy split for now
        try:
            sponsor, study = project_name.split('_', 1)
        except ValueError:
            logger.error("Project Name not in correct format: {0}".format(project_name))
            return
        stats_loc = self.STATS_LOCATION + sponsor + '\\' + study
        dm_loc = self.DM_LOCATION + sponsor + '\\' + study

        try:
            shutil.copytree(self.STATS_COPY_LOCATION, stats_loc)
            logger.info('Stats directory created at: {0}'.format(stats_loc))
        except FileExistsError:
            logger.warning('Stats directory already exists at: {0}'.format(stats_loc))
        try:
            shutil.copytree(self.DM_COPY_LOCATION, dm_loc)
            logger.info('DM directory created at: {0}'.format(dm_loc))
        except FileExistsError:
            logger.warning('DM directory already exists at: {0}'.format(dm_loc))

        # separate checks because, if STATS_COPY_LOCATION changes, it's possible these end up in different places
        if os.path.isdir(stats_loc + '\\Statistics\\Programs\\Primary Programs'):
            primProg = stats_loc + '\\Statistics\\Programs\\Primary Programs'
        else:
            primProg = ''
            logger.warning("Project Mappings primary program not found for {0} {1}".format(project_name, project_key))

        if os.path.isdir(stats_loc + '\\Statistics\\Programs\\Validation Programs'):
            valProg = stats_loc + '\\Statistics\\Programs\\Validation Programs'
        else:
            valProg = ''
            logger.warning("Project Mappings validation program not found for {0} {1}".format(project_name, project_key))

        if os.path.isdir(stats_loc + '\\Statistics\\Randomization'):
            rand = stats_loc + '\\Statistics\\Randomization'
        else:
            rand = ''
            logger.warning("Project Mappings randomization not found for {0} {1}".format(project_name, project_key))

        if os.path.isdir(stats_loc + '\\DMData'):
            dmd = stats_loc + '\\DMData'
        else:
            dmd = ''
            logger.warning("Project Mappings dmdata not found for {0} {1}".format(project_name, project_key))

        newProject = pd.DataFrame([[dm_loc, stats_loc, primProg, valProg, rand, dmd]], columns=[
            'dmDrive', 'statsDrive', 'primaryProgram', 'validationProgram', 'randomization', 'dmdata'], index=[project_key])
        self.project_mappings = pd.concat([self.project_mappings, newProject])
        logger.info("Project mappings created for {0} {1}".format(project_name, project_key))

    def output_csv(self):
        """
        Outputs CSVs for the powershell script to act on, and for auditing.
        """
        with open(self.OUTPUT_LOCATION + 'permissionsChanges_' + startTime + '.csv', 'w', newline="") as handle:
            writer = csv.writer(handle)
            writer.writerow(['folder', 'role', 'person', 'action'])
            writer.writerows(self.permissionChanges)
        logger.info("CSV successfully output: {0}".format(self.OUTPUT_LOCATION + 'permissionsChanges_' + startTime + '.csv'))

        with open(self.OUTPUT_LOCATION + 'newProjects_' + startTime + '.csv', 'w', newline="") as handle:
            writer = csv.writer(handle)
            writer.writerow(['study'])
            writer.writerows(self.newProjectsDesc)
        logger.info("CSV successfully output: {0}".format(self.OUTPUT_LOCATION + 'newProjects_' + startTime + '.csv'))

    def _get_excel_serial_date(self, dttm):
        """
        The KeyedIn assignment API uses the Excel serial date format in the Eastern
        timezone. This converts a datetime to that format.

        Parameters:
            dttm (datetime): datetime to calculate from.

        Creates:
            self.excelSerialDate (float): The float date to be used.
        """
        self.eastern = pytz.timezone('America/New_York')

        # pytz timezones must be attached with localize(), not tzinfo=, which would
        # apply the pre-1900 LMT offset. Note the Excel epoch is 30 Dec 1899, not the 31st!
        temp = self.eastern.localize(datetime(1899, 12, 30))

        dttm = dttm.astimezone(self.eastern)

        delta = dttm - temp
        self.excelSerialDate = round(float(delta.days) + (float(delta.seconds) / 86400), 3)
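        # Illustrative check (hypothetical value): 2020-04-22 18:00 Eastern is
        # 43943 whole days plus 0.75 of a day past the epoch, i.e. 43943.75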
        logger.info("Excel Serial Date is: {0}".format(self.excelSerialDate))

    def wrap_up(self):
        """
        Compresses older files for space purposes. Logs and old data are compressed every month.

        Outputs the current resourceDict and projectData to serve as the previous versions next run.
        """

        # archive when the (year, month) has rolled over since the last run;
        # tuple comparison also covers the Dec -> Jan transition
        if (startTimeStamp.year, startTimeStamp.month) > (self.previousProjectTimestamp.year, self.previousProjectTimestamp.month):
            logger.info("New month detected, zipping files: {0} to {1}".format(datetime.strftime(self.previousProjectTimestamp, '%Y-%m-%d'),
                                                                               datetime.strftime(startTimeStamp, '%Y-%m-%d')))
            self._zip_files(self.PROJ_DATA_LOCATION)
            self._zip_files(self.RES_DATA_LOCATION)
            self._zip_files(self.DATA_LOCATION + 'logs\\')
            self._zip_files(self.DATA_LOCATION + 'json\\')

        # save the most recent versions so future runs can diff against them
        with open(self.PROJ_DATA_LOCATION + 'projectData_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.projectData, handle)
        logger.info("Project data saved as pickle file: {0}".format(
            self.PROJ_DATA_LOCATION + 'projectData_' + startTime + '.pkl'))

        with open(self.RES_DATA_LOCATION + 'resourceDict_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.resourceDict, handle)
        logger.info("Resource dict saved as pickle file: {0}".format(
            self.RES_DATA_LOCATION + 'resourceDict_' + startTime + '.pkl'))

        with open(self.DATA_LOCATION + 'json\\projectJson_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.projectJson, handle)
        logger.info("Project json saved as pickle file: {0}".format(
            self.DATA_LOCATION + 'json\\projectJson_' + startTime + '.pkl'))

        with open(self.DATA_LOCATION + 'json\\assignmentJson_' + startTime + '.pkl', 'wb') as handle:
            pickle.dump(self.assignmentJson, handle)
        logger.info("Assignment json saved as pickle file: {0}".format(
            self.DATA_LOCATION + 'json\\assignmentJson_' + startTime + '.pkl'))

        # resourceJson only exists when the resource data was refreshed this run
        try:
            if len(self.resourceJson) > 0:
                with open(self.DATA_LOCATION + 'json\\resourceJson_' + startTime + '.pkl', 'wb') as handle:
                    pickle.dump(self.resourceJson, handle)
                logger.info("Resource json saved as pickle file: {0}".format(
                    self.DATA_LOCATION + 'json\\resourceJson_' + startTime + '.pkl'))
        except AttributeError:
            logger.info("Resource json does not exist")

        # make sure mappings added for new projects are available for future runs
        self.project_mappings.to_csv('project_mappings.csv')

    def _zip_files(self, loc):
        """
        Creates an archive, then deletes the files in the given directory.

        Parameters:
            loc (str): The file path of the directory to modify.
        """
        logger.info("Starting archive process for {0}".format(loc))
        loc_name = loc.split('\\')[-2]

        fls = os.listdir(loc)
        fls = [f for f in fls if f != 'mylog.log']
        newest = sorted(fls, reverse=True)[0]
        oldest = sorted(fls, reverse=False)[0]

        # data files embed a full timestamp; rotated log files only embed a date
        if loc_name != 'logs':
            newest_ts = datetime.strptime(newest.replace(loc_name + '_', '').replace('.pkl', ''), '%Y-%m-%d_%H%M%S')
            oldest_ts = datetime.strptime(oldest.replace(loc_name + '_', '').replace('.pkl', ''), '%Y-%m-%d_%H%M%S')
        else:
            newest_ts = datetime.strptime(newest.replace('mylog.log.', ''), '%Y-%m-%d')
            oldest_ts = datetime.strptime(oldest.replace('mylog.log.', ''), '%Y-%m-%d')

        newest_ts_str = datetime.strftime(newest_ts, '%Y-%m-%d')
        oldest_ts_str = datetime.strftime(oldest_ts, '%Y-%m-%d')

        logger.info("{0} archive contains {1} files from dates {2} to {3}.".format(
            loc, str(len(fls)), oldest_ts_str, newest_ts_str))

        shutil.make_archive(self.DATA_LOCATION + 'archive\\' + loc_name + '_' + oldest_ts_str +
                            '_' + newest_ts_str, root_dir=loc, format='tar', logger=logger)
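        # e.g. (hypothetical dates) projectData_2020-03-01_2020-04-22.tar under DATA_LOC\archive\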

        for f in fls:
            os.remove(loc + f)
            logger.info("File removed at {0}".format(loc + f))
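

# Hypothetical driver sketch (not part of the original module): the method names
# suggest this call order, but the real entry point may live elsewhere, e.g. in
# a scheduled-task wrapper.
if __name__ == '__main__':
    sa = StudyAutomation()
    sa.call_api_get_data(refresh_data=True)   # pull fresh data from KeyedIn
    sa.find_differences()                     # diff against the previous run
    sa.output_csv()                           # emit CSVs for the PowerShell step
    sa.wrap_up()                              # persist state and archive old files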