· 7 years ago · Dec 07, 2018, 08:34 AM
1import pdb
2import copy
3import threading
4from itertools import product
5import datetime
6import logging
7
8from . import setup_environment
9
10log = logging.getLogger(__name__)
11
12
13def create_labels_table(config, table_name):
14 """Build the features table for the type of model (officer/dispatch) specified in the config file"""
15
16 engine = setup_environment.get_database()
17 if config['unit'] == 'officer':
18 create_officer_labels_table(config, table_name, engine)
19 # TODO:
20 #if config['unit'] == 'dispatch':
21 # create_dispatch_labels_table(config, table_name)
22
23
24def populate_labels_table(config, labels_config, table_name):
25 """Calculate values for all features which are set to True (in the config file)
26 for the appropriate run type (officer/dispatch)
27 """
28 engine = setup_environment.get_database()
29 if config['unit'] == 'officer':
30 populate_officer_labels_table(config, labels_config, table_name, engine)
31
32def create_officer_labels_table(config, table_name, engine):
33 """ Creates a features.table_name table within the features schema """
34
35
36 # drop the old features table
37 log.info("Dropping the old officer labels table: {}".format(table_name))
38 engine.execute("DROP TABLE IF EXISTS features.{}".format(table_name) )
39
40 # use the appropriate id column, depending on feature types (officer / dispatch)
41 id_column = '{}_id'.format(config['unit'])
42
43 # Create and execute a query to create a table with a column for each of the labels.
44 log.info("Creating new officer feature table: {}...".format(table_name))
45 create_query = ( "CREATE TABLE features.{} ( "
46 " {} int, "
47 " event_id int, "
48 " event_datetime timestamp, "
49 " event_type text, "
50 " value text);"
51 .format(
52 table_name,
53 id_column))
54
55 engine.execute(create_query)
56 query_index = ("CREATE INDEX ON features.{} (event_datetime, officer_id)".format(table_name))
57 query_index = ("CREATE INDEX ON features.{} (event_id)".format(table_name))
58 engine.execute(query_index)
59
60def column_date(nested_dict, dict_columns=dict()):
61 temp_dict= {}
62 if isinstance(nested_dict, dict):
63 temp_dict[nested_dict['COLUMN']] = nested_dict['DATE_COLUMN']
64 dict_columns.update(temp_dict)
65 for val in nested_dict['VALUES']:
66 if isinstance(val, dict):
67 for key in val.keys():
68 column_date(val[key], dict_columns)
69 return dict_columns
70
71def populate_officer_labels_table(config, labels_config, table_name, engine):
72 """ Populates officer labels table in the database using staging.incidents.
73 """
74
75 dict_columns = dict()
76 for labels in labels_config.keys():
77 dict_columns.update(column_date(labels_config[labels], dict_columns))
78
79 query_list = []
80 for column, date_column in dict_columns.items():
81 query_list.append("SELECT officer_id, "
82 " event_id, "
83 " {event_datetime} as event_datetime, "
84 " '{event_type}' as event_type, "
85 " {event_type}::TEXT as value "
86 " FROM staging.incidents "
87 " WHERE {event_type}::TEXT is not NULL "
88 " AND {event_datetime} is not NULL "
89 " AND officer_id is not NULL "
90 .format(event_datetime=date_column,
91 event_type=column))
92
93 query_join = " UNION ".join(query_list)
94 insert_query = ( "INSERT INTO features.{0} "
95 " ( officer_id, "
96 " event_id, "
97 " event_datetime, "
98 " event_type, "
99 " value ) "
100 " {1} "
101 .format(table_name, query_join))
102
103 engine.execute(insert_query)
104
105 # Create indexes
106 create_event_id_idx = (""" Create index on features.{0} (officer_id, event_id); """.format(table_name))
107 engine.execute(create_event_id_idx)