· 7 years ago · Oct 11, 2018, 08:56 AM
1import psycopg2
2
3# --- SETUP ---
4# The only things that need to be filled in to run the script:
5# dbname, user, password
6# and
7# year, old data filename, category, empty string for later
8input_for_script = (2016, 'ongeval.csv', 'ongevallen', '')
9
10#tables = ["ongevallen_2016.csv", "ongevallen_2017.csv"]
11#change input to
12#old_data = ongeval.csv
13#data_to_add = [data1, data2]
14
15# Connect to an existing database
16conn = psycopg2.connect("dbname=rws_2011_2017 user=postgres password=Prandall19s!")
17
18# --- EXECUTION ---
19
20#This code adds a new year of RWS data to the old dataset.
21#Requirements: 1. all tables are located in the same schema (old years, new year to add, reference tables)
22# 2. the table will only join the attributes from the old table
23# 3. the names of the reference tables end in '.txt.csv' */
24
25# Open a cursor to perform database operations
26cur = conn.cursor()
27
28# Creates settings table. input it takes:
29# the year (to add),
30# the name of the old table (containing the previous years)
31# and category (partij/ongeval/slachtoffer/voertuig/etcetera)
32cur.execute("""
33DROP TABLE IF EXISTS _rws_settings;
34CREATE TABLE _rws_settings (
35 year_to_add INT,
36 old_data text,
37 category text,
38 year_filename text);
39""")
40
41cur.execute("""
42INSERT INTO _rws_settings
43 (year_to_add, old_data, category, year_filename)
44VALUES
45 (%s, %s, %s, %s);
46 """,
47 input_for_script)
48
49cur.execute("""
50UPDATE _rws_settings
51SET year_filename = CONCAT(_rws_settings.category, '_', _rws_settings.year_to_add, '.csv');
52""")
53
54
55# Creates lookup table _rws_datatypes, which states the column names and datatypes of old_data from _rws_settings
56cur.execute("""
57DROP TABLE IF EXISTS _rws_datatypes;
58CREATE TABLE _rws_datatypes AS
59SELECT column_name, data_type FROM INFORMATION_SCHEMA.columns
60WHERE table_name = (SELECT old_data FROM _rws_settings);
61""")
62
63# Creates lookup table _rws_references: It takes the columns ending in '_ID' or '_CODE' and the name of the reference table.
64# With this, it'll be possible to look up whether columns in the new table (year to add) have to be transformed into '_OMS'.
65cur.execute("""
66DROP TABLE IF EXISTS _rws_references;
67CREATE TABLE _rws_references AS
68SELECT column_name, table_name FROM INFORMATION_SCHEMA.columns
69WHERE table_name LIKE '%.txt.csv'
70AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
71""")
72
73# Selects all columns from new table that end in '_ID' or '_CODE', and outputs it into a Python list
74cur.execute("""
75DROP TABLE IF EXISTS _rws_columns_to_convert;
76CREATE TABLE _rws_columns_to_convert AS
77SELECT column_name FROM INFORMATION_SCHEMA.columns
78WHERE table_name LIKE (SELECT _rws_settings.year_filename FROM _rws_settings)
79AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
80""")
81
82#Makes a list on what columns need to be converted with a reference table
83cur.execute("""
84SELECT _rws_columns_to_convert."column_name" FROM _rws_columns_to_convert;
85""")
86list_to_convert = cur.fetchall()
87#remove '(' and '),'
88list_to_convert = [i[0] for i in list_to_convert]
89
90print('reference table name manual: ', cur.execute("""SELECT table_name from _rws_references WHERE column_name = 'AOL_ID'"""))
91
92
93# --- DIT IS WAT RUDY GEDAAN HEEFT OMG
94#Query voor de loop, selecteert de juiste reference table gebaseerd op het iterable item in de loop
95get_table_name_query = """
96SELECT table_name from _rws_references WHERE column_name = '{0}'
97"""
98
99#Create tables (tables to convert/merge) ["ongevallen_2016.csv", "ongevallen_2017.csv"]
100#create all data table and fill it with the old data
101tables = ["ongevallen_2016.csv"]
102all_data = cur.execute("""
103DROP TABLE IF EXISTS _rws_all_temp;
104CREATE TABLE _rws_all_temp AS SELECT * FROM (SELECT _rws_settings.old_data FROM _rws_settings) AS derived_temp
105""")
106
107#function that converts _ID or _CODE into _OMS
108#0= data table (2016 for example) - table_to_convert
109#1= the reference table
110#2=item name (but the _OMS version, needs function turn_id_or_code_into_oms)
111#3=item name, iterable (_ID or _CODE)
112def run_rudy_query(table_to_convert, from_table, item_name, item_oms):
113 rudy_query = """
114 SELECT "{0}".*, oms_table."{3}"
115 FROM "{0}"
116 LEFT JOIN "{1}" as oms_table
117 ON "{0}"."{2}" = oms_table."{2}"
118 """
119
120 finished_query = rudy_query.format(table_to_convert, from_table, item_name, item_oms)
121 print('finished query: ', finished_query)
122 cur.execute(finished_query)
123
124#changes extension _ID and _CODE into _OMS (for easy lookup in reference table)
125def turn_id_or_code_into_oms(name):
126 if("_CODE" in name):
127 return name.replace("_CODE", "_OMS")
128 else:
129 return name.replace("_ID", "_OMS")
130
131# Loop through all the tables and convertable items, find all reference tables and start function run_rudy_query
132for table in tables:
133 for item in list_to_convert:
134 #print(cur.execute(get_table_name_query.format(item)))
135 print('ref table name automatic: ', cur.execute(get_table_name_query.format(item)))
136 print('OMS:', turn_id_or_code_into_oms(item))
137 from_table = cur.execute(get_table_name_query.format(item))
138 #from_table = cur.fetchone()[0]
139 run_rudy_query(table, from_table, item, turn_id_or_code_into_oms(item))
140
141
142#def update_employee(id, name, birthdate):
143# cur.execute("UPDATE employees SET name={0}, birthdate={1} WHERE ID = {2}".format(name, birthdate, id))
144
145# --- TOT HIER, OK?
146
147
148# --- THIS IS NOT WORKING, KEEPING IT HERE FOR REFERENCE
149# per column name to convert:
150# find reference file to use
151# use that file for a left join (create a new table the first time)
152#for item in list_to_convert:
153# print(item)
154# reference_table = (("""SELECT _rws_references."table_name" FROM _rws_references WHERE _rws_references."column_name" LIKE %s """), item)
155# rws_data = ("""SELECT _rws_settings.year_filename FROM _rws_settings""")
156
157# cur.execute("""DROP TABLE IF EXISTS _year_to_add_oms;
158# CREATE TABLE _year_to_add_oms AS
159# SELECT * FROM (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table1, %s
160# LEFT JOIN (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table2 ON
161# (SELECT * FROM (SELECT _rws_columns_to_convert.%s FROM _rws_columns_to_convert) AS derived_table5)
162# = (SELECT * FROM (SELECT _rws_references.%s FROM _rws_references) AS _derived_table4)""", reference_table, , rws_data, rws_data, item)
163# --- END OF NON-WORKING CODE
164
165# --- TO DO
166#change value from null-columns to correct datatype
167#check for and add missing columns
168#remove unnecessary columns
169#union old and new data
170#create index? aliases?
171# create linking tables (maybe other script)
172
173# --- FINISH UP RUNNING THE SCRIPT
174# Make the changes to the database persistent
175conn.commit()
176
177# Close communication with the database
178cur.close()
179conn.close()