· 7 years ago · Oct 11, 2018, 08:02 AM
1import psycopg2
2
3# --- SETUP ---
4# The only things that need to be filled in to run the script:
5# dbname, user, password
6# and
7# year, old data filename, category, empty string for later
8input_for_script = (2016, 'ongeval.csv', 'ongevallen', '')
9
10#tables = ["ongevallen_2016.csv", "ongevallen_2017.csv"]
11#change input to
12#old_data = ongeval.csv
13#data_to_add = [data1, data2]
14
15# Connect to an existing database
16conn = psycopg2.connect("dbname=rws_2011_2017 user=postgres password=Prandall19s!")
17
18# --- EXECUTION ---
19
20#This code adds a new year of RWS data to the old dataset.
21#Requirements: 1. all tables are located in the same schema (old years, new year to add, reference tables)
22# 2. the table will only join the attributes from the old table
23# 3. the names of the reference tables end in '.txt.csv' */
24
25# Open a cursor to perform database operations
26cur = conn.cursor()
27
28# Creates settings table. input it takes:
29# the year (to add),
30# the name of the old table (containing the previous years)
31# and category (partij/ongeval/slachtoffer/voertuig/etcetera)
32cur.execute("""
33DROP TABLE IF EXISTS _rws_settings;
34CREATE TABLE _rws_settings (
35 year_to_add INT,
36 old_data text,
37 category text,
38 year_filename text);
39""")
40
41cur.execute("""
42INSERT INTO _rws_settings
43 (year_to_add, old_data, category, year_filename)
44VALUES
45 (%s, %s, %s, %s);
46 """,
47 input_for_script)
48
49cur.execute("""
50UPDATE _rws_settings
51SET year_filename = CONCAT(_rws_settings.category, '_', _rws_settings.year_to_add, '.csv');
52""")
53
54
55# Creates lookup table _rws_datatypes, which states the column names and datatypes of old_data from _rws_settings
56cur.execute("""
57DROP TABLE IF EXISTS _rws_datatypes;
58CREATE TABLE _rws_datatypes AS
59SELECT column_name, data_type FROM INFORMATION_SCHEMA.columns
60WHERE table_name = (SELECT old_data FROM _rws_settings);
61""")
62
63# Creates lookup table _rws_references: It takes the columns ending in '_ID' or '_CODE' and the name of the reference table.
64# With this, it'll be possible to look up whether columns in the new table (year to add) have to be transformed into '_OMS'.
65cur.execute("""
66DROP TABLE IF EXISTS _rws_references;
67CREATE TABLE _rws_references AS
68SELECT column_name, table_name FROM INFORMATION_SCHEMA.columns
69WHERE table_name LIKE '%.txt.csv'
70AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
71""")
72
73# Selects all columns from new table that end in '_ID' or '_CODE', and outputs it into a Python list
74cur.execute("""
75DROP TABLE IF EXISTS _rws_columns_to_convert;
76CREATE TABLE _rws_columns_to_convert AS
77SELECT column_name FROM INFORMATION_SCHEMA.columns
78WHERE table_name LIKE (SELECT _rws_settings.year_filename FROM _rws_settings)
79AND (column_name LIKE '%\_ID' OR column_name LIKE '%\_CODE');
80""")
81
82#Makes a list on what columns need to be converted with a reference table
83cur.execute("""
84SELECT _rws_columns_to_convert."column_name" FROM _rws_columns_to_convert;
85""")
86list_to_convert = cur.fetchall()
87#remove '(' and '),'
88list_to_convert = [i[0] for i in list_to_convert]
89
90
91# --- DIT IS WAT RUDY GEDAAN HEEFT OMG
92#Query voor de loop, selecteert de juiste reference table gebaseerd op het iterable item in de loop
93get_table_name_query = """
94SELECT table_name from _rws_references WHERE column_name = '{0}'
95"""
96
97#Create tables (tables to convert/merge) ["ongevallen_2016.csv", "ongevallen_2017.csv"]
98#create all data table and fill it with the old data
99tables = ["ongevallen_2016.csv"]
100all_data = cur.execute("""
101DROP TABLE IF EXISTS _rws_all_temp;
102CREATE TABLE _rws_all_temp AS SELECT * FROM (SELECT _rws_settings.old_data FROM _rws_settings) AS derived_temp
103""")
104
105#function that converts _ID or _CODE into _OMS
106#0= data table (2016 for example) - table_to_convert
107#1= the reference table
108#2=item name (but the _OMS version, needs function turn_id_or_code_into_oms)
109#3=item name, iterable (_ID or _CODE)
110def run_rudy_query(table_to_convert, from_table, item_name):
111 rudy_query = """
112 SELECT "{0}".*, oms_table."{2}"
113 FROM "{0}"
114 LEFT JOIN "{1}" as oms_table
115 ON "{0}"."{3}" = oms_table."{3}"
116 """
117
118 finished_query = rudy_query.format(table_to_convert, from_table, turn_id_or_code_into_oms(item_name), item_name)
119 print(finished_query)
120 cur.execute(rudy_query)
121
122#changes extension _ID and _CODE into _OMS (for easy lookup in reference table)
123def turn_id_or_code_into_oms(name):
124 if(name.find("_CODE")):
125 return name.replace("_CODE", "_OMS")
126 else:
127 return name.replace("_ID", "_OMS")
128
129# Loop through all the tables and convertable items, find all reference tables and start function run_rudy_query
130for table in tables:
131 for item in list_to_convert:
132 print('ref table name: ', get_table_name_query.format(item))
133 from_table = cur.execute(get_table_name_query.format(item))
134 run_rudy_query(table, from_table, turn_id_or_code_into_oms(item))
135
136#def update_employee(id, name, birthdate):
137# cur.execute("UPDATE employees SET name={0}, birthdate={1} WHERE ID = {2}".format(name, birthdate, id))
138
139# --- TOT HIER, OK?
140
141
142# --- THIS IS NOT WORKING, KEEPING IT HERE FOR REFERENCE
143# per column name to convert:
144# find reference file to use
145# use that file for a left join (create a new table the first time)
146#for item in list_to_convert:
147# print(item)
148# reference_table = (("""SELECT _rws_references."table_name" FROM _rws_references WHERE _rws_references."column_name" LIKE %s """), item)
149# rws_data = ("""SELECT _rws_settings.year_filename FROM _rws_settings""")
150
151# cur.execute("""DROP TABLE IF EXISTS _year_to_add_oms;
152# CREATE TABLE _year_to_add_oms AS
153# SELECT * FROM (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table1, %s
154# LEFT JOIN (SELECT _rws_settings.year_filename FROM _rws_settings) AS _derived_table2 ON
155# (SELECT * FROM (SELECT _rws_columns_to_convert.%s FROM _rws_columns_to_convert) AS derived_table5)
156# = (SELECT * FROM (SELECT _rws_references.%s FROM _rws_references) AS _derived_table4)""", reference_table, , rws_data, rws_data, item)
157# --- END OF NON-WORKING CODE
158
159# --- TO DO
160#change value from null-columns to correct datatype
161#check for and add missing columns
162#remove unnecessary columns
163#union old and new data
164#create index? aliases?
165# create linking tables (maybe other script)
166
167# --- FINISH UP RUNNING THE SCRIPT
168# Make the changes to the database persistent
169conn.commit()
170
171# Close communication with the database
172cur.close()
173conn.close()