· 6 years ago · May 16, 2019, 11:08 PM
1#Create Schema
2pgquery (conn, """ CREATE SCHEMA IF NOT EXISTS Cyclability;
3set search_path = Cyclability;""", msg = "create new schema Cyclability ")
4
5#Read data
6statistical_areas = pd.read_csv('StatisticalAreas.csv',na_values = '')
7neighbourhoods = pd.read_csv('Neighbourhoods.csv', na_values = '')
8census_stats = pd.read_csv('CensusStats.csv',na_values='')
9business_stats = pd.read_csv('BusinessStats.csv',na_values='')
10business_stats = business_stats.dropna(subset=['num_businesses'])
11bike_sharing_pods = pd.read_csv('BikeSharingPods.csv',na_values='')
12
13#Drop tables
14pgquery(conn, """DROP TABLE IF EXISTS Neighbourhoods;
15DROP TABLE IF EXISTS CensusStats;
16DROP TABLE IF EXISTS BusinessStats;
17DROP TABLE IF EXISTS BikeSharingPods;
18DROP TABLE IF EXISTS StatisticalAreas;
19""",msg = 'Tables dropped')
20
21#Create tables and insert values
22
23pgquery(conn, """CREATE TABLE IF NOT EXISTS StatisticalAreas(
24area_id VARCHAR PRIMARY KEY,
25area_name VARCHAR NOT NULL,
26parent_area_id VARCHAR NOT NULL
27)""", msg = 'StatisticalAreas Table created')
28
29insert_stmt = """INSERT INTO StatisticalAreas(area_id,area_name,parent_area_id)
30 VALUES (%s,%s,%s)"""
31for idx,row in statistical_areas.iterrows():
32 pgexec (conn, insert_stmt, row, "row inserted", silent = True)
33
34pgquery(conn, """CREATE TABLE IF NOT EXISTS Neighbourhoods(
35area_id VARCHAR REFERENCES StatisticalAreas(area_id),
36area_name VARCHAR NOT NULL,
37land_area FLOAT NOT NULL,
38population INTEGER NOT NULL,
39number_of_dwellings INTEGER NOT NULL,
40number_of_businesses INTEGER NOT NULL
41) """, msg = ' Neighbourhood Table created')
42
43insert_stmt = """INSERT INTO Neighbourhoods(area_id,area_name,land_area,population,number_of_dwellings,number_of_businesses)
44 VALUES (%s,%s,%s,%s,%s,%s)"""
45for idx,row in neighbourhoods.iterrows():
46 pgexec (conn, insert_stmt, row, "row inserted", silent = True)
47
48pgquery(conn, """ CREATE TABLE IF NOT EXISTS CensusStats(
49area_id VARCHAR REFERENCES StatisticalAreas(area_id),
50median_annual_household_income INTEGER,
51avg_monthly_rent INTEGER
52) """, msg = 'CensusStats Table created')
53
54insert_stmt = """INSERT INTO CensusStats(area_id,median_annual_household_income,avg_monthly_rent)
55 VALUES (%s,%s,%s)"""
56for idx,row in census_stats.iterrows():
57 pgexec (conn, insert_stmt, row, "row inserted", silent = False)
58
59pgquery(conn, """CREATE TABLE IF NOT EXISTS BusinessStats(
60area_id VARCHAR REFERENCES StatisticalAreas(area_id),
61num_businesses INTEGER,
62retail_trade INTEGER,
63accomodation_and_food_services INTEGER,
64health_care_and_social_assistance INTEGER,
65education_and_training INTEGER,
66arts_and_recreation_services INTEGER
67) """, msg = 'BusinessStats Table created')
68
69insert_stmt = """INSERT INTO BusinessStats(area_id,num_businesses,retail_trade,accomodation_and_food_services,health_care_and_social_assistance,education_and_training,arts_and_recreation_services)
70 VALUES (%s,%s,%s,%s,%s,%s,%s)"""
71for idx,row in business_stats.iterrows():
72 pgexec (conn, insert_stmt, row, "row inserted", silent = True)
73
74
75pgquery(conn, """CREATE TABLE IF NOT EXISTS BikeSharingPods(
76station_id VARCHAR PRIMARY KEY,
77name VARCHAR UNIQUE,
78num_bikes INTEGER,
79num_scooters INTEGER,
80latitiude FLOAT,
81longitude FLOAT,
82description VARCHAR
83) """, msg = 'BikeSharingPods Table created')
84
85insert_stmt = """INSERT INTO BikeSharingPods(station_id,name,num_bikes,num_scooters,latitiude,longitude,description)
86 VALUES (%s,%s,%s,%s,%s,%s,%s)"""
87for idx,row in bike_sharing_pods.iterrows():
88 pgexec (conn, insert_stmt, row, "row inserted", silent = True)