· 4 years ago · May 12, 2021, 01:32 PM
1{
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 493,
6 "id": "median-think",
7 "metadata": {},
8 "outputs": [],
9 "source": [
10 "import pymysql as ms\n",
11 "import pandas as pd\n",
12 "\n",
13 "connection = ms.connect(host='127.0.0.1',\n",
14 " user = 'root',\n",
15 " password = 'Jijineko827',\n",
16 " db = 'project',\n",
17 " cursorclass = ms.cursors.DictCursor)\n",
18 "cursor = connection.cursor()"
19 ]
20 },
21 {
22 "cell_type": "code",
23 "execution_count": 494,
24 "id": "another-brisbane",
25 "metadata": {},
26 "outputs": [],
27 "source": [
28 "factor = pd.read_csv(\"number-of-deaths-by-risk-factor.csv\", usecols = [0, 2, 23, 6, 29], names = [\"Location_ID\",\"Year\", \"Smoking_Deaths\", \"Household_Air Pollution_Deaths\", \"Outdoor Air Pollution Deaths\"], skiprows = 1)\n",
29 "delete = factor[(factor[\"Year\"] ==1990)|(factor[\"Year\"]== 1991)|(factor[\"Year\"]== 1992)|(factor[\"Year\"]== 1993)|(factor[\"Year\"]== 1994)|(factor[\"Year\"]== 1995)|(factor[\"Year\"]== 1996)|(factor[\"Year\"]== 1997)|(factor[\"Year\"]== 1998)|(factor[\"Year\"]== 1999)|(factor[\"Year\"]== 2000)|(factor[\"Year\"]== 2001)|(factor[\"Year\"]== 2002)|(factor[\"Year\"]== 2003)|(factor[\"Year\"]== 2004)|(factor[\"Year\"]== 2005)|(factor[\"Year\"]== 2006)|(factor[\"Year\"]== 2007)|(factor[\"Year\"]== 2008)|(factor[\"Year\"]== 2009)|(factor[\"Year\"]== 2010)|(factor[\"Year\"]== 2011)|(factor[\"Year\"]== 2012)|(factor[\"Year\"]== 2013)|(factor[\"Year\"]== 2014)|(factor[\"Year\"]== 2015)|(factor[\"Year\"]== 2016)].index\n",
30 "factor.drop(delete , inplace = True)\n",
31 "\n",
32 "PM25 = pd.read_csv(\"PM25.csv\", usecols = [0, 7, 8, 9], names = [\"Location_ID\", \"PM2015\", \"PM2016\", \"PM2017\"], skiprows = 1)\n",
33 "covid_case = pd.read_csv(\"COVID_Case.csv\",low_memory=False, usecols = [0,1, 2,7] ,names =[\"Location_ID\",\"Region\",\"cumulative_cases\", \"cumulative_death\"])\n",
34 "\n"
35 ]
36 },
37 {
38 "cell_type": "code",
39 "execution_count": 495,
40 "id": "available-bleeding",
41 "metadata": {},
42 "outputs": [],
43 "source": [
44 "fact = pd.merge(factor,PM25, how = \"inner\", on = \"Location_ID\")\n",
45 "joined = fact[[\"Location_ID\",\"PM2015\", \"PM2016\", \"PM2017\", \"Smoking_Deaths\", \"Household_Air Pollution_Deaths\", \"Outdoor Air Pollution Deaths\"]]\n",
46 "fact_table = pd.merge(joined, covid_case, how = \"inner\", on = \"Location_ID\")\n"
47 ]
48 },
49 {
50 "cell_type": "code",
51 "execution_count": 496,
52 "id": "stable-blank",
53 "metadata": {
54 "scrolled": true
55 },
56 "outputs": [],
57 "source": [
58 "Correlation_Fact = pd.merge(right = fact_table, left = covid_case, how = \"inner\")\n",
59 "Correlation_Fact = Correlation_Fact[[\"Location_ID\",\"PM2015\", \"PM2016\", \"PM2017\", \"Smoking_Deaths\",\"Household_Air Pollution_Deaths\", \"Outdoor Air Pollution Deaths\"]]\n",
60 "\n"
61 ]
62 },
63 {
64 "cell_type": "code",
65 "execution_count": 497,
66 "id": "circular-million",
67 "metadata": {
68 "scrolled": true
69 },
70 "outputs": [],
71 "source": [
72 "location_dim = pd.merge(right = fact, left = covid_case, how = \"inner\", on = \"Location_ID\")\n",
73 "location_dim = location_dim[[\"Location_ID\",\"Region\"]]\n"
74 ]
75 },
76 {
77 "cell_type": "code",
78 "execution_count": 498,
79 "id": "sixth-violin",
80 "metadata": {},
81 "outputs": [],
82 "source": [
83 "sql2 = \"\"\"CREATE TABLE IF NOT EXISTS Location_Dim (\n",
84 " Location_ID varchar(100), \n",
85 " Country varchar(100),\n",
86 " PRIMARY KEY (Location_ID)\n",
87 " );\"\"\"\n",
88 "\n",
89 "cursor.execute(sql2)\n",
90 "connection.commit()\n"
91 ]
92 },
93 {
94 "cell_type": "code",
95 "execution_count": 512,
96 "id": "monetary-biology",
97 "metadata": {},
98 "outputs": [],
99 "source": [
100 "sql = \"\"\"CREATE TABLE IF NOT EXISTS Correlation_Fact (\n",
101 " Location_ID varchar(100), \n",
102 " PM2015 varchar(1000), \n",
103 " PM2016 varchar(1000), \n",
104 " PM2017 varchar(1000), \n",
105 " Smoking_Deaths varchar(1000), \n",
106 " Household_Air_Pollution_Deaths varchar(1000), \n",
107 " Outdoor_Air_Pollution_Deaths varchar(1000),\n",
108 " FOREIGN KEY (Location_ID) REFERENCES Location_Dim(Location_ID)\n",
109 " );\"\"\"\n",
110 "\n",
111 "\n",
112 "cursor.execute(sql)\n",
113 "connection.commit()\n"
114 ]
115 },
116 {
117 "cell_type": "code",
118 "execution_count": 513,
119 "id": "activated-victim",
120 "metadata": {},
121 "outputs": [],
122 "source": [
123 "Correlation_Fact_data = [tuple(i) for i in Correlation_Fact.values.tolist()]\n",
124 "location_dim_data = [tuple(m) for m in location_dim.values.tolist()]\n"
125 ]
126 },
127 {
128 "cell_type": "code",
129 "execution_count": 514,
130 "id": "artistic-vegetarian",
131 "metadata": {},
132 "outputs": [],
133 "source": [
134 "cursor.execute(\"TRUNCATE TABLE Location_Dim;\")\n",
135 "connection.commit()\n",
136 "\n",
137 "insertData = \"INSERT INTO Location_Dim VALUES (%s, %s);\"\n",
138 "cursor.executemany(insertData, location_dim_data)\n",
139 "connection.commit()\n"
140 ]
141 },
142 {
143 "cell_type": "code",
144 "execution_count": 515,
145 "id": "capable-avenue",
146 "metadata": {},
147 "outputs": [],
148 "source": [
149 "cursor.execute(\"TRUNCATE TABLE Correlation_Fact;\")\n",
150 "connection.commit()"
151 ]
152 },
153 {
154 "cell_type": "code",
155 "execution_count": 516,
156 "id": "sticky-ireland",
157 "metadata": {},
158 "outputs": [],
159 "source": [
160 "\n",
161 "insertData_fact = \"INSERT INTO Correlation_Fact VALUES (%s,%s,%s,%s,%s,%s,%s);\"\n",
162 "cursor.executemany(insertData_fact, Correlation_Fact_data)\n",
163 "connection.commit()\n"
164 ]
165 },
166 {
167 "cell_type": "code",
168 "execution_count": null,
169 "id": "consecutive-preference",
170 "metadata": {},
171 "outputs": [],
172 "source": []
173 }
174 ],
175 "metadata": {
176 "kernelspec": {
177 "display_name": "Python 3",
178 "language": "python",
179 "name": "python3"
180 },
181 "language_info": {
182 "codemirror_mode": {
183 "name": "ipython",
184 "version": 3
185 },
186 "file_extension": ".py",
187 "mimetype": "text/x-python",
188 "name": "python",
189 "nbconvert_exporter": "python",
190 "pygments_lexer": "ipython3",
191 "version": "3.8.2"
192 }
193 },
194 "nbformat": 4,
195 "nbformat_minor": 5
196}
197