· 6 years ago · Jan 21, 2020, 11:18 AM
1#!/usr/bin/python3
2import psycopg2
3from Object_classes import Gen
4from Object_classes import Eiwit
5from Object_classes import RNA
6from Object_classes import EC_nummer
7from Object_classes import Pathway
8from Object_classes import Cancer_pathway
9
10import Insert_functions
11
12
13# Een functie om met een database te connecten, het returned de connection en de cursor.
14def connect(host, dbname, user, password):
15 # connectie maken met de database en postgres
16 conn_string = "host= '%s' dbname= '%s' user= '%s' password='%s'" % (host, dbname, user, password)
17 # print("Connecting to database...\n ->%s" % (conn_string))
18 print("Connecting to database...")
19 conn = psycopg2.connect(conn_string)
20 # cursor aangemaakt om SQL commando's uit te voeren
21 cursor = conn.cursor()
22 print("Connected!\n")
23 return conn, cursor
24
25
26# Leegt de database als hij gecalled wordt, en verbonden is.
27def empty_database(ed_cursor):
28 # drop de tabellen als ze al bestaan
29 ed_cursor.execute("""DROP TABLE IF EXISTS gen_A6 CASCADE;""")
30 ed_cursor.execute("""DROP TABLE IF EXISTS RNA_A6 CASCADE;""")
31 ed_cursor.execute("""DROP TABLE IF EXISTS eiwit_A6 CASCADE;""")
32 ed_cursor.execute("""DROP TABLE IF EXISTS eiwit_EC_A6 CASCADE;""")
33 ed_cursor.execute("""DROP TABLE IF EXISTS pathway_A6 CASCADE;""")
34 ed_cursor.execute("""DROP TABLE IF EXISTS EC_pathway_A6 CASCADE;""")
35 ed_cursor.execute("""DROP TABLE IF EXISTS EC_nummer_A6 CASCADE;""")
36 ed_cursor.execute("""DROP TABLE IF EXISTS Pathway_Cancer_A6 CASCADE;""")
37 ed_cursor.execute("""DROP TABLE IF EXISTS pathway_SHR_A6 CASCADE;""")
38 print("Dropped previous tables with the same name if they existed...")
39
40
41# Roept de verwijderfunctie aan, maakt de tabellen en roept vervolgens de foreign keys en constaints functie aan.
42def tabellen_maken(conn, cursor):
43 empty_database(cursor)
44 sql = """
45 CREATE TABLE gen_A6(
46 Entry_id INT PRIMARY KEY,
47 Gen_id INT,
48 Gen_naam VARCHAR(100),
49 Gen_splicing_count_begin INT,
50 Gen_splicing_count_end INT
51 );
52
53 CREATE TABLE Pathway_Cancer_A6(
54 Cancer_id INT PRIMARY KEY ,
55 KO_code VARCHAR(7),
56 Naam_cancer_protein TEXT
57 );
58
59 CREATE TABLE Pathway_A6(
60 Pathway TEXT PRIMARY KEY
61 );
62
63 CREATE TABLE pathway_SHR_A6(
64 Pathway TEXT REFERENCES Pathway_A6 (Pathway),
65 SHR_code VARCHAR(10),
66 PRIMARY KEY (Pathway, SHR_code)
67 );
68
69 CREATE TABLE EC_nummer_A6(
70 EC_nummer VARCHAR(25) PRIMARY KEY,
71 EC_nummer_uitleg TEXT,
72 Reactie TEXT
73 );
74
75 --Koppel pathway en EC nummer
76 CREATE TABLE EC_pathway_A6(
77 Pathway VARCHAR(10) REFERENCES Pathway_A6 (Pathway),
78 EC_nummer VARCHAR(25) REFERENCES EC_nummer_A6 (EC_nummer),
79 PRIMARY KEY (Pathway, EC_nummer)
80 );
81
82 CREATE TABLE eiwit_A6(
83 Eiwit_naam VARCHAR(200),
84 Eiwit_sequence TEXT,
85 Eiwit_id TEXT PRIMARY KEY,
86 KO_code VARCHAR(7)
87 );
88
89
90
91 --Koppel gen en eiwit
92 CREATE TABLE RNA_A6(
93 Gen_id INT REFERENCES gen_A6 (Entry_id),
94 Gen_splicing_count_begin INT ,
95 Gen_splicing_count_end INT ,
96 RNA_code VARCHAR(15),
97 Eiwit_id TEXT REFERENCES eiwit_A6 (Eiwit_id),
98 RNA_sequence TEXT,
99 PRIMARY KEY (Gen_id, Gen_splicing_count_begin, Gen_splicing_count_end)
100 );
101
102
103 -- koppeltabel eiwit_ec
104 CREATE TABLE eiwit_EC_A6(
105 Eiwit_id TEXT REFERENCES eiwit_A6 (Eiwit_id) ,
106 EC_nummer VARCHAR(25) REFERENCES EC_nummer_A6 (EC_nummer),
107 PRIMARY KEY (Eiwit_id, EC_nummer)
108 );"""
109 cursor.execute(sql)
110 conn.commit()
111 print("New tables created!")
112
113 # add_constraints(conn, cursor)
114
115
116# Voegd de foreign keys toe aan de tabellen in de database wanneer het wordt gecalled.
117# def add_constraints(conn, cursor):
118# sql = """
119# ALTER TABLE pathway_SHR_A6
120# ADD FOREIGN KEY (Pathway) REFERENCES Pathway_A6(Pathway);
121#
122# ALTER TABLE EC_pathway_A6
123# ADD FOREIGN KEY (Pathway) REFERENCES pathway_A6(Pathway);
124# ALTER TABLE EC_pathway_A6
125# ADD FOREIGN KEY (EC_nummer) REFERENCES EC_nummer_A6(EC_nummer);
126#
127# ALTER TABLE RNA_A6
128# ADD FOREIGN KEY (Entry_id) REFERENCES gen_A6(Entry_id);
129# ALTER TABLE RNA_A6
130# ADD FOREIGN KEY (Gen_splicing_count) REFERENCES gen_a6(Gen_splicing_count);
131# ALTER TABLE RNA_A6
132# ADD FOREIGN KEY (Eiwit_id) REFERENCES Eiwit_A6(Eiwit_ID);
133#
134# ALTER TABLE eiwit_EC_A6
135# ADD FOREIGN KEY (Eiwit_id) REFERENCES Eiwit_A6(Eiwit_naam);
136# ALTER TABLE eiwit_EC_A6
137# ADD FOREIGN KEY (EC_nummer) REFERENCES EC_nummer_A6(EC_nummer);
138# """
139# cursor.execute(sql)
140# conn.commit()
141# print("Added specified table constraints and Foreign Keys!")
142
143
144# gecodeerd om eiwit_gen_naam.txt uit te lezen en lists te returnen voor alle elementen
145# (make sure dat er geen spaties of tabs tussen de komma's zitten!)
146def bestanduitlezer(bestandnaam):
147 inhoud = open(str(bestandnaam), "r")
148 big_gen_list = []
149 big_eiwit_list = []
150 big_rna_list = []
151 for regel in inhoud:
152 regel.lstrip()
153 inhoud_lijst = regel.split("@")
154 inhoud_lijst.remove("\n")
155 gen_list = []
156 eiwit_list = []
157 rna_list = []
158 eiwit_list.append(inhoud_lijst[1])
159 eiwit_list.append(inhoud_lijst[2])
160 eiwit_list.append(inhoud_lijst[0])
161 gen_list.append(inhoud_lijst[7])
162 gen_list.append(inhoud_lijst[6])
163
164 gen_list.append(inhoud_lijst[3].split("..")[0])
165 gen_list.append(inhoud_lijst[3].split("..")[1])
166
167 rna_list.append(inhoud_lijst[4])
168 rna_list.append(inhoud_lijst[5])
169 big_gen_list.append(gen_list)
170 big_eiwit_list.append(eiwit_list)
171 big_rna_list.append(rna_list)
172 # print(gen_list, "help")
173 # print(big_gen_list)
174 # print(big_eiwit_list)
175 # print(big_rna_list)
176 return big_gen_list, big_eiwit_list, big_rna_list
177
178
179def cancerbestanduitlezer(bestandnaam):
180 big_cancer_list = []
181 inhoud = open(str(bestandnaam), "r")
182 for regel in inhoud:
183 cancer_list = []
184 regel.lstrip()
185 inhoud_lijst = regel.split("@")
186 inhoud_lijst.remove("\n")
187 cancer_list.append(inhoud_lijst[0])
188 cancer_list.append(inhoud_lijst[1])
189 big_cancer_list.append(cancer_list)
190 # print(big_cancer_list)
191 return big_cancer_list
192
193
194def pathwaybestanduitlezer(il_file):
195 infile = open(il_file, 'r')
196 bestand = infile.readlines()
197 infile.close()
198 gegevens = bestand
199 big_pathway_list = []
200 Eiwit_KO_Code_list = []
201 for i in range(len(gegevens)):
202 pathway_list = []
203 data = gegevens[i].split('@')
204
205 shr, pathway, KO_code = data[0], data[1].split('|'), data[3]
206 pathway_list.append(shr)
207 pathway_list.append(pathway)
208 big_pathway_list.append(pathway_list)
209 Eiwit_KO_Code_list.append(KO_code)
210 return big_pathway_list, Eiwit_KO_Code_list
211
212
213def ECbestanduitlezer(EC_file):
214 infile = open(EC_file, 'r')
215 gegevens = infile.readlines()
216 infile.close()
217 big_EC_list = []
218 ec_nummer_lijst =[]
219
220 for i in range(len(gegevens)):
221 EC_list = []
222 data = gegevens[i].split('@')
223 EC, reactie, uitleg = data[0], data[1].split('|'), data[2]
224
225 if EC not in ec_nummer_lijst:
226 ec_nummer_lijst.append(EC)
227 # if EC[0] not in EC_list:
228 EC_list.append(EC)
229 EC_list.append(reactie)
230 EC_list.append(uitleg)
231 big_EC_list.append(EC_list)
232 # print(big_EC_list[0])
233 return big_EC_list
234
235
236# Input zijn 2 lijsten de functie maakt er objects van die die vervolgens in 2 andere lijsten stopt en dan returned
237def packager(p_eiwit_lijst, p_gen_lijst, p_rna_lijst, p_pathway_lijst, p_EC_lijst, p_cancer_lijst,
238 p_Eiwit_KO_Code_list):
239 genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist,only_pathways = [], [], [], [], [], [],[]
240 for genlijst in p_gen_lijst:
241 gen_temp_object = Gen()
242 gen_temp_object.Setgen_id(genlijst[0])
243 gen_temp_object.Setgen_naam(genlijst[1])
244 gen_temp_object.Setgen_splicing_count(genlijst[2:4])
245 genobjectlist.append(gen_temp_object)
246 for eiwitlijst in p_eiwit_lijst:
247 eiwit_temp_object = Eiwit()
248 eiwit_temp_object.Seteiwit_naam(eiwitlijst[0])
249 eiwit_temp_object.Seteiwit_sequence(eiwitlijst[1])
250 eiwit_temp_object.Seteiwit_id(eiwitlijst[2])
251 for code in p_Eiwit_KO_Code_list:
252 eiwit_temp_object.SetKO_code(code)
253 eiwitobjectlist.append(eiwit_temp_object)
254 for rnalijst in p_rna_lijst:
255 rna_temp_object = RNA()
256 rna_temp_object.SetRNA_sequence(rnalijst[1])
257 rna_temp_object.SetRNA_code(rnalijst[0])
258 rna_objectlist.append(rna_temp_object)
259 for eclijst in p_EC_lijst:
260 EC_temp_object = EC_nummer()
261 EC_temp_object.SetEC_nummer(eclijst[0])
262 EC_temp_object.Setuitleg(eclijst[1])
263 EC_temp_object.Setreactie(eclijst[2])
264 EC_objectlist.append(EC_temp_object)
265 for pathwaylijst in p_pathway_lijst:
266 pathway_temp_object = Pathway()
267 pathway_temp_object.Setpathway(pathwaylijst[1])
268 pathway_temp_object.SetSHR(pathwaylijst[0])
269 pathway_objectlist.append(pathway_temp_object)
270 for cancerlijst in p_cancer_lijst:
271 cancer_temp_object = Cancer_pathway()
272 cancer_temp_object.SetKO_code(cancerlijst[0])
273 cancer_temp_object.Setnaam_prot(cancerlijst[1])
274 cancer_objectlist.append(cancer_temp_object)
275
276 for x in p_pathway_lijst:
277 for y in x[1]:
278 if y !="" and y not in only_pathways:
279 only_pathways.append(y)
280
281 return genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist, only_pathways
282
283
284def main():
285 conn, cursor = connect('145.97.16.241', 'bpapgea6_db', 'bpapgea6',
286 'BpapgeA6') # <---- verander deze string voor een andere database!
287 tabellen_maken(conn, cursor)
288
289 pathway_list, Eiwit_KO_Code_list = pathwaybestanduitlezer("kegg_pathway_EC.txt")
290 EC_list = ECbestanduitlezer("kegg_reactie_functie.txt")
291 gen_list, eiwit_list, rna_list = bestanduitlezer("tabel_info_nieuw.txt")
292 cancer_list = cancerbestanduitlezer("cancer_pathway.txt")
293
294 genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist, only_pathway_list = \
295 packager(eiwit_list, gen_list, rna_list, pathway_list, EC_list, cancer_list, Eiwit_KO_Code_list)
296
297 # for i in genobjectlist:
298 # print(i.Getgen_id())
299
300 # for i in genobjectlist: print(i)
301
302 # for i in genobjectlist: print(i)
303 Insert_functions.geninserts(conn, cursor, genobjectlist)
304 Insert_functions.pathwayinserts(conn, cursor, only_pathway_list)
305 Insert_functions.cancerpathwayinserts(conn, cursor, cancer_objectlist)
306 Insert_functions.pathway_SHR_inserts(conn, cursor, pathway_objectlist)
307 Insert_functions.ec_nummerinserts(conn, cursor, EC_objectlist)
308 Insert_functions.eiwitinserts(conn, cursor, eiwitobjectlist)
309 Insert_functions.rnainserts(conn, cursor, genobjectlist, rna_objectlist, eiwitobjectlist)
310
311
312 print("pop")
313
314 Insert_functions.koppeltabelinserts(conn, cursor, EC_objectlist, eiwitobjectlist, pathway_objectlist)
315 # empty_database(cursor) <---- for if you need to clean up!
316 conn.close()
317
318
319if __name__ == "__main__":
320 main()