· 6 years ago · Jan 21, 2020, 05:21 PM
1#!/usr/bin/python3
2import psycopg2
3from Object_classes import Gen
4from Object_classes import Eiwit
5from Object_classes import RNA
6from Object_classes import EC_nummer
7from Object_classes import Pathway
8from Object_classes import Cancer_pathway
9
10import Insert_functions
11
12
13# Een functie om met een database te connecten, het returned de connection en de cursor.
14def connect(host, dbname, user, password):
15 conn_string = "host= '%s' dbname= '%s' user= '%s' password='%s'" % (host, dbname, user, password)
16 print("Connecting to database...")
17 conn = psycopg2.connect(conn_string)
18 cursor = conn.cursor()
19 print("Connected!\n")
20 return conn, cursor
21
22# Leegt de database als hij gecalled wordt, en verbonden is.
23def empty_database(ed_cursor):
24 # drop de tabellen als ze al bestaan
25 ed_cursor.execute("""DROP TABLE IF EXISTS gen_A6 CASCADE;""")
26 ed_cursor.execute("""DROP TABLE IF EXISTS RNA_A6 CASCADE;""")
27 ed_cursor.execute("""DROP TABLE IF EXISTS eiwit_A6 CASCADE;""")
28 ed_cursor.execute("""DROP TABLE IF EXISTS eiwit_EC_A6 CASCADE;""")
29 ed_cursor.execute("""DROP TABLE IF EXISTS pathway_A6 CASCADE;""")
30 ed_cursor.execute("""DROP TABLE IF EXISTS EC_pathway_A6 CASCADE;""")
31 ed_cursor.execute("""DROP TABLE IF EXISTS EC_nummer_A6 CASCADE;""")
32 ed_cursor.execute("""DROP TABLE IF EXISTS Pathway_Cancer_A6 CASCADE;""")
33 ed_cursor.execute("""DROP TABLE IF EXISTS pathway_SHR_A6 CASCADE;""")
34 print("Dropped previous tables with the same name if they existed...")
35
36# Roept de verwijderfunctie aan, maakt de tabellen en roept vervolgens de foreign keys en constaints functie aan.
37def tabellen_maken(conn, cursor):
38 empty_database(cursor)
39 sql = """
40 CREATE TABLE gen_A6(
41 Entry_id INT PRIMARY KEY,
42 Gen_id INT,
43 Gen_naam VARCHAR(100),
44 Gen_splicing_count_begin INT,
45 Gen_splicing_count_end INT
46 );
47
48
49
50 CREATE TABLE Pathway_Cancer_A6(
51 Cancer_id INT PRIMARY KEY ,
52 KO_code VARCHAR(7),
53 Naam_cancer_protein TEXT
54 );
55
56 CREATE TABLE Pathway_A6(
57 Pathway TEXT PRIMARY KEY
58 );
59
60
61
62 CREATE TABLE EC_nummer_A6(
63 EC_nummer VARCHAR(25) PRIMARY KEY,
64 Reactie TEXT,
65
66 Functie TEXT
67
68 );
69
70 --Koppel pathway en EC nummer
71 CREATE TABLE EC_pathway_A6(
72 Pathway TEXT REFERENCES Pathway_A6 (Pathway),
73 EC_nummer VARCHAR(25) REFERENCES EC_nummer_A6 (EC_nummer),
74 PRIMARY KEY (Pathway, EC_nummer)
75 );
76
77 CREATE TABLE eiwit_A6(
78 Eiwit_naam VARCHAR(200),
79 Eiwit_sequence TEXT,
80 Eiwit_id TEXT PRIMARY KEY,
81 KO_code VARCHAR(7)
82 );
83 CREATE TABLE pathway_SHR_A6(
84 Eiwit_id TEXT REFERENCES eiwit_A6(Eiwit_id),
85 Pathway TEXT REFERENCES Pathway_A6 (Pathway),
86 SHR_code VARCHAR(10),
87 PRIMARY KEY (Pathway, SHR_code)
88 );
89
90
91
92 --Koppel gen en eiwit
93 CREATE TABLE rna_a6(
94 Gen_id INT,
95 Entry_id INT,
96 Gen_splicing_count_begin INT ,
97 Gen_splicing_count_end INT ,
98 RNA_code VARCHAR(15),
99 Eiwit_id TEXT REFERENCES eiwit_A6 (Eiwit_id),
100 RNA_sequence TEXT,
101 PRIMARY KEY (Entry_id, Gen_id, Gen_splicing_count_begin, Gen_splicing_count_end)
102 );
103
104
105 """
106 cursor.execute(sql)
107 conn.commit()
108 print("New tables created!")
109
110
111
112# gecodeerd om eiwit_gen_naam.txt uit te lezen en lists te returnen voor alle elementen
113# (make sure dat er geen spaties of tabs tussen de komma's zitten!)
114def bestanduitlezer(bestandnaam):
115 inhoud = open(str(bestandnaam), "r")
116 big_gen_list = []
117 big_eiwit_list = []
118 big_rna_list = []
119 for regel in inhoud:
120 regel.lstrip()
121 inhoud_lijst = regel.split("@")
122 inhoud_lijst.remove("\n")
123 gen_list = []
124 eiwit_list = []
125 rna_list = []
126 eiwit_list.append(inhoud_lijst[1])
127 eiwit_list.append(inhoud_lijst[2])
128 eiwit_list.append(inhoud_lijst[0])
129
130 if eiwit_list not in big_eiwit_list:
131 big_eiwit_list.append(eiwit_list)
132
133 gen_list.append(inhoud_lijst[7])
134 gen_list.append(inhoud_lijst[6])
135
136 gen_list.append(inhoud_lijst[3].split("..")[0])
137 gen_list.append(inhoud_lijst[3].split("..")[1])
138
139 big_gen_list.append(gen_list)
140
141
142
143 rna_list.append(inhoud_lijst[4])
144 rna_list.append(inhoud_lijst[5])
145
146 big_rna_list.append(rna_list)
147
148
149 return big_gen_list, big_eiwit_list, big_rna_list
150
151
152def cancerbestanduitlezer(bestandnaam):
153 big_cancer_list = []
154 inhoud = open(str(bestandnaam), "r")
155 for regel in inhoud:
156 cancer_list = []
157 regel.lstrip()
158 inhoud_lijst = regel.split("@")
159 inhoud_lijst.remove("\n")
160 cancer_list.append(inhoud_lijst[0])
161 cancer_list.append(inhoud_lijst[1])
162 big_cancer_list.append(cancer_list)
163 return big_cancer_list
164
165
166def pathwaybestanduitlezer(il_file):
167 infile = open(il_file, 'r')
168 bestand = infile.readlines()
169 infile.close()
170 gegevens = bestand
171 big_pathway_list = []
172 Eiwit_KO_Code_list = []
173 big_ec_list=[]
174 ec_dict={}
175 for i in range(len(gegevens)):
176 pathway_list = []
177 data = gegevens[i].split('@')
178
179 shr, pathway, KO_code = data[0], data[1].split('|'), data[3]
180
181 if data[2] != " " and pathway != "":
182 ec_nummer=list(data[2].strip("ec:").split(" "))
183
184
185 for x in range(len(ec_nummer)):
186
187 if ec_nummer[x] not in ec_dict.keys():
188 ec_dict[ec_nummer[x]] = pathway
189
190 # print(ec_dict.values(), "hi")
191
192
193
194 else:
195 for key, value in ec_dict.items():
196 if key == ec_nummer[x]:
197 for y in pathway:
198 if y not in value:
199 ec_dict[ec_nummer[x]].append(pathway)
200
201 # print(ec_dict)
202
203 for key, value in ec_dict.items():
204 lijst=[]
205 lijst.append(key)
206 lijst.append(value)
207 print(lijst[1][0])
208 big_ec_list.append(lijst)
209 # print(key, value)
210 #
211
212
213
214
215
216 pathway_list.append(shr)
217 pathway_list.append(pathway)
218 big_pathway_list.append(pathway_list)
219 Eiwit_KO_Code_list.append(KO_code)
220 return big_pathway_list, Eiwit_KO_Code_list, big_ec_list
221
222
223def ECbestanduitlezer(EC_file):
224 infile = open(EC_file, 'r')
225 gegevens = infile.readlines()
226 infile.close()
227 big_EC_list = []
228 ec_nummer_lijst =[]
229 ec_dict={}
230
231 for i in range(len(gegevens)):
232 EC_list = []
233 data = gegevens[i].split('@')
234 EC, reactie, uitleg = data[0], data[1].split('|')[0], data[2]
235
236
237
238 if EC not in ec_nummer_lijst:
239 ec_nummer_lijst.append(EC)
240 EC_list.append(EC)
241 EC_list.append(reactie)
242 EC_list.append(uitleg)
243 big_EC_list.append(EC_list)
244
245
246 return big_EC_list
247
248
249# Input zijn 2 lijsten de functie maakt er objects van die die vervolgens in 2 andere lijsten stopt en dan returned
250def packager(p_eiwit_lijst, p_gen_lijst, p_rna_lijst, p_pathway_lijst, p_EC_lijst, p_cancer_lijst,
251 p_Eiwit_KO_Code_list):
252 genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist,only_pathways = [], [], [], [], [], [],[]
253 for genlijst in p_gen_lijst:
254 gen_temp_object = Gen()
255 gen_temp_object.Setgen_id(genlijst[0])
256 gen_temp_object.Setgen_naam(genlijst[1])
257 gen_temp_object.Setgen_splicing_count(genlijst[2:4])
258 genobjectlist.append(gen_temp_object)
259 for eiwitlijst in p_eiwit_lijst:
260 eiwit_temp_object = Eiwit()
261 eiwit_temp_object.Seteiwit_naam(eiwitlijst[0])
262 eiwit_temp_object.Seteiwit_sequence(eiwitlijst[1])
263 eiwit_temp_object.Seteiwit_id(eiwitlijst[2])
264 for code in p_Eiwit_KO_Code_list:
265 eiwit_temp_object.SetKO_code(code)
266 eiwitobjectlist.append(eiwit_temp_object)
267 for rnalijst in p_rna_lijst:
268 rna_temp_object = RNA()
269 rna_temp_object.SetRNA_sequence(rnalijst[1])
270 rna_temp_object.SetRNA_code(rnalijst[0])
271 rna_objectlist.append(rna_temp_object)
272 for eclijst in p_EC_lijst:
273 EC_temp_object = EC_nummer()
274 EC_temp_object.SetEC_nummer(eclijst[0])
275 EC_temp_object.Setuitleg(eclijst[1])
276 EC_temp_object.Setreactie(eclijst[2])
277 EC_objectlist.append(EC_temp_object)
278 for pathwaylijst in p_pathway_lijst:
279 pathway_temp_object = Pathway()
280 pathway_temp_object.Setpathway(pathwaylijst[1])
281 pathway_temp_object.SetSHR(pathwaylijst[0])
282 pathway_objectlist.append(pathway_temp_object)
283 for cancerlijst in p_cancer_lijst:
284 cancer_temp_object = Cancer_pathway()
285 cancer_temp_object.SetKO_code(cancerlijst[0])
286 cancer_temp_object.Setnaam_prot(cancerlijst[1])
287 cancer_objectlist.append(cancer_temp_object)
288
289 for x in p_pathway_lijst:
290 for y in x[1]:
291 if y !="" and y not in only_pathways:
292 only_pathways.append(y)
293
294 return genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist, only_pathways
295
296
297def main():
298 conn, cursor = connect('145.97.16.241', 'bpapgea6_db', 'bpapgea6',
299 'BpapgeA6') # <---- verander deze string voor een andere database!
300 tabellen_maken(conn, cursor)
301
302 pathway_list, Eiwit_KO_Code_list, ec_list = pathwaybestanduitlezer("kegg_pathway_EC.txt")
303 EC_list = ECbestanduitlezer("kegg_reactie_functie.txt")
304 gen_list, eiwit_list, rna_list = bestanduitlezer("tabel_info_nieuw.txt")
305 cancer_list = cancerbestanduitlezer("cancer_pathway.txt")
306
307 genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist, only_pathway_list = \
308 packager(eiwit_list, gen_list, rna_list, pathway_list, EC_list, cancer_list, Eiwit_KO_Code_list)
309
310
311
312 Insert_functions.geninserts(conn, cursor, genobjectlist)
313 Insert_functions.pathwayinserts(conn, cursor, only_pathway_list)
314 Insert_functions.eiwitinserts(conn, cursor, eiwitobjectlist)
315
316 Insert_functions.cancerpathwayinserts(conn, cursor, cancer_objectlist)
317 Insert_functions.pathway_SHR_inserts(conn, cursor, pathway_objectlist, eiwitobjectlist)
318 Insert_functions.ec_nummerinserts(conn, cursor, EC_objectlist)
319 Insert_functions.rnainserts(conn, cursor, genobjectlist, rna_objectlist, eiwitobjectlist)
320 Insert_functions.koppeltabelinserts(conn, cursor, ec_list)
321
322
323 print("Done!")
324
325 conn.close()
326
327
328if __name__ == "__main__":
329 main()