· 6 years ago · Apr 15, 2019, 02:18 PM
1#!/usr/bin/env python3
2import sqlite3
3import sys
4import shutil
5import os
6
7#Existing DB
8exist="probe.matches.sqlite"
9
10#To be added DB
11toadd="new-probe.matches.sqlite"
12
13#Output DB
14outdb = "merged.sqlite"
15
16#Create new merged DB (copy of "exist" DB)
17if os.path.exists(outdb):
18 print ("ERROR: output DB already exists: " + outdb)
19 sys.exit()
20shutil.copy2(exist,outdb)
21
22try:
23 conn1 = sqlite3.connect(outdb)
24 c1 = conn1.cursor()
25except:
26 print ("ERROR: cannot open database: " + outdb)
27
28try:
29 conn2 = sqlite3.connect(toadd)
30 c2 = conn2.cursor()
31except:
32 print ("ERROR: cannot open database: " + toadd)
33
34#Handle new taxa
35query = "SELECT * FROM matches"
36taxa1 = list(map(lambda x: x[0], c1.execute(query).description))
37taxa1.remove('uce')
38taxa2 = list(map(lambda x: x[0], c2.execute(query).description))
39taxa2.remove('uce')
40
41new_taxa = list(set(taxa2) - set(taxa1))
42if not len(new_taxa):
43 print ("ERROR: no new taxa found!")
44 print ("Exiting...")
45 os.remove(outdb)
46 sys.exit()
47print ("New taxa: " + str(len(new_taxa)))
48
49dup_taxa = list(set(taxa2) & set(taxa1))
50if len(dup_taxa):
51 print ("ERROR: Duplicated taxa names found: " + str(len(dup_taxa)))
52 print (', '.join(map(str,dup_taxa)))
53 print ("Exiting...")
54 os.remove(outdb)
55 sys.exit()
56
57
58#Add columns for new taxa
59for new_taxon in new_taxa:
60 try:
61 query = "ALTER TABLE matches ADD COLUMN {0} text".format(new_taxon)
62 c1.execute(query)
63 query = "ALTER TABLE match_map ADD COLUMN {0} text".format(new_taxon)
64 c1.execute(query)
65 except:
66 print ("error with adding new taxon: " + new_taxon)
67 os.remove(outdb)
68 sys.exit()
69print("...Columns for new taxa added.")
70
71
72#Handle new loci
73query = "SELECT uce FROM matches"
74loci1 = c1.execute(query)
75loci2 = c2.execute(query)
76new_loci = list(set(loci2) - set(loci1))
77
78#Add rows for new loci
79if new_loci:
80 print ("New loci not already in source DB:", len(new_loci))
81 # try:
82 all_new_loci = [(new_locus) for new_locus in new_loci]
83 c1.executemany("INSERT INTO matches(uce) values (?)", all_new_loci)
84 c1.executemany("INSERT INTO match_map(uce) values (?)", all_new_loci)
85 print("...Rows for new loci have been added.")
86 # except:
87 # print ("+++error with adding new loci")
88 # sys.exit()
89else:
90 print("No new loci need to be added for the taxa being added.")
91
92
93#Populate new taxa
94for new_taxon in new_taxa:
95 # try:
96 print (new_taxon)
97 query = "SELECT uce, {0} FROM match_map WHERE {0} IS NOT NULL".format(new_taxon)
98 for row in c2.execute(query):
99 query = "UPDATE match_map SET {0} = ? WHERE uce = ?".format(new_taxon)
100 c1.execute(query,[row[1],row[0]])
101 query = "UPDATE matches SET {0} = 1 WHERE uce = ?".format(new_taxon)
102 c1.execute(query,[row[0]])
103 # except:
104 # print ("error with adding new taxon: " + new_taxon)
105 # sys.exit()
106print("Data for new taxa added.")
107
108conn1.commit()
109conn1.close()