· 4 years ago · May 03, 2021, 06:52 PM
1from clickhouse_driver import Client
2import subprocess
3
4
5def temp(arr):
6 res = 'CREATE TABLE IF NOT EXISTS ' + arr[0] + '\n(\n'
7 for i in range(1, len(arr), 2):
8 if i != len(arr) - 2:
9 res += arr[i] + ' ' + arr[i + 1] + ',\n'
10 else:
11 res += arr[i] + ' ' + arr[i + 1] + '\n'
12 res += ') ENGINE = MergeTree ORDER BY tuple()'
13 return res
14
15
16def type(now, dict):
17 for i in range(len(now)):
18 if dict[2 * (i + 1)] == "Int16" or dict[2 * (i + 1)] == "Int32":
19 now[i] = int(now[i])
20 elif dict[2 * (i + 1)][:3] == "Dec":
21 now[i] = float(now[i])
22 elif dict[2 * (i + 1)] == 'UInt8':
23 if now[i] == 't':
24 now[i] = 1
25 else:
26 now[i] = 0
27 return now
28
29
30tables = [
31 ['actor',
32 'actor_id', 'Int32',
33 'first_name', 'String',
34 'last_name', 'String',
35 'last_update', 'DateTime64'
36 ],
37 ['actor_info',
38 'actor_id', 'Int32',
39 'first_name', 'String',
40 'last_name', 'String',
41 'film_info', 'String'
42 ],
43 ['customer_list',
44 'id', 'Int32',
45 'name', 'String',
46 'address', 'String',
47 'zipcode', 'String',
48 'phone', 'String',
49 'city', 'String',
50 'country', 'String',
51 'notes', 'String',
52 'sid', 'Int16'
53 ],
54 ['film_list',
55 'fid', 'Int32',
56 'title', 'String',
57 'description', 'String',
58 'category', 'String',
59 'price', 'Decimal(4, 2)',
60 'length', 'Int16',
61 'rating', 'String',
62 'actors', 'String'
63 ],
64 ['nicer_but_slower_film_list',
65 'fid', 'Int32',
66 'title', 'String',
67 'description', 'String',
68 'category', 'String',
69 'price', 'Decimal(4, 2)',
70 'length', 'Int16',
71 'rating', 'String',
72 'actors', 'String'
73 ],
74 ['sales_by_film_category',
75 'category', 'String',
76 'total_sales', 'Decimal(6, 2)'
77 ],
78 ['store',
79 'store_id', 'Int32',
80 'manager_staff_id', 'Int16',
81 'address_id', 'Int16',
82 'last_update', 'DateTime'
83 ],
84 ['sales_by_store',
85 'store', 'String',
86 'manager', 'String',
87 'total_sales', 'Decimal(7, 2)'
88 ],
89 ['staff_list',
90 'id', 'Int32',
91 'name', 'String',
92 'address', 'String',
93 'zipcode', 'String',
94 'phone', 'String',
95 'city', 'String',
96 'country', 'String',
97 'sid', 'Int16'
98 ],
99 ['address',
100 'address_id', 'Int32',
101 'address', 'String',
102 'address2', 'String',
103 'district', 'String',
104 'city_id', 'Int16',
105 'postal_code', 'String',
106 'phone', 'String',
107 'last_update', 'DateTime'
108 ],
109 ['category',
110 'category_id', 'Int32',
111 'name', 'String',
112 'last_update', 'DateTime'
113 ],
114 ['city',
115 'city_id', 'Int32',
116 'city', 'String',
117 'country_id', 'Int16',
118 'last_update', 'DateTime'
119 ],
120 ['country',
121 'country_id', 'Int32',
122 'country', 'String',
123 'last_update', 'DateTime'
124 ],
125 ['customer',
126 'customer_id', 'Int32',
127 'store_id', 'Int16',
128 'first_name', 'String',
129 'last_name', 'String',
130 'email', 'String',
131 'address_id', 'Int16',
132 'activebool', 'UInt8',
133 'create_date', 'Date',
134 'last_update', 'DateTime64',
135 'active', 'Int32'
136 ],
137 ['film_actor',
138 'actor_id', 'Int16',
139 'film_id', 'Int16',
140 'last_update', 'DateTime'
141 ],
142 ['film_category',
143 'film_id', 'Int16',
144 'category_id', 'Int16',
145 'last_update', 'DateTime'
146 ],
147 ['inventory',
148 'inventory_id', 'Int32',
149 'film_id', 'Int16',
150 'store_id', 'Int16',
151 'last_update', 'DateTime'
152 ],
153 ['language',
154 'language_id', 'Int32',
155 'name', 'String',
156 'last_update', 'DateTime'
157 ],
158 ['rental',
159 'rental_id', 'Int32',
160 'rental_date', 'DateTime',
161 'inventory_id', 'Int32',
162 'customer_id', 'Int16',
163 'return_date', 'DateTime',
164 'staff_id', 'Int16',
165 'last_update', 'DateTime'
166 ],
167 ['staff',
168 'staff_id', 'Int32',
169 'first_name', 'String',
170 'last_name', 'String',
171 'address_id', 'Int16',
172 'email', 'String',
173 'store_id', 'Int16',
174 'active', 'UInt8',
175 'username', 'String',
176 'password', 'String',
177 'last_update', 'DateTime64',
178 'picture', 'String'
179 ],
180 ['payment',
181 'payment_id', 'Int32',
182 'customer_id', 'Int16',
183 'staff_id', 'Int16',
184 'rental_id', 'Int32',
185 'amount', 'Decimal(5, 2)',
186 'payment_date', 'DateTime64'
187 ],
188 ['film',
189 'film_id', 'Int32',
190 'title', 'String',
191 'description', 'String',
192 'release_year', 'Int32',
193 'language_id', 'Int16',
194 'rental_duration', 'Int16',
195 'rental_rate', 'Decimal(4, 2)',
196 'length', 'Int16',
197 'replacement_cost', 'Decimal(5, 2)',
198 'rating', 'String',
199 'last_update', 'DateTime64',
200 'special_features', 'Array(String)',
201 'fulltext', 'String',
202 ]
203]
204
205client = Client('localhost')
206client.execute('drop database default')
207for i in tables:
208 client.execute(temp(i))
209
210for i in tables:
211 if i[0] == 'customer' or i[0] == 'staff':
212 f = open('/media/sf_im/' + i[0] + '.csv', 'w')
213 for line in f:
214 f.write(line.replace(';t;', ';1;'))
215 f.write(line.replace(';f;', ';0;'))
216 elif i[0] == 'film':
217 f = open('/media/sf_im/' + i[0] + '.csv', 'w')
218 for line in f:
219 f.write(line.replace('{', ''))
220 f.write(line.replace('}', ''))
221
222 res = "clickhouse-client --query=\"INSERT INTO " + i[0] + " FORMAT CSV\" < /media/sf_im/" + i[0] + ".csv"
223 print(res)
224 subprocess.run(res, shell=True)
225
226
227'''
228import psycopg2
229
230path = 'C:/im/'
231fields = open(path + 'field.txt', 'w')
232con = psycopg2.connect(
233 database="dvd",
234 user="postgres",
235 password="postgres",
236 host="127.0.0.1",
237 port="5432"
238)
239cursor = con.cursor()
240cursor.execute('SELECT table_name FROM information_schema.tables \
241WHERE table_schema NOT IN (\'information_schema\',\'pg_catalog\');')
242tables = []
243for row in cursor:
244 tables += [row[0]]
245for row in tables:
246 new_file = open(path + row + '.csv', 'w')
247 new_file.close()
248
249 cursor.execute('SELECT column_name, data_type \
250FROM INFORMATION_SCHEMA.COLUMNS \
251WHERE table_name = \'' + row + '\';')
252 temp = [row]
253 for i in cursor:
254 temp += [i[0].replace(' ', ''), i[1].replace(' ', '')]
255 fields.write(' '.join(temp) + '\n')
256
257 cursor.execute('COPY (SELECT * FROM "' + row + '") TO \'' + path + row + '.csv\' CSV;')
258fields.close()
259'''