· 6 years ago · Jul 19, 2019, 07:46 AM
1{
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 1,
6 "metadata": {},
7 "outputs": [],
8 "source": [
9 "import pandas as pd\n",
10 "from sqlalchemy.exc import IntegrityError\n",
11 "import numpy as np\n",
12 "from sqlalchemy import *\n",
13 "def connect_sql(database,echo):\n",
14 " engine = create_engine(\"mysql+pymysql://[your_mysql_account]:[your_mysql_password]@localhost:3306/{}\".format(database),echo=echo)\n",
15 " return engine\n",
16 "engine = connect_sql('twse',False)"
17 ]
18 },
19 {
20 "cell_type": "code",
21 "execution_count": 4,
22 "metadata": {},
23 "outputs": [],
24 "source": [
25 "import tejapi\n",
26 "tejapi.ApiConfig.api_key = \"[your_api_key]\"\n",
27 "data = tejapi.get('TRAIL/AIND')"
28 ]
29 },
30 {
31 "cell_type": "code",
32 "execution_count": 7,
33 "metadata": {},
34 "outputs": [],
35 "source": [
36 "text_create = '''create table TEJ_STOCK_DATA (\n",
37 " coid char(7) NOT NULL\n",
38 " ,mdate datetime\n",
39 " ,mkt char(5)\n",
40 " ,elist_day1 datetime\n",
41 " ,tseid char(7)\n",
42 " ,ind char(3)\n",
43 " ,ind1 char(2)\n",
44 " ,emm char(2)\n",
45 " ,ind_ban char(11)\n",
46 " ,isin char(12)\n",
47 " ,phone_c char(13)\n",
48 " ,phone_e char(16)\n",
49 " ,fax_c char(13)\n",
50 " ,fax_e char(16)\n",
51 " ,website char(60)\n",
52 " ,e_mail char(60)\n",
53 " ,chief_c char(20)\n",
54 " ,chief_e char(20)\n",
55 " ,president_c char(20)\n",
56 " ,president_e char(20)\n",
57 " ,spokes1_c char(42)\n",
58 " ,spokes1_e char(20)\n",
59 " ,spokes2_c char(42)\n",
60 " ,spokes2_e char(20)\n",
61 " ,empsum decimal(6,0)\n",
62 " ,zamt decimal(15,0)\n",
63 " ,parvalue decimal(15,3)\n",
64 " ,currency char(4)\n",
65 " ,estb_date datetime\n",
66 " ,pub_date datetime\n",
67 " ,list_day1 datetime\n",
68 " ,list_otc datetime\n",
69 " ,list_emg datetime\n",
70 " ,sub_prc decimal(7,2)\n",
71 " ,list_day2 datetime\n",
72 " ,pub_date2 datetime\n",
73 " ,d_cat41 datetime\n",
74 " ,list_day3 datetime\n",
75 " ,d_cat42 datetime\n",
76 " ,list_day4 datetime\n",
77 " ,d_cat43 datetime\n",
78 " ,list_day5 datetime\n",
79 " ,tejind1_c char(17)\n",
80 " ,tejind1_e char(27)\n",
81 " ,tejind2_c char(17)\n",
82 " ,tejind2_e char(27)\n",
83 " ,tejind3_c char(17)\n",
84 " ,tejind3_e char(27)\n",
85 " ,tejind4_c char(17)\n",
86 " ,tejind4_e char(27)\n",
87 " ,tejindwd_c char(30)\n",
88 " ,tejindwd_e char(30)\n",
89 " ,tejind5_c char(19)\n",
90 " ,tejind5_e char(34)\n",
91 " ,btindnm1 char(60)\n",
92 " ,btindnm2 char(60)\n",
93 " ,btindnm3 char(60)\n",
94 " ,btindnmp_c char(200)\n",
95 " ,btindnmp_e char(200)\n",
96 " ,gopna_c char(18)\n",
97 " ,gopna_e char(46)\n",
98 " ,fnamec char(30)\n",
99 " ,fnamee char(40)\n",
100 " ,inamec char(16)\n",
101 " ,inamee char(20)\n",
102 " ,chiadd_c char(60)\n",
103 " ,chiadd_e char(90)\n",
104 " ,rname_c char(44)\n",
105 " ,rname_e char(60)\n",
106 " ,rtel_c char(13)\n",
107 " ,rtel_e char(16)\n",
108 " ,radd_c char(44)\n",
109 " ,radd_e char(54)\n",
110 " ,chga_id1 char(7)\n",
111 " ,chga_dd1 datetime\n",
112 " ,chga_mkt1 char(5)\n",
113 " ,oldidx1_c char(16)\n",
114 " ,oldidx1_e char(26)\n",
115 " ,oldidx2_c char(16)\n",
116 " ,oldidx2_e char(26)\n",
117 " ,oldidx2_dd datetime\n",
118 " ,oldidx3_c char(16)\n",
119 " ,oldidx3_e char(26)\n",
120 " ,oldidx3_dd datetime\n",
121 " ,oldidx4_c char(16)\n",
122 " ,oldidx4_e char(26)\n",
123 " ,oldidx4_dd datetime\n",
124 " ,oldnma1_c char(20)\n",
125 " ,oldnma1_e char(47)\n",
126 " ,oldnma2_c char(20)\n",
127 " ,oldnmb2_c datetime\n",
128 " ,oldnma2_e char(47)\n",
129 " ,oldnmb2_e datetime\n",
130 " ,oldnma3_c char(20)\n",
131 " ,oldnmb3_c datetime\n",
132 " ,oldnma3_e char(47)\n",
133 " ,oldnmb3_e datetime\n",
134 " ,oldnma4_c char(20)\n",
135 " ,oldnmb4_c datetime\n",
136 " ,oldnma4_e char(47)\n",
137 " ,oldnmb4_e datetime\n",
138 " ,oldnma5_c char(20)\n",
139 " ,oldnmb5_c datetime\n",
140 " ,oldnma5_e char(47)\n",
141 " ,oldnmb5_e datetime\n",
142 " ,ctcri1_c char(16)\n",
143 " ,ctcri1_e char(26)\n",
144 " ,ctcri2_c char(16)\n",
145 " ,ctcri2_e char(26)\n",
146 " ,ctcri2_dd datetime\n",
147 " ,ctcri3_c char(16)\n",
148 " ,ctcri3_e char(26)\n",
149 " ,ctcri3_dd datetime\n",
150 " ,ctcri4_c char(16)\n",
151 " ,ctcri4_e char(26)\n",
152 " ,ctcri4_dd datetime\n",
153 " ,ctcri5_c char(16)\n",
154 " ,ctcri5_e char(26)\n",
155 " ,ctcri5_dd datetime\n",
156 " ,dflt_d datetime\n",
157 " ,dflt_cd datetime\n",
158 " ,xdist char(24)\n",
159 " ,fail_fg char(1)\n",
160 " ,xfg1 char(24)\n",
161 " ,tot_na char(28)\n",
162 " ,cpa1 char(20)\n",
163 " ,cpa2 char(20)\n",
164 " ,cpa3 char(20)\n",
165 " ,cpa_date datetime\n",
166 " ,chga_id2 char(7)\n",
167 " ,chga_dd2 datetime\n",
168 " ,chga_mkt2 char(5)\n",
169 " ,chga_id3 char(7)\n",
170 " ,chga_dd3 datetime\n",
171 " ,chga_mkt3 char(5)\n",
172 " ,chga_id4 char(7)\n",
173 " ,chga_dd4 datetime\n",
174 " ,chga_mkt4 char(5)\n",
175 " ,tejico1 char(5)\n",
176 " ,tejinm1_c char(10)\n",
177 " ,tejinm1_e char(20)\n",
178 " ,tejico2 char(5)\n",
179 " ,tejinm2_c char(10)\n",
180 " ,tejinm2_e char(20)\n",
181 " ,tejico3 char(6)\n",
182 " ,tejinm3_c char(10)\n",
183 " ,tejinm3_e char(20)\n",
184 " ,tejico4 char(5)\n",
185 " ,tejinm4_c char(10)\n",
186 " ,tejinm4_e char(20)\n",
187 ");'''"
188 ]
189 },
190 {
191 "cell_type": "code",
192 "execution_count": 8,
193 "metadata": {},
194 "outputs": [
195 {
196 "name": "stderr",
197 "output_type": "stream",
198 "text": [
199 "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pymysql/cursors.py:170: Warning: (1051, \"Unknown table 'twse.tej_stock_data'\")\n",
200 " result = self._query(query)\n"
201 ]
202 }
203 ],
204 "source": [
205 "from sqlalchemy.exc import InternalError\n",
206 "\n",
207 "try:\n",
208 " # Drop\n",
209 " engine.execute('drop table IF EXISTS TEJ_STOCK_DATA;')\n",
210 " # Create\n",
211 " engine.execute(text_create)\n",
212 "except InternalError:\n",
213 " print('table already exists')"
214 ]
215 },
216 {
217 "cell_type": "code",
218 "execution_count": 9,
219 "metadata": {},
220 "outputs": [],
221 "source": [
222 "# Insert Table\n",
223 "from sqlalchemy.exc import IntegrityError\n",
224 "\n",
225 "try:\n",
226 " data.to_sql(name='TEJ_STOCK_DATA', con=engine, if_exists='append', index=False)\n",
227 "# 若已有資料則不寫入\n",
228 "except IntegrityError:\n",
229 " print('catch duplicate data')"
230 ]
231 },
232 {
233 "cell_type": "code",
234 "execution_count": 11,
235 "metadata": {},
236 "outputs": [],
237 "source": [
238 "df = pd.read_sql_query('select * from TEJ_STOCK_DATA where coid = \"{}\"'.format(1301), con=engine)"
239 ]
240 },
241 {
242 "cell_type": "code",
243 "execution_count": 12,
244 "metadata": {},
245 "outputs": [
246 {
247 "data": {
248 "text/html": [
249 "<div>\n",
250 "<style scoped>\n",
251 " .dataframe tbody tr th:only-of-type {\n",
252 " vertical-align: middle;\n",
253 " }\n",
254 "\n",
255 " .dataframe tbody tr th {\n",
256 " vertical-align: top;\n",
257 " }\n",
258 "\n",
259 " .dataframe thead th {\n",
260 " text-align: right;\n",
261 " }\n",
262 "</style>\n",
263 "<table border=\"1\" class=\"dataframe\">\n",
264 " <thead>\n",
265 " <tr style=\"text-align: right;\">\n",
266 " <th></th>\n",
267 " <th>coid</th>\n",
268 " <th>mdate</th>\n",
269 " <th>mkt</th>\n",
270 " <th>elist_day1</th>\n",
271 " <th>tseid</th>\n",
272 " <th>ind</th>\n",
273 " <th>ind1</th>\n",
274 " <th>emm</th>\n",
275 " <th>ind_ban</th>\n",
276 " <th>isin</th>\n",
277 " <th>...</th>\n",
278 " <th>tejinm1_e</th>\n",
279 " <th>tejico2</th>\n",
280 " <th>tejinm2_c</th>\n",
281 " <th>tejinm2_e</th>\n",
282 " <th>tejico3</th>\n",
283 " <th>tejinm3_c</th>\n",
284 " <th>tejinm3_e</th>\n",
285 " <th>tejico4</th>\n",
286 " <th>tejinm4_c</th>\n",
287 " <th>tejinm4_e</th>\n",
288 " </tr>\n",
289 " </thead>\n",
290 " <tbody>\n",
291 " <tr>\n",
292 " <th>0</th>\n",
293 " <td>1301</td>\n",
294 " <td>2019-01-01</td>\n",
295 " <td>TSE</td>\n",
296 " <td>1964-07-27</td>\n",
297 " <td>1301</td>\n",
298 " <td>13</td>\n",
299 " <td>03</td>\n",
300 " <td>12</td>\n",
301 " <td>ID 75708007</td>\n",
302 " <td>TW0001301000</td>\n",
303 " <td>...</td>\n",
304 " <td>Plastics</td>\n",
305 " <td>M13A</td>\n",
306 " <td>石化</td>\n",
307 " <td>Petrochemical</td>\n",
308 " <td>M13A1</td>\n",
309 " <td>泛用塑膠</td>\n",
310 " <td>Extensive Plastics</td>\n",
311 " <td>M1300</td>\n",
312 " <td>塑膠工業</td>\n",
313 " <td>Plastics</td>\n",
314 " </tr>\n",
315 " </tbody>\n",
316 "</table>\n",
317 "<p>1 rows × 150 columns</p>\n",
318 "</div>"
319 ],
320 "text/plain": [
321 " coid mdate mkt elist_day1 tseid ind ind1 emm ind_ban \\\n",
322 "0 1301 2019-01-01 TSE 1964-07-27 1301 13 03 12 ID 75708007 \n",
323 "\n",
324 " isin ... tejinm1_e tejico2 tejinm2_c tejinm2_e tejico3 \\\n",
325 "0 TW0001301000 ... Plastics M13A 石化 Petrochemical M13A1 \n",
326 "\n",
327 " tejinm3_c tejinm3_e tejico4 tejinm4_c tejinm4_e \n",
328 "0 泛用塑膠 Extensive Plastics M1300 塑膠工業 Plastics \n",
329 "\n",
330 "[1 rows x 150 columns]"
331 ]
332 },
333 "execution_count": 12,
334 "metadata": {},
335 "output_type": "execute_result"
336 }
337 ],
338 "source": [
339 "df"
340 ]
341 },
342 {
343 "cell_type": "code",
344 "execution_count": 71,
345 "metadata": {},
346 "outputs": [],
347 "source": [
348 "# 將部份所需要的 Column 截出來,建 Derived Table\n",
349 "short = '''select coid\n",
350 ",fnamec\n",
351 ",inamec\n",
352 ",mkt\n",
353 ",tejinm2_c\n",
354 ",tejinm3_c\n",
355 ",tejinm4_c\n",
356 ",tejico2\n",
357 ",tejico3\n",
358 ",tejico4\n",
359 "from TEJ_STOCK_DATA\n",
360 "where mkt in ('TSE','OTC')\n",
361 ";'''\n",
362 "df = pd.read_sql_query(short, con=engine)"
363 ]
364 },
365 {
366 "cell_type": "code",
367 "execution_count": 76,
368 "metadata": {},
369 "outputs": [],
370 "source": [
371 "df.rename(columns={'coid':'ID'\n",
372 " ,'fnamec':'F_NAME' \n",
373 " ,'inamec':'NAME'\n",
374 " ,'mkt':'MKT'\n",
375 " ,'tejinm2_c':'INDUSTRY_NAME_1'\n",
376 " ,'tejinm3_c':'INDUSTRY_NAME_2'\n",
377 " ,'tejinm4_c':'TSE_INDUSTRY_NAME'\n",
378 " ,'tejico2':'INDUSTRY_ID_1' \n",
379 " ,'tejico3':'INDUSTRY_ID_2'\n",
380 " ,'tejico4':'TSE_INDUSTRY_ID'}, \n",
381 " inplace=True)"
382 ]
383 },
384 {
385 "cell_type": "code",
386 "execution_count": 77,
387 "metadata": {},
388 "outputs": [
389 {
390 "data": {
391 "text/html": [
392 "<div>\n",
393 "<style scoped>\n",
394 " .dataframe tbody tr th:only-of-type {\n",
395 " vertical-align: middle;\n",
396 " }\n",
397 "\n",
398 " .dataframe tbody tr th {\n",
399 " vertical-align: top;\n",
400 " }\n",
401 "\n",
402 " .dataframe thead th {\n",
403 " text-align: right;\n",
404 " }\n",
405 "</style>\n",
406 "<table border=\"1\" class=\"dataframe\">\n",
407 " <thead>\n",
408 " <tr style=\"text-align: right;\">\n",
409 " <th></th>\n",
410 " <th>ID</th>\n",
411 " <th>F_NAME</th>\n",
412 " <th>NAME</th>\n",
413 " <th>MKT</th>\n",
414 " <th>INDUSTRY_NAME_1</th>\n",
415 " <th>INDUSTRY_NAME_2</th>\n",
416 " <th>TSE_INDUSTRY_NAME</th>\n",
417 " <th>INDUSTRY_ID_1</th>\n",
418 " <th>INDUSTRY_ID_2</th>\n",
419 " <th>TSE_INDUSTRY_ID</th>\n",
420 " </tr>\n",
421 " </thead>\n",
422 " <tbody>\n",
423 " <tr>\n",
424 " <th>0</th>\n",
425 " <td>9962</td>\n",
426 " <td>有益鋼鐵</td>\n",
427 " <td>有益</td>\n",
428 " <td>OTC</td>\n",
429 " <td>金屬基本</td>\n",
430 " <td>不銹鋼</td>\n",
431 " <td>鋼鐵工業</td>\n",
432 " <td>M20A</td>\n",
433 " <td>M20A6</td>\n",
434 " <td>M2000</td>\n",
435 " </tr>\n",
436 " <tr>\n",
437 " <th>1</th>\n",
438 " <td>9960</td>\n",
439 " <td>邁達康網路事業</td>\n",
440 " <td>邁達康</td>\n",
441 " <td>OTC</td>\n",
442 " <td>百貨批發</td>\n",
443 " <td>百貨購物中</td>\n",
444 " <td>貿易百貨</td>\n",
445 " <td>M29A</td>\n",
446 " <td>M29A1</td>\n",
447 " <td>M2900</td>\n",
448 " </tr>\n",
449 " <tr>\n",
450 " <th>2</th>\n",
451 " <td>9958</td>\n",
452 " <td>世紀鋼鐵結構</td>\n",
453 " <td>世紀鋼</td>\n",
454 " <td>TSE</td>\n",
455 " <td>金屬基本</td>\n",
456 " <td>鋼筋</td>\n",
457 " <td>鋼鐵工業</td>\n",
458 " <td>M20A</td>\n",
459 " <td>M20A2</td>\n",
460 " <td>M2000</td>\n",
461 " </tr>\n",
462 " <tr>\n",
463 " <th>3</th>\n",
464 " <td>9955</td>\n",
465 " <td>佳龍科技工程</td>\n",
466 " <td>佳龍</td>\n",
467 " <td>TSE</td>\n",
468 " <td>資源回收</td>\n",
469 " <td>貴金屬回收</td>\n",
470 " <td>其他</td>\n",
471 " <td>M99L</td>\n",
472 " <td>M99L1</td>\n",
473 " <td>M9900</td>\n",
474 " </tr>\n",
475 " <tr>\n",
476 " <th>4</th>\n",
477 " <td>9951</td>\n",
478 " <td>皇田工業</td>\n",
479 " <td>皇田</td>\n",
480 " <td>OTC</td>\n",
481 " <td>汽車組件</td>\n",
482 " <td>汽車組件</td>\n",
483 " <td>電機機械</td>\n",
484 " <td>M15D</td>\n",
485 " <td>M15D</td>\n",
486 " <td>M1500</td>\n",
487 " </tr>\n",
488 " </tbody>\n",
489 "</table>\n",
490 "</div>"
491 ],
492 "text/plain": [
493 " ID F_NAME NAME MKT INDUSTRY_NAME_1 INDUSTRY_NAME_2 TSE_INDUSTRY_NAME \\\n",
494 "0 9962 有益鋼鐵 有益 OTC 金屬基本 不銹鋼 鋼鐵工業 \n",
495 "1 9960 邁達康網路事業 邁達康 OTC 百貨批發 百貨購物中 貿易百貨 \n",
496 "2 9958 世紀鋼鐵結構 世紀鋼 TSE 金屬基本 鋼筋 鋼鐵工業 \n",
497 "3 9955 佳龍科技工程 佳龍 TSE 資源回收 貴金屬回收 其他 \n",
498 "4 9951 皇田工業 皇田 OTC 汽車組件 汽車組件 電機機械 \n",
499 "\n",
500 " INDUSTRY_ID_1 INDUSTRY_ID_2 TSE_INDUSTRY_ID \n",
501 "0 M20A M20A6 M2000 \n",
502 "1 M29A M29A1 M2900 \n",
503 "2 M20A M20A2 M2000 \n",
504 "3 M99L M99L1 M9900 \n",
505 "4 M15D M15D M1500 "
506 ]
507 },
508 "execution_count": 77,
509 "metadata": {},
510 "output_type": "execute_result"
511 }
512 ],
513 "source": [
514 "df.head()"
515 ]
516 },
517 {
518 "cell_type": "code",
519 "execution_count": 78,
520 "metadata": {},
521 "outputs": [],
522 "source": [
523 "engine.execute('drop table IF EXISTS TEJ_STOCK_DATA_BASIC;')\n",
524 "df.to_sql(name='TEJ_STOCK_DATA_BASIC', con=engine, if_exists='append', index=False)"
525 ]
526 }
527 ],
528 "metadata": {
529 "kernelspec": {
530 "display_name": "Python 3",
531 "language": "python",
532 "name": "python3"
533 },
534 "language_info": {
535 "codemirror_mode": {
536 "name": "ipython",
537 "version": 3
538 },
539 "file_extension": ".py",
540 "mimetype": "text/x-python",
541 "name": "python",
542 "nbconvert_exporter": "python",
543 "pygments_lexer": "ipython3",
544 "version": "3.6.4"
545 }
546 },
547 "nbformat": 4,
548 "nbformat_minor": 2
549}