· 7 years ago · Dec 22, 2018, 01:36 AM
1{
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "## Creating a database for emails and Counting Email frequency: \n",
8 "\n"
9 ]
10 },
11 {
12 "cell_type": "code",
13 "execution_count": 1,
14 "metadata": {},
15 "outputs": [],
16 "source": [
17 "import sqlite3\n",
18 "import os"
19 ]
20 },
21 {
22 "cell_type": "code",
23 "execution_count": 2,
24 "metadata": {},
25 "outputs": [
26 {
27 "data": {
28 "text/plain": [
29 "'/home/zeski/Documents/Data_Science/SQL/SQL_DBS'"
30 ]
31 },
32 "execution_count": 2,
33 "metadata": {},
34 "output_type": "execute_result"
35 }
36 ],
37 "source": [
38 "os.chdir('/home/zeski/Documents/Data_Science/SQL/SQL_DBS')\n",
39 "\n",
40 "os.getcwd()"
41 ]
42 },
43 {
44 "cell_type": "code",
45 "execution_count": 16,
46 "metadata": {},
47 "outputs": [
48 {
49 "data": {
50 "text/plain": [
51 "['email.db', 'FIRSTDB.db', 'sql1.db.sqbpro']"
52 ]
53 },
54 "execution_count": 16,
55 "metadata": {},
56 "output_type": "execute_result"
57 }
58 ],
59 "source": [
60 "conn= sqlite3.connect('email.db')\n",
61 "c = conn.cursor()\n",
62 "os.listdir()\n"
63 ]
64 },
65 {
66 "cell_type": "code",
67 "execution_count": 22,
68 "metadata": {},
69 "outputs": [],
70 "source": [
71 "c.execute('''\n",
72 " CREATE TABLE IF NOT EXISTS Counts (email TEXT, count INTEGER)\n",
73 "''')\n",
74 "\n",
75 "fname= input('Enter file name: ')\n",
76 "\n",
77 "if (len(fname)<1): fname = 'mbox.txt'\n",
78 "fh = open(fname)\n",
79 "for line in fh:\n",
80 " if not line.startswith('From: '): continue\n",
81 " pieces = line.split()\n",
82 " email = pieces[1]\n",
83 " c.execute('''\n",
84 " SELECT count FROM Counts WHERE email = ? \n",
85 " ''', (email,))\n",
86 " row = c.fetchone()\n",
87 " if row is None:\n",
88 " c.execute('''\n",
89 " INSERT INTO Counts (email, count)\n",
90 " VALUES(?, 1)\n",
91 " ''', (email,))\n",
92 " else:\n",
93 " c.execute('''\n",
94 " UPDATE Counts SET count = count +1 WHERE email = ?\n",
95 " ''',(email,))\n",
96 "conn.commit()"
97 ]
98 },
99 {
100 "cell_type": "code",
101 "execution_count": 23,
102 "metadata": {},
103 "outputs": [],
104 "source": [
105 "sql = 'SELECT email, count FROM Counts ORDER BY count DESC LIMIT 10'"
106 ]
107 },
108 {
109 "cell_type": "code",
110 "execution_count": 25,
111 "metadata": {},
112 "outputs": [
113 {
114 "name": "stdout",
115 "output_type": "stream",
116 "text": [
117 "zqian@umich.edu 195\n",
118 "mmmay@indiana.edu 161\n",
119 "cwen@iupui.edu 158\n",
120 "chmaurer@iupui.edu 111\n",
121 "aaronz@vt.edu 110\n",
122 "ian@caret.cam.ac.uk 96\n",
123 "jimeng@umich.edu 93\n",
124 "rjlowe@iupui.edu 90\n",
125 "dlhaines@umich.edu 84\n",
126 "david.horwitz@uct.ac.za 67\n"
127 ]
128 }
129 ],
130 "source": [
131 "for row in c.execute(sql):\n",
132 " print(str(row[0]), row[1])"
133 ]
134 },
135 {
136 "cell_type": "code",
137 "execution_count": 27,
138 "metadata": {},
139 "outputs": [],
140 "source": [
141 "c.close()\n",
142 "conn.close()"
143 ]
144 }
145 ],
146 "metadata": {
147 "kernelspec": {
148 "display_name": "Python 3",
149 "language": "python",
150 "name": "python3"
151 },
152 "language_info": {
153 "codemirror_mode": {
154 "name": "ipython",
155 "version": 3
156 },
157 "file_extension": ".py",
158 "mimetype": "text/x-python",
159 "name": "python",
160 "nbconvert_exporter": "python",
161 "pygments_lexer": "ipython3",
162 "version": "3.6.5"
163 }
164 },
165 "nbformat": 4,
166 "nbformat_minor": 2
167}