# Paste metadata: 5 years ago, Jan 31, 2020, 04:32 PM


import email
import imaplib
import re
import sys
from datetime import datetime

import gspread
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from dateutil.parser import parse
from df2gspread import df2gspread as d2g
from oauth2client.service_account import ServiceAccountCredentials
from pytz import timezone

# Full OTF Gmail Scraper v0.1
# Script only works for Gmail; if other email servers are needed, contact /u/tuloon.
ORG_EMAIL = "@gmail.com"
FROM_EMAIL = "XXXXXX" + ORG_EMAIL  # insert username
FROM_PWD = "XXXXX"  # insert password; with 2-step auth, obtain an app password
SMTP_SERVER = "XXXXX"  # host name handed to imaplib.IMAP4_SSL below
LocalTimeZone = "XXXX"  # insert pytz timezone; search "pytz timezones" to find one close to you

CredLocation = 'XXXXXX'  # location of the Google API client_secret.json file; see the Google API guide
GoogleSheetID = 'XXXX'  # GoogleSheetID can be found in your sheet URL; see guide
GoogleSheetName = 'XXXXX'  # name of the sheet in the Google workbook

# Scopes requested for the service-account credentials used by the upload.
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

# The connection is opened as soon as the module runs; login and mailbox
# selection are performed by the script section at the end of the file.
m = imaplib.IMAP4_SSL(SMTP_SERVER)

def _parse_message(raw):
    """Parse the raw RFC822 payload of an IMAP fetch into an email message."""
    # imaplib hands back ``str`` on Python 2 and ``bytes`` on Python 3.
    if isinstance(raw, str):
        return email.message_from_string(raw)
    return email.message_from_bytes(raw)


def _html_soups(msg):
    """Yield a parsed HTML tree for each text/html part of a multipart *msg*.

    Parts that declare no charset are skipped, as are non-HTML parts.
    """
    if not msg.is_multipart():
        return
    for part in msg.walk():
        charset = part.get_content_charset()
        if charset is None or part.get_content_type() != 'text/html':
            continue
        body = part.get_payload(decode=True).decode(str(charset), "ignore")
        yield BeautifulSoup(body, 'lxml')


def _detect_otf_layout(soup):
    """Return the label of the email layout that *soup* appears to use.

    The labels are the strings this script prints for each layout; only
    "Current Email" and "Most Current" have a parser in this file.
    """
    numbers = soup.select(".numbers")
    if numbers:
        return "Current Email" if len(numbers) > 2 else "3/2/18"

    spans = soup.find_all("span")
    if spans and spans[0].text.strip().lower() == "calorie burn":
        return "4/25/15 or 6/30/16"

    # If the 5th <td> ends with "calories burned" it is the 02/19/18 layout.
    cells = soup.find_all("td")
    if len(cells) > 4 and cells[4].text.strip().lower()[-15:] == "calories burned":
        return "2/19/18"

    # The bar-bumber class separates the most current layout from 3/15/16.
    if soup.select(".bar-bumber"):
        return "Most Current"
    return "3/15/16"


def process_OTFWorkouts(m):
    """Collect OTF workout emails from the selected mailbox and upload them.

    Searches for messages from the two OTbeat sender addresses, walks the
    result in reverse order (the original author notes the search lists
    messages old -> new), fills one DataFrame row per accepted email and
    uploads the frame to the configured Google Sheet.

    Args:
        m: a logged-in IMAP connection with a mailbox already selected.

    Returns:
        None. Returns early, without uploading, when the search fails or
        finds nothing, or when fetching any message fails.

    NOTE(review): the pasted source had lost its indentation; the nesting
    below (HTML handling inside the text/html branch, row counter advanced
    once per accepted email, upload after the loop) is reconstructed.
    """
    rv, data = m.search(None, '(OR (FROM "OTbeatReport@orangetheoryfitness.com") (FROM "OtBeatSummary@orangetheoryfitness.com"))')
    # Check the status of *this* search, not an unrelated module-level name.
    if rv != 'OK':
        print("No messages found!")
        return
    OTFemails = data[0].split() if data and data[0] else []
    if not OTFemails:
        print("No messages found!")
        return

    col_names = ['Date', 'Calories', 'SPs', 'AvgHRate', 'AvgMaxHR', 'Grey', 'Blue', 'Green', 'Orange', 'Red', 'PeakHR', 'MaxMaxHR', 'Coach', 'Loc', 'Steps', 'TTD', 'TTT', 'AvgS', 'MaxS', 'AvgI', 'MaxI', 'AvgP', 'MaxP', 'Elev']
    OTF = pd.DataFrame("N/A", index=np.arange(len(OTFemails)), columns=col_names)
    accepted_senders = (
        'otbeatreport@orangetheoryfitness.com',
        'orangetheory fitness <otbeatsummary@orangetheoryfitness.com>',
    )

    placer = 0  # next row of OTF to fill
    for emailid in reversed(OTFemails):
        typ, data = m.fetch(emailid, '(RFC822)')
        if typ != 'OK':
            print("ERROR getting message %s" % emailid)
            return

        msg = _parse_message(data[0][1])
        INDDateTime = parse(msg['Date']).astimezone(timezone(LocalTimeZone))
        Address = msg['From'].lower()
        if Address not in accepted_senders:
            print(Address)
            continue

        # Check duplicates: skip an email dated less than 55 minutes before
        # the previously recorded one.
        if placer != 0:
            PrevDate = OTF.loc[placer - 1, 'Date']
            diffInMinutes = (PrevDate - INDDateTime).total_seconds() / 60
            if diffInMinutes < 55:
                print("Email Dated: " + INDDateTime.strftime("%m/%d/%Y, %H:%M") + " not recorded as duplicate to Email Dated: " + PrevDate.strftime("%m/%d/%Y, %H:%M") + ". Diff in Time = " + str(diffInMinutes) + " minutes")
                continue

        OTF.loc[placer, 'Date'] = INDDateTime
        for soup in _html_soups(msg):
            layout = _detect_otf_layout(soup)
            print(layout)
            if layout == "Current Email":
                OTF = getCurrentEmail(soup, OTF, placer)
            elif layout == "Most Current":
                OTF = getMostCurrentEmail(soup, OTF, placer)
        placer = placer + 1

    # Use the configured credentials file rather than a hard-coded path.
    creds = ServiceAccountCredentials.from_json_keyfile_name(CredLocation, scope)
    d2g.upload(OTF, GoogleSheetID, GoogleSheetName, credentials=creds, row_names=False, col_names=True, start_cell='B1')


def _node_text(node):
    """Return the node's text with surrounding whitespace removed."""
    return node.text.strip()


def _node_digits(node):
    """Return the node's text with every non-digit character removed."""
    return re.sub(r"\D", "", node.text)


def _node_ascii(node):
    """Return the node's text with non-ASCII characters dropped."""
    return node.text.encode('ascii', 'ignore').decode('ascii')


def _strip_label(text, label):
    """Remove *label* (e.g. "Max:") from *text* and trim whitespace."""
    return re.sub(label, "", text).strip()


def _treadmill_fields(nodes, start):
    """Read the treadmill metrics whose first entry sits at nodes[start].

    The two treadmill layouts use the same relative positions and differ
    only in where the group starts.
    """
    return {
        'TTD': _node_text(nodes[start]),
        'TTT': _node_ascii(nodes[start + 1]).strip(),
        'AvgS': _node_text(nodes[start + 2]),
        'MaxS': _strip_label(nodes[start + 3].text, "Max:"),
        'AvgI': _node_text(nodes[start + 5]),
        'MaxI': _strip_label(nodes[start + 6].text, "Max:"),
        'AvgP': _node_ascii(nodes[start + 8]).strip(),
        'MaxP': _strip_label(_node_ascii(nodes[start + 9]), "Fastest:"),
        'Elev': _node_text(nodes[start + 11]),
    }


def getMostCurrentEmail(email, data, loca):
    """Extract workout metrics from a "Most Current" layout email.

    Args:
        email: parsed HTML tree exposing ``select(css_selector)``; each
            selected node must expose ``.text``.
        data: table supporting ``data.loc[row, column] = value``.
        loca: row label of ``data`` to fill.

    Returns:
        ``data`` after the row has been written.

    Raises:
        IndexError: when the document has fewer matching nodes than the
            layout being parsed requires.

    The number of ``.text-bold`` nodes selects the sub-layout:
    Tread2 = tread info w/o beat, Tread = tread info w/ beat,
    OTFS = no tread w/o beat, Burn = no tread w/ beat,
    Old = older version of the current format (slight change in %HR).
    Metrics a sub-layout does not provide are written as "".
    """
    # Headline numbers: calories, splat points, average heart rate.
    headline = email.select(".h1.text-gray.text-bold")
    row = {
        'Calories': _node_text(headline[0]),
        'SPs': _node_text(headline[1]),
        'AvgHRate': _node_text(headline[2]),
    }

    # Time per heart-rate zone, in display order.
    zones = email.select(".bar-bumber")
    for position, column in enumerate(('Grey', 'Blue', 'Green', 'Orange', 'Red')):
        row[column] = _node_text(zones[position])

    studio = email.select(".text-white")
    row['Coach'] = _node_text(studio[4])
    row['Loc'] = _node_text(studio[1])

    # Every optional metric defaults to "" so each sub-layout writes all
    # columns; previously the Burn layout left two of them undefined.
    optional = ('PeakHR', 'AvgMaxHR', 'MaxMaxHR', 'Steps', 'TTD', 'TTT',
                'AvgS', 'MaxS', 'AvgI', 'MaxI', 'AvgP', 'MaxP', 'Elev')
    row.update(dict.fromkeys(optional, ""))

    bold = email.select(".text-bold")
    count = len(bold)
    if count > 21:  # Tread2
        row['PeakHR'] = _node_digits(bold[3])
        row['AvgMaxHR'] = _node_digits(bold[5])
        row['MaxMaxHR'] = _node_digits(bold[6])
        row.update(_treadmill_fields(bold, 8))
    elif count > 20:  # Tread
        row['PeakHR'] = _node_digits(bold[3])
        row['Steps'] = _node_digits(bold[5])
        row.update(_treadmill_fields(bold, 7))
    elif count > 10:  # OTFS
        row['PeakHR'] = _node_digits(bold[5])
        row['AvgMaxHR'] = _node_digits(bold[6])
        row['MaxMaxHR'] = _node_digits(bold[9])
    elif bold[3].text == "":  # Burn
        row['PeakHR'] = _node_digits(bold[5])
        row['Steps'] = _node_digits(bold[6])
    else:  # Old
        row['PeakHR'] = _node_digits(bold[3])
        row['AvgMaxHR'] = _node_digits(bold[5])
        row['MaxMaxHR'] = _node_digits(bold[6])

    for column, value in row.items():
        data.loc[loca, column] = value
    return data

# Name is bad but corresponds to the Google script for ease of use.
def getCurrentEmail(email, data, loca):
    """Extract workout metrics from a "Current Email" layout email.

    Args:
        email: parsed HTML tree exposing ``find_all(tag, attrs)``; each
            returned node must expose ``.text``.
        data: table supporting ``data.loc[row, column] = value``.
        loca: row label of ``data`` to fill.

    Returns:
        ``data`` after the calories, splat points, heart-rate and zone
        columns of the row have been written.

    Raises:
        IndexError: when fewer than four ``h2.numbers`` or five
            ``p.numbers-summary`` nodes are present.
    """
    # Headline figures appear in the order: calories, avg HR, splat
    # points, avg max HR.
    numbers = email.find_all("h2", {"class": "numbers"})
    calories = numbers[0].text.strip()
    splat_points = numbers[2].text.strip()
    avg_hr = numbers[1].text.replace('avg hr', '').strip()
    avg_max_hr = numbers[3].text.replace('%', '').replace('AVG MAX HR', '').strip()

    # Time per heart-rate zone, in display order.
    zone_nodes = email.find_all("p", {"class": "numbers-summary"})
    zone_values = [zone_nodes[position].text.strip() for position in range(5)]

    for column, value in zip(('Grey', 'Blue', 'Green', 'Orange', 'Red'), zone_values):
        data.loc[loca, column] = value
    data.loc[loca, 'Calories'] = calories
    data.loc[loca, 'SPs'] = splat_points
    data.loc[loca, 'AvgHRate'] = avg_hr
    data.loc[loca, 'AvgMaxHR'] = avg_max_hr
    return data

# Script entry: log in, open the inbox, process it, and always release the
# connection afterwards.
# NOTE: the module-level names ``type`` and ``data`` (``type`` shadows the
# builtin) are kept unchanged because other code in this file may read them.
try:
    type, data = m.login(FROM_EMAIL, FROM_PWD)
except imaplib.IMAP4.error:
    print("LOGIN FAILED!!! ")
    sys.exit(1)
print("%s %s" % (type, data))

try:
    type, data = m.select('inbox')

    if type == 'OK':
        print("Processing mailbox...\n")
        try:
            process_OTFWorkouts(m)
        finally:
            # Close the selected mailbox even when processing raises.
            m.close()
    else:
        print("ERROR: Unable to open mailbox  %s" % type)
finally:
    # Log out on every path so the server-side session is not left open.
    m.logout()