· 6 years ago · Apr 21, 2020, 11:20 PM
1
2
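# Usage: Run python captcha.py (after uncommenting the Test429() call at the bottom of this file)
# Output: Prints the collected cookie values and the combined cookie string
# Requires Python 3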
6
7
8# anti_captcha_secret_key.txt should hold the 32 hexadecimal character secret key from anti-captcha.com
9# NOTE: Will use 1 captcha (about 0.22 cents) per run
10
# Load the anti-captcha.com client key from disk when the module is loaded.
# Surrounding whitespace (e.g. a trailing newline) is stripped before the
# length check; anything other than 32 characters is rejected.
with open('anti_captcha_secret_key.txt', 'r', encoding='utf-8') as key_file:
    CAPTCHA_CLIENT_KEY = key_file.read().strip()

if len(CAPTCHA_CLIENT_KEY) != 32:
    raise RuntimeError('CAPTCHA_CLIENT_KEY has wrong length: %d!!!' % len(CAPTCHA_CLIENT_KEY))
16
17
18import http.client
19import json
20import time
21import re
22from urllib.parse import urlparse, parse_qs, urlencode
23
24
def CreateTaskForCaptcha(url, siteKey):
    """Create a "NoCaptchaTaskProxyless" task on api.anti-captcha.com.

    Args:
        url: Sent as the task's ``websiteURL``.
        siteKey: Sent as the task's ``websiteKey``.

    Returns:
        The ``taskId`` field of the API response, converted to ``int``.

    Raises:
        RuntimeError: If the response reports an error (non-zero ``errorId``
            or an ``error`` field) or does not contain a ``taskId``.
    """
    postData = {
        'clientKey': CAPTCHA_CLIENT_KEY,
        'task': {
            'type': "NoCaptchaTaskProxyless",
            'websiteURL': url,
            'websiteKey': siteKey,
        },
    }
    body = json.dumps(postData).encode('utf-8')

    # NOTE(review): this debug output contains the secret client key.
    print(body)

    conn = http.client.HTTPSConnection('api.anti-captcha.com', timeout=30.0)
    try:
        conn.request('POST', '/createTask', body=body)
        res = conn.getresponse()
        response = json.loads(res.read().decode('utf-8', 'ignore'))
    finally:
        # Release the socket even when the request or the JSON parse fails.
        conn.close()

    print(response)

    # Same error convention that GetCompletedCaptcha checks: a non-zero
    # errorId accompanied by an errorDescription.
    if response.get('errorId', 0) != 0:
        raise RuntimeError('Anti-captcha API error %s: "%s"' % (
            response['errorId'],
            response.get('errorDescription', response.get('errorCode', ''))))

    # Kept from the original check for an "error" field.
    if 'error' in response:
        raise RuntimeError('Anti-captcha API error: "%s"' % response["error"])

    if 'taskId' not in response:
        raise RuntimeError('Anti-captcha API response has no taskId: %r' % (response,))

    return int(response["taskId"])
57
58
def GetCompletedCaptcha(taskID):
    """Poll api.anti-captcha.com until the given task has a solution.

    Sleeps 6 seconds before every poll and gives up after 61 polls.

    Args:
        taskID: Task id sent as ``taskId`` to ``/getTaskResult``.

    Returns:
        ``response["solution"]["gRecaptchaResponse"]`` from the first
        response whose ``status`` is ``'ready'``.

    Raises:
        RuntimeError: If a response carries a non-zero ``errorId``, or if the
            task is still not ready after the last poll.
    """
    # The request body never changes between polls, so build it once.
    postData = {
        'clientKey': CAPTCHA_CLIENT_KEY,
        'taskId': taskID,
    }
    body = json.dumps(postData).encode('utf-8')

    maxPolls = 61

    for _ in range(maxPolls):
        time.sleep(6.0)

        # NOTE(review): this debug output contains the secret client key.
        print(body)

        conn = http.client.HTTPSConnection('api.anti-captcha.com', timeout=30.0)
        try:
            conn.request('POST', '/getTaskResult', body=body)
            res = conn.getresponse()
            response = json.loads(res.read().decode('utf-8', 'ignore'))
        finally:
            # Close on every path, including errors raised while polling.
            conn.close()

        print(response)

        if response.get('status') == 'ready':
            return response["solution"]["gRecaptchaResponse"]

        if response.get('errorId', 0) != 0:
            raise RuntimeError('Getting completed captcha resulted in error %d, "%s"' % (
                response['errorId'], response.get("errorDescription", '')))

        print('Not ready yet....')

    raise RuntimeError('We\'ve probably messed something up, task %d cannot be completed it seems' % taskID)
96
97
98
# Patterns used to scrape the captcha page. They match the exact attribute
# order and quoting written here; anything else is not recognised.
formActionReg = re.compile(r'<form action="([^"]*)" method="POST">')
formInputReg = re.compile(r'<input type="[^"]*" name="([^"]*)" value="([^"]*)">')
siteKeyReg = re.compile(r'<div class="g-recaptcha" data-sitekey="([a-zA-Z0-9\-_]*)">')


def GrabDataOffCaptchaHTML(pageTxt):
    """Extract the form action, site key and input fields from captcha HTML.

    Args:
        pageTxt: HTML text of the captcha page.

    Returns:
        A dict with the keys ``'action'`` (form action attribute),
        ``'site_key'`` (the ``data-sitekey`` attribute) and ``'inputs'``
        (dict mapping input names to their values).

    Raises:
        RuntimeError: If no form action or no site key can be found.
    """
    actionFound = formActionReg.search(pageTxt)
    if actionFound is None:
        raise RuntimeError('Could not find form action!!')

    # Restrict further searching to the text between the first
    # '<form action="' and the '</form>' that follows it.
    afterFormStart = pageTxt.partition('<form action="')[2]
    formTxt = afterFormStart.partition('</form>')[0]

    keyFound = siteKeyReg.search(formTxt)
    if keyFound is None:
        raise RuntimeError('Could not find site key!!')

    # A later input with a repeated name overrides the earlier one.
    fields = {found.group(1): found.group(2)
              for found in formInputReg.finditer(formTxt)}

    return {
        'action': actionFound.group(1),
        'site_key': keyFound.group(1),
        'inputs': fields,
    }
132
133
def FinishCaptcha(url, formAction, headers, formInputs):
    """POST the captcha form to www.youtube.com and collect response cookies.

    Args:
        url: Not used by this function; kept so existing callers still work.
        formAction: Request path the form is posted to.
        headers: Request headers to send. The dict is copied, so the
            caller's dict is left unmodified.
        formInputs: Form fields, sent URL-encoded as the request body.

    Returns:
        A dict mapping cookie names to values, taken from the ``Set-Cookie``
        headers of the response. Empty if the response sets no cookies.
    """
    body = urlencode(formInputs).encode('utf-8')

    # Copy before adding Content-Type so the caller's dict is not mutated.
    requestHeaders = dict(headers)
    requestHeaders['Content-Type'] = 'application/x-www-form-urlencoded'
    print(body)

    conn = http.client.HTTPSConnection('www.youtube.com', timeout=30.0)
    try:
        conn.request("POST", formAction, body=body, headers=requestHeaders)
        res = conn.getresponse()
        responseHeaders = res.getheaders()

        print('Captcha result headers:')
        print(responseHeaders)

        # The body is read as before, but its content is not needed.
        res.read()
    finally:
        # Release the socket even when the request fails.
        conn.close()

    cookieVals = {}
    for headerName, headerValue in responseHeaders:
        if headerName.lower() != 'set-cookie':
            continue
        # Only the leading "name=value" pair matters; attributes after the
        # first ';' are dropped. Split on the first '=' only, because the
        # value itself may contain '='.
        cookieName, separator, cookieValue = headerValue.split(';', 1)[0].partition('=')
        if not separator:
            # No '=' at all: not a usable name/value pair, skip it.
            continue
        cookieVals[cookieName] = cookieValue

    return cookieVals
158 return cookieVals
159
def CookieDictToString(cookieVals):
    """Join a cookie dict into a single 'name=value; name=value' string.

    Args:
        cookieVals: Dict mapping cookie names to cookie values.

    Returns:
        The pairs in dict iteration order, separated by '; '. An empty dict
        gives an empty string.
    """
    pairs = ('%s=%s' % (name, value) for name, value in cookieVals.items())
    return '; '.join(pairs)
168 return CookieStr
169
def Test429():
    """Fetch a watch page and, if the response is a 429, solve its captcha.

    Steps: GET the watch page from www.youtube.com; when the status is 429,
    scrape the captcha form from the body, create a solving task, wait for
    the solution, submit the form, and print the combined cookies.

    Returns:
        None. Returns early, after printing a message, when the status of
        the watch page response is not 429.
    """
    # Random video, could be any
    path = '/watch?v=n9KjA471Y3k'

    print('------------Fetching watch page----------')
    conn = http.client.HTTPSConnection('www.youtube.com', timeout=30.0)
    try:
        conn.request("GET", path)
        res = conn.getresponse()

        if res.status != 429:
            print('Not 429\'d, dont bother with captcha')
            return

        responseHeaders = res.getheaders()
        pageTxt = res.read().decode('utf-8', 'ignore')
    finally:
        # Also runs on the early return above, so the socket never leaks.
        conn.close()

    cookieVals = {}
    for headerName, headerValue in responseHeaders:
        if headerName.lower() != 'set-cookie':
            continue
        # Keep only the leading "name=value" pair and split on the first '='
        # only, because the value itself may contain '='.
        cookieName, separator, cookieValue = headerValue.split(';', 1)[0].partition('=')
        if not separator:
            # No '=' at all: not a usable name/value pair, skip it.
            continue
        cookieVals[cookieName] = cookieValue

    CookieStr = CookieDictToString(cookieVals)

    print('Initial watch page cookies: "%s"' % CookieStr)

    captchaData = GrabDataOffCaptchaHTML(pageTxt)

    print('------------Creating task for captcha----------')
    taskID = CreateTaskForCaptcha('https://www.youtube.com' + path, captchaData['site_key'])

    print('------------Waiting for task to finish----------')
    captchaResult = GetCompletedCaptcha(taskID)

    # The solved token is submitted together with the scraped form fields.
    inputs = captchaData['inputs']
    inputs["g-recaptcha-response"] = captchaResult

    print(inputs)

    headers = {'Cookie': CookieStr}

    print('---------------Submitting form request to Captcha page----------')
    newCookieVals = FinishCaptcha(path, captchaData['action'], headers, inputs)

    if len(newCookieVals) == 0:
        print('ERROR: Could not get any new cookies from the captcha form response, possibly ran consecutively too fast')

    # Cookies from the form response override the initial ones of the same name.
    cookieVals.update(newCookieVals)

    print('-----------------Output----------------')
    print('Cookie vals: %s' % str(cookieVals))
    print('Cookie string: %s' % CookieDictToString(cookieVals))
229 print('Cookie string: %s' % CookieDictToString(cookieVals))
230
231
232# Uncomment this to actually run the test
233# Test429()