· 5 years ago · Sep 29, 2020, 01:56 PM
1cells": [
2 {
3 "cell_type": "code",
4 "execution_count": 1,
5 "metadata": {},
6 "outputs": [],
7 "source": []
8 },
9 {
10 "cell_type": "markdown",
11 "metadata": {},
12 "source": [
13 "# Assignment 1"
14 ]
15 },
16 {
17 "cell_type": "code",
18 "execution_count": 16,
19 "metadata": {},
20 "outputs": [
21 {
22 "name": "stdout",
23 "output_type": "stream",
24 "text": [
25 "there are 100 values with a sum of 470520\n"
26 ]
27 }
28 ],
29 "source": [
30 "import re\n",
31 "fname=open(\"regex_sum_523065.txt\")\n",
32 "summ=0\n",
33 "count=0\n",
34 "for line in fname:\n",
35 " f=re.findall('[0-9]+',line)\n",
36 " for num in f:\n",
37 " count+=1\n",
38 " summ=summ+(int(num))\n",
39 "print (\"there are \",count,\"values with a sum of \",summ)"
40 ]
41 },
42 {
43 "cell_type": "markdown",
44 "metadata": {},
45 "source": [
46 "# Assignment 2.1"
47 ]
48 },
49 {
50 "cell_type": "code",
51 "execution_count": 17,
52 "metadata": {},
53 "outputs": [],
54 "source": [
55 "import urllib.request,urllib.parse,urllib.error\n",
56 "from bs4 import BeautifulSoup\n",
57 "import ssl\n",
58 "\n",
59 "ctx=ssl.create_default_context()\n",
60 "ctx.check_hostname=False\n",
61 "ctx.verify_mode=ssl.CERT_NONE"
62 ]
63 },
64 {
65 "cell_type": "code",
66 "execution_count": 19,
67 "metadata": {},
68 "outputs": [],
69 "source": [
70 "link= \"http://py4e-data.dr-chuck.net/comments_523067.html\"\n",
71 "html=urllib.request.urlopen(link,context=ctx).read()\n",
72 "soup=BeautifulSoup(html,'html.parser')"
73 ]
74 },
75 {
76 "cell_type": "code",
77 "execution_count": 20,
78 "metadata": {},
79 "outputs": [
80 {
81 "name": "stdout",
82 "output_type": "stream",
83 "text": [
84 "count is 50\n",
85 "sum is 2447\n"
86 ]
87 }
88 ],
89 "source": [
90 "tags=soup('span')\n",
91 "s=0\n",
92 "c=0\n",
93 "for tag in tags:\n",
94 " c+=1\n",
95 " s+=int(tag.contents[0])\n",
96 "print (\"count is \", c)\n",
97 "print (\"sum is \",s)"
98 ]
99 },
100 {
101 "cell_type": "markdown",
102 "metadata": {},
103 "source": [
104 "# Assignment 2.2 "
105 ]
106 },
107 {
108 "cell_type": "code",
109 "execution_count": 3,
110 "metadata": {},
111 "outputs": [],
112 "source": [
113 "import urllib.request,urllib.parse,urllib.error\n",
114 "from bs4 import BeautifulSoup\n",
115 "import ssl\n",
116 "import urllib\n",
117 "\n",
118 "ctx=ssl.create_default_context()\n",
119 "ctx.check_hostname=False\n",
120 "ctx.verify_mode=ssl.CERT_NONE"
121 ]
122 },
123 {
124 "cell_type": "code",
125 "execution_count": 6,
126 "metadata": {},
127 "outputs": [
128 {
129 "name": "stdout",
130 "output_type": "stream",
131 "text": [
132 "Enter URL:http://py4e-data.dr-chuck.net/known_by_Rachna.html\n",
133 "Enter count:7\n",
134 "Enter position:18\n",
135 "Retriving: http://py4e-data.dr-chuck.net/known_by_Rachna.html\n"
136 ]
137 },
138 {
139 "name": "stderr",
140 "output_type": "stream",
141 "text": [
142 "C:\\Users\\Hanan\\Anaconda3\\lib\\site-packages\\bs4\\__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system (\"lxml\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n",
143 "\n",
144 "The code that caused this warning is on line 193 of the file C:\\Users\\Hanan\\Anaconda3\\lib\\runpy.py. To get rid of this warning, change code that looks like this:\n",
145 "\n",
146 " BeautifulSoup(YOUR_MARKUP})\n",
147 "\n",
148 "to this:\n",
149 "\n",
150 " BeautifulSoup(YOUR_MARKUP, \"lxml\")\n",
151 "\n",
152 " markup_type=markup_type))\n"
153 ]
154 },
155 {
156 "name": "stdout",
157 "output_type": "stream",
158 "text": [
159 "Jazeb\n"
160 ]
161 }
162 ],
163 "source": [
164 "link=input(\"Enter URL:\")\n",
165 "count=int(input(\"Enter count:\"))\n",
166 "pos=int(input(\"Enter position:\"))\n",
167 "\n",
168 "print (\"Retriving: \",link)\n",
169 "for i in range(0,count):\n",
170 " html=urllib.request.urlopen(link).read()\n",
171 " soup=BeautifulSoup(html)\n",
172 " tags=soup('a')\n",
173 " \n",
174 " link=tags[pos-1].get('href')\n",
175 "\n",
176 "result=tags[pos-1].contents[0]\n",
177 "print (result)\n",
178 " "
179 ]
180 },
181 {
182 "cell_type": "markdown",
183 "metadata": {},
184 "source": [
185 "### Assignment 5"
186 ]
187 },
188 {
189 "cell_type": "code",
190 "execution_count": 3,
191 "metadata": {},
192 "outputs": [
193 {
194 "name": "stdout",
195 "output_type": "stream",
196 "text": [
197 "Enter Urlhttp://py4e-data.dr-chuck.net/comments_523069.xml\n",
198 "2578\n"
199 ]
200 }
201 ],
202 "source": [
203 "import urllib\n",
204 "import json\n",
205 "import xml.etree.ElementTree as ET\n",
206 "url=input(\"Enter Url\")\n",
207 "u=urllib.request.urlopen(url)\n",
208 "data=u.read()\n",
209 "xml_data=ET.fromstring(data)\n",
210 "search_str=\"comments/comment\"\n",
211 "count_tags=xml_data.findall(search_str)\n",
212 "\n",
213 "total=0\n",
214 "for tags in count_tags:\n",
215 " c=tags.find('count')\n",
216 " total+=int(c.text)\n",
217 " \n",
218 "print(total)"
219 ]
220 },
221 {
222 "cell_type": "code",
223 "execution_count": null,
224 "metadata": {},
225 "outputs": [],
226 "source": []
227 },
228 {
229 "cell_type": "code",
230 "execution_count": null,
231 "metadata": {},
232 "outputs": [],
233 "source": []
234 },
235 {
236 "cell_type": "code",
237 "execution_count": 4,
238 "metadata": {},
239 "outputs": [
240 {
241 "name": "stdout",
242 "output_type": "stream",
243 "text": [
244 "Enter Urlhttp://py4e-data.dr-chuck.net/comments_523070.json\n",
245 "2691\n"
246 ]
247 }
248 ],
249 "source": [
250 "import urllib\n",
251 "import json\n",
252 "url=input(\"Enter Url\")\n",
253 "u=urllib.request.urlopen(url)\n",
254 "dat=u.read()\n",
255 "data=json.loads(dat)\n",
256 "\n",
257 "total=0\n",
258 "for tags in data['comments']:\n",
259 " total+=tags[\"count\"]\n",
260 "print(total)"
261 ]
262 },
263 {
264 "cell_type": "code",
265 "execution_count": null,
266 "metadata": {},
267 "outputs": [],
268 "source": []
269 },
270 {
271 "cell_type": "code",
272 "execution_count": null,
273 "metadata": {},
274 "outputs": [],
275 "source": []
276 },
277 {
278 "cell_type": "code",
279 "execution_count": 2,
280 "metadata": {},
281 "outputs": [],
282 "source": [
283 "# import urllib\n",
284 "# import json\n",
285 "# u = 'http://py4e-data.dr-chuck.net/json?'\n",
286 "# address_i= input(\"Enter address\")\n",
287 "# url=u+urllib.parse.urlencode({'address':address_i})\n",
288 "# print(\"Reteriving \",url)\n",
289 "# u=urllib.request.urlopen(url)\n",
290 "# data=u.read().decode('utf-8')\n",
291 "# print(\"retrived \",len(data), \"characters\")\n",
292 "# js=json.loads(data)\n",
293 "\n",
294 "# print (js)\n",
295 "# # place_id=js['results'][0]['place_id']\n",
296 "# # print(place_id)"
297 ]
298 },
299 {
300 "cell_type": "code",
301 "execution_count": null,
302 "metadata": {},
303 "outputs": [],
304 "source": []
305 },
306 {
307 "cell_type": "code",
308 "execution_count": null,
309 "metadata": {},
310 "outputs": [
311 {
312 "name": "stdout",
313 "output_type": "stream",
314 "text": [
315 "Enter location: Universidad de la Sabana\n",
316 "Retrieving http://py4e-data.dr-chuck.net/json?address=Universidad+de+la+Sabana&key=42\n",
317 "Retrieved 1825 characters\n",
318 "{\n",
319 " \"results\": [\n",
320 " {\n",
321 " \"access_points\": [],\n",
322 " \"address_components\": [\n",
323 " {\n",
324 " \"long_name\": \"Ch\\u00eda\",\n",
325 " \"short_name\": \"Ch\\u00eda\",\n",
326 " \"types\": [\n",
327 " \"locality\",\n",
328 " \"political\"\n",
329 " ]\n",
330 " },\n",
331 " {\n",
332 " \"long_name\": \"Ch\\u00eda\",\n",
333 " \"short_name\": \"Ch\\u00eda\",\n",
334 " \"types\": [\n",
335 " \"administrative_area_level_2\",\n",
336 " \"political\"\n",
337 " ]\n",
338 " },\n",
339 " {\n",
340 " \"long_name\": \"Cundinamarca\",\n",
341 " \"short_name\": \"Cundinamarca\",\n",
342 " \"types\": [\n",
343 " \"administrative_area_level_1\",\n",
344 " \"political\"\n",
345 " ]\n",
346 " },\n",
347 " {\n",
348 " \"long_name\": \"Colombia\",\n",
349 " \"short_name\": \"CO\",\n",
350 " \"types\": [\n",
351 " \"country\",\n",
352 " \"political\"\n",
353 " ]\n",
354 " },\n",
355 " {\n",
356 " \"long_name\": \"250001\",\n",
357 " \"short_name\": \"250001\",\n",
358 " \"types\": [\n",
359 " \"postal_code\"\n",
360 " ]\n",
361 " }\n",
362 " ],\n",
363 " \"formatted_address\": \"Ch\\u00eda, Cundinamarca, Colombia\",\n",
364 " \"geometry\": {\n",
365 " \"location\": {\n",
366 " \"lat\": 4.8615787,\n",
367 " \"lng\": -74.0325368\n",
368 " },\n",
369 " \"location_type\": \"GEOMETRIC_CENTER\",\n",
370 " \"viewport\": {\n",
371 " \"northeast\": {\n",
372 " \"lat\": 4.862927680291502,\n",
373 " \"lng\": -74.03118781970849\n",
374 " },\n",
375 " \"southwest\": {\n",
376 " \"lat\": 4.860229719708498,\n",
377 " \"lng\": -74.0338857802915\n",
378 " }\n",
379 " }\n",
380 " },\n",
381 " \"place_id\": \"ChIJ0dzQiP6HP44RpPAK-KBoDlk\",\n",
382 " \"plus_code\": {\n",
383 " \"compound_code\": \"VX68+JX Ch\\u00eda, Cundinamarca, Colombia\",\n",
384 " \"global_code\": \"67P7VX68+JX\"\n",
385 " },\n",
386 " \"types\": [\n",
387 " \"establishment\",\n",
388 " \"point_of_interest\",\n",
389 " \"university\"\n",
390 " ]\n",
391 " }\n",
392 " ],\n",
393 " \"status\": \"OK\"\n",
394 "}\n",
395 "lat 4.8615787 lng -74.0325368\n",
396 "Chía, Cundinamarca, Colombia\n"
397 ]
398 }
399 ],
400 "source": [
401 "import urllib.request, urllib.parse, urllib.error\n",
402 "import json\n",
403 "import ssl\n",
404 "\n",
405 "api_key = False\n",
406 "# If you have a Google Places API key, enter it here\n",
407 "# api_key = 'AIzaSy___IDByT70'\n",
408 "# https://developers.google.com/maps/documentation/geocoding/intro\n",
409 "\n",
410 "if api_key is False:\n",
411 " api_key = 42\n",
412 " serviceurl = 'http://py4e-data.dr-chuck.net/json?'\n",
413 "else :\n",
414 " serviceurl = 'https://maps.googleapis.com/maps/api/geocode/json?'\n",
415 "\n",
416 "# Ignore SSL certificate errors\n",
417 "ctx = ssl.create_default_context()\n",
418 "ctx.check_hostname = False\n",
419 "ctx.verify_mode = ssl.CERT_NONE\n",
420 "\n",
421 "while True:\n",
422 " address = input('Enter location: ')\n",
423 " if len(address) < 1: break\n",
424 "\n",
425 " parms = dict()\n",
426 " parms['address'] = address\n",
427 " if api_key is not False: parms['key'] = api_key\n",
428 " url = serviceurl + urllib.parse.urlencode(parms)\n",
429 "\n",
430 " print('Retrieving', url)\n",
431 " uh = urllib.request.urlopen(url, context=ctx)\n",
432 " data = uh.read().decode()\n",
433 " print('Retrieved', len(data), 'characters')\n",
434 "\n",
435 " try:\n",
436 " js = json.loads(data)\n",
437 " except:\n",
438 " js = None\n",
439 "\n",
440 " if not js or 'status' not in js or js['status'] != 'OK':\n",
441 " print('==== Failure To Retrieve ====')\n",
442 " print(data)\n",
443 " continue\n",
444 "\n",
445 " print(json.dumps(js, indent=4))\n",
446 "\n",
447 " lat = js['results'][0]['geometry']['location']['lat']\n",
448 " lng = js['results'][0]['geometry']['location']['lng']\n",
449 " print('lat', lat, 'lng', lng)\n",
450 " location = js['results'][0]['formatted_address']\n",
451 " print(location)"
452 ]
453 },
454 {
455 "cell_type": "code",
456 "execution_count": 2,
457 "metadata": {},
458 "outputs": [
459 {
460 "name": "stdout",
461 "output_type": "stream",
462 "text": [
463 "HTTP/1.1 200 OK\r\n",
464 "Date: Thu, 21 May 2020 22:58:02 GMT\r\n",
465 "Server: Apache/2.4.18 (Ubuntu)\r\n",
466 "Last-Modified: Sat, 13 May 2017 11:22:22 GMT\r\n",
467 "ETag: \"1d3-54f6609240717\"\r\n",
468 "Accept-Ranges: bytes\r\n",
469 "Content-Length: 467\r\n",
470 "Cache-Control: max-age=0, no-cache, no-store, must-revalidate\r\n",
471 "Pragma: no-cache\r\n",
472 "Expires: Wed, 11 Jan 1984 05:00:00 GMT\r\n",
473 "Connection: close\r\n",
474 "Content-Type: text/plain\r\n",
475 "\r\n",
476 "Why should you learn to write programs?\n",
477 "\n",
478 "Writing programs (or programming) is a very creative \n",
479 "and rewarding activity. You can write programs for \n",
480 "many reasons, ranging from making your living to solving\n",
481 "a difficult data analysis problem to having fun to helping\n",
482 "someone else solve a problem. This book assumes that \n",
483 "everyone needs to know how to program, and that once \n",
484 "you know how to program you will figure out what you want \n",
485 "to do with your newfound skills. \n"
486 ]
487 }
488 ],
489 "source": [
490 "import socket\n",
491 "\n",
492 "mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
493 "mysock.connect(('data.pr4e.org', 80))\n",
494 "cmd = 'GET http://data.pr4e.org/intro-short.txt HTTP/1.0\\r\\n\\r\\n'.encode()\n",
495 "mysock.send(cmd)\n",
496 "\n",
497 "while True:\n",
498 " data = mysock.recv(512)\n",
499 " if len(data) < 1:\n",
500 " break\n",
501 " print(data.decode(),end='')\n",
502 "\n",
503 "mysock.close()"
504 ]
505 },
506 {
507 "cell_type": "code",
508 "execution_count": null,
509 "metadata": {},
510 "outputs": [],
511 "source": []
512 }
513 ],
514 "metadata": {
515 "kernelspec": {
516 "display_name": "Python 3",
517 "language": "python",
518 "name": "python3"
519 },
520 "language_info": {
521 "codemirror_mode": {
522 "name": "ipython",
523 "version": 3
524 },
525 "file_extension": ".py",
526 "mimetype": "text/x-python",
527 "name": "python",
528 "nbconvert_exporter": "python",
529 "pygments_lexer": "ipython3",
530 "version": "3.6.4"
531 }
532 },
533 "nbformat": 4,
534 "nbformat_minor": 2
535}