# Pasted on Aug 18, 2021, 08:38 AM.
# Created by dEN5#7360 (Discord).
# Use https://curl.trillworks.com/ (converts a cURL command into Python requests code).
3
import json
import random
import re
import time

import requests
from bs4 import BeautifulSoup as bs
7
# Image-format filter name -> value sent as the ``itype`` query parameter
# of the Yandex image search request.
type_img_d = {
    "gif": "gifan",
    "png": "png",
    "jpg": "jpg",
}
14
# Russian size label -> value sent as the ``isize`` query parameter of the
# Yandex image search request ("Большие" = large, "Средние" = medium,
# "Маленькие" = small).
type_img_size = {
    "Большие": "large",
    "Средние": "medium",
    "Маленькие": "small",
}
21
def _image_filter_params(type_img, recent, img_size):
    """Build the optional filter query parameters for a Yandex image search.

    Returns a list of ``(name, value)`` tuples; empty when no filter is set.
    """
    extra = []
    if type_img:
        extra.append(("itype", type_img_d[type_img]))
    if recent:
        extra.append(("recent", "7D"))
    if img_size:
        try:
            extra.append(("isize", type_img_size[img_size]))
        except (KeyError, TypeError):
            # Not a known size label (KeyError) or an unhashable value such as
            # a list (TypeError): treat it as an exact (width, height) pair.
            extra.extend([("isize", "eq"), ("iw", img_size[0]), ("ih", img_size[1])])
    return extra


def _parse_serp_items(html, item_class):
    """Return ``(preview_url, serp_item)`` pairs for the result tiles in *html*.

    Tiles whose ``data-bem`` attribute is missing, is not valid JSON, or lacks
    ``["serp-item"]["preview"][0]["url"]`` are skipped.
    """
    pairs = []
    for tile in bs(html, 'html.parser').find_all("div", class_=item_class):
        try:
            serp_item = json.loads(tile.get("data-bem"))["serp-item"]
            url = serp_item["preview"][0]["url"]
        except (TypeError, ValueError, KeyError, IndexError):
            continue
        pairs.append((url, serp_item))
    return pairs


def get_req_img_whith_yandex(query_mn, start_=0, limit=1, type_="choice", add_page=True,
                             img_size=None, type_img=None, recent=False):
    """Search Yandex Images for *query_mn* and return the preview image URLs.

    Pages are requested one at a time, starting at page ``start_``. Fetching
    stops after ``limit`` pages (at least one), or earlier when a response
    does not contain the expected ``blocks[2]["html"]`` JSON payload.

    Args:
        query_mn: Search text, sent as the ``text`` query parameter.
        start_: Index of the first page, sent as the ``p`` query parameter.
        limit: Number of pages to fetch; values below 1 fetch a single page.
        type_: Selects the return value (see Returns).
        add_page: When falsy, the fetch loop is skipped and no request is made.
        img_size: Optional size filter: a key of ``type_img_size``, or an
            indexable ``(width, height)`` pair for an exact size.
        type_img: Optional format filter: a key of ``type_img_d``.
        recent: When truthy, adds the ``recent=7D`` query parameter.

    Returns:
        * ``"all"``    -- list of all collected preview URLs.
        * ``"choice"`` -- tuple ``(random URL, number of collected URLs)``.
        * ``"dic_ch"`` -- one random ``serp-item`` dict.
        * ``"p_dict"`` -- list of ``{"<page counter>": [urls]}`` dicts, one per
          fetched page; the counter is the page index plus one.
        * anything else -- ``None``.

    Raises:
        IndexError: ``type_`` is ``"choice"`` or ``"dic_ch"`` and nothing was found.
        KeyError: ``type_img`` is not a key of ``type_img_d``.
        requests.RequestException: The HTTP request failed or timed out.
    """
    # NOTE(security): the cookie below embeds a personal Yandex session
    # (Session_id / sessionid2 / yandex_login). It is kept because removing it
    # would change the request, but it should be moved out of source control.
    headers = {
        'authority': 'yandex.ru',
        'sec-ch-ua': '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
        'device-memory': '4',
        'rtt': '250',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
        'viewport-width': '791',
        'accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
        'x-requested-with': 'XMLHttpRequest',
        'dpr': '1',
        'downlink': '4.6',
        'ect': '4g',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://yandex.ru/images/search?from=tabbar&text=google%20search%20api',
        'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6,zh;q=0.5',
        'cookie': 'yandexuid=1656658121627748886; is_gdpr=0; is_gdpr_b=CNuvQhCMPSgC; mda=0; yandex_gid=47; yuidss=1656658121627748886; ymex=1943281930.yrts.1627921930; gdpr=0; _ym_uid=1627921929389168988; font_loaded=YSv1; yabs-frequency=/5/00020000002bD0nX/xFboS9G0000eGY40/; my=YwA=; L=cgBqXEteYXkIUmFJaVV0fXZ/aXBXXlFYNzwvGUYbKAsV.1628413571.14691.359285.54149c8c4c01121b8b6e6d04fb2fb80b; yandex_login=twist.mas; i=lBKPw0MJ87nk53YzGgUCz+4kRB2Ea8Nroexu9+2ehmDVh1fiUS4cqRo3nD182MfeHmbbispw491bNV/CGOPRnBRTK2A=; Session_id=3:1628676515.5.0.1628413571717:hi1tWQ:2a.1|888287499.-1.2.1:189654856|3:239043.803375.rk3U96tP19VVQQMbau3BJvQYeNQ; sessionid2=3:1628676515.5.0.1628413571717:hi1tWQ:2a.1|888287499.-1.2.1:189654856|3:239043.803375.rk3U96tP19VVQQMbau3BJvQYeNQ; tuid=a:d8727723e3c055094b006a4226755b62a9ce317c946a8ef62467b5c6a851087b; Bismuth=1; computer=1; ys=udn.cDpNQVNURVIgVFdJU1Q%3D#vbch.2-35-0#wprid.1628846326454594-1673560982647130327-vla1-1886-vla-l7-balancer-prod-8080-BAL-65#c_chck.2975178627; lsq=google%20search%20api; _yasc=5ZmPesKO+l58urL8iml5IBOQWrpxYoRhWBWLXmGSDfwBCWHMJe4uP4O4; yp=1944917131.sp.aflt%3A1628693131#1628965240.zlgn_smrt.1#1629019508.gpauto.56_279039999999995%3A44_0139776%3A115341%3A3%3A1628846708#1660379523.p_cl.1628843522#1628965005.mcl.irflfd#1630513927.ygu.1#1943773571.udn.cDpNQVNURVIgVFdJU1Q%3D#1628960347.mct.null#1631438711.los.1#1660379380.p_sw.1628843379#1628960347.mcv.0#1628960347.szm.1%3A1366x768%3A791x625#1631438711.losc.0#1659705474.ln_tp.01#1629105260.clh.2063711#1631525140.csc.1#1628933379.nps.640319500%3Aclose',
    }

    # Loop-invariant work is done once: the filter parameters and the regex
    # that selects result tiles by their class string.
    filter_params = _image_filter_params(type_img, recent, img_size)
    item_class = re.compile("serp-item serp-item_type_search serp-item_group_search serp-item_pos_.* serp-item_scale_yes justifier__item i-bem")

    list_links = []
    list_dict = []
    pager = []

    page = start_
    # Always fetch at least one page. The previous equality check against
    # ``limit + start_`` could never become true for ``limit == 1`` because
    # the counter only advanced when ``limit > 1``, so the loop never ended.
    last_page = start_ + max(limit, 1)
    started = time.monotonic()

    while add_page and page < last_page:
        params = [
            ('format', 'json'),
            ('request', '{"blocks":[{"block":"extra-content","params":{},"version":2},{"block":"serp-controller","params":{},"version":2},{"block":"serp-list_infinite_yes","params":{"initialPageNum":0},"version":2},{"block":"more_direction_next","params":{},"version":2},{"block":"gallery__items:ajax","params":{},"version":2}],"metadata":{"bundles":{"lb":"jCgK5?b*G$Xvb>:BUOR$"},"assets":{"las":"justifier-height=1;thumb-underlay=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;ca993f.0=1;d30d05.0=1;105ac6.0=1;bed1df.0=1"},"version":"0x0f74f9d0500","extraContent":{"names":["i-react-ajax-adapter"]}},"bmt":{"lb":"jCgK5?b*G$Xvb>:BUOR$"},"amt":{"las":"justifier-height=1;thumb-underlay=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;ca993f.0=1;d30d05.0=1;105ac6.0=1;bed1df.0=1"}}'),
            ('yu', '1656658121627748886'),
            ('p', page),
            ('from', 'tabbar'),
            ('text', query_mn),
            ('rpt', 'image'),
            ('serpid', 'a7tbQ4lJOYyrChOZD000iQ'),
            ('serpListType', 'horizontal'),
            ('thumbSnippet', '0'),
        ] + filter_params

        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get('https://yandex.ru/images/search', headers=headers,
                                params=params, timeout=10)
        try:
            # blocks[2] is the "serp-list_infinite_yes" block requested above.
            html = json.loads(response.text)["blocks"][2]['html']
        except (json.JSONDecodeError, KeyError, IndexError, TypeError):
            # Not the expected payload (non-JSON body or different JSON
            # shape): stop paging and return what was collected so far.
            break

        page_links = []
        for url, serp_item in _parse_serp_items(html, item_class):
            list_links.append(url)
            page_links.append(url)
            list_dict.append(serp_item)

        page += 1
        print(page)
        pager.append({f"{page}": page_links})
        print(len(list_links))

    # Elapsed time is measured here so it is correct on every exit path.
    print(len(list_links), time.monotonic() - started)

    if type_ == "all":
        return list_links
    if type_ == "choice":
        return random.choice(list_links), len(list_links)
    if type_ == "dic_ch":
        return random.choice(list_dict)
    if type_ == "p_dict":
        return pager
    return None