· 4 years ago · Mar 17, 2021, 07:58 PM
1import asyncio
2from dataclasses import dataclass
3from itertools import repeat
4from typing import Literal, Optional, Union, Dict, List, Tuple, Any
5from aiohttp import ClientTimeout, ClientSession, ContentTypeError, ClientRequest
6from aiohttp.client import DEFAULT_TIMEOUT
7from aiosocksy import Socks5Auth
8from aiosocksy.connector import ProxyConnector, ProxyClientRequest
9
10
@dataclass(frozen=True)
class Response:
    """
    Immutable result of a single HTTP request.

    status_code holds the HTTP status of the reply; response_data holds the
    decoded JSON payload when the body could be parsed as JSON, otherwise
    the raw body, or None when there is nothing to report.
    """
    status_code: int
    # A single Optional[Union[...]] is equivalent to the previously nested
    # Union[Optional[Union[...]]] form; typing flattens both to the same type.
    response_data: Optional[Union[dict, str, bytes, bytearray]]
17
18
@dataclass()
class CredentialService:
    """
    Credentials and address of a SOCKS proxy.

    Either ip_address (a "host:port" string that is combined with
    service_type into a URL) or a ready-made socks_url must be provided
    before get_proxy() is called.
    """
    login: str
    password: str
    service_type: Literal['SOCKS5', 'SOCKS4'] = 'SOCKS5'
    # NOTE(review): the auth object is always a Socks5Auth, even when
    # service_type is 'SOCKS4' - confirm the proxy library accepts that.
    proxy_auth: Optional[Socks5Auth] = None
    socks_url: Optional[str] = None
    ip_address: Optional[str] = None

    def get_proxy(self) -> Dict[str, Union[str, Socks5Auth]]:
        """
        Build the proxy keyword arguments for a request.

        The auth object is created lazily from login/password and cached on
        the instance. The proxy URL is rebuilt from ip_address whenever one
        is set; otherwise an explicitly supplied socks_url is used as is.

        :return: dict with the keys ``proxy_auth`` and ``proxy``
        :raises ValueError: if neither ip_address nor socks_url is set
        """
        if not isinstance(self.proxy_auth, Socks5Auth):
            self.proxy_auth = Socks5Auth(
                login=self.login,
                password=self.password
            )

        if self.ip_address:
            self.socks_url = '{socks_type}://{ip_address}'.format(
                socks_type=self.service_type.lower(),
                ip_address=self.ip_address
            )
        elif not self.socks_url:
            # Formatting a missing address used to yield 'socks5://None',
            # which only failed later with an obscure connection error.
            raise ValueError(
                'CredentialService needs ip_address or socks_url to build a proxy URL'
            )
        return dict(
            proxy_auth=self.proxy_auth,
            proxy=self.socks_url
        )
43
44
class RequestAuthError(Exception):
    """Raised when the POST or GET data fails authentication."""
50
51
class HttpBase:
    """Base class meant to hold the abstract methods shared by parsers."""
57
58
class HttpXParser(HttpBase):
    """
    Asynchronous HTTP client for a Django site that collects additional
    information.

    Default headers live in ``_base_headers``. They can be set with item
    assignment (``parser['Name'] = 'value'``) and read back through
    attribute access (``getattr(parser, 'Name')``).
    """
    # Not referenced anywhere inside this class.
    _sleep_time = 2

    def __init__(self):
        self._base_headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
            'Accept-Language': "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
            # 'Accept': 'application/json, text/plain, */*',
        }
        # Kept for backward compatibility only: every request now works with
        # its own session and connector, so these are never reassigned.
        self._session: Optional[ClientSession] = None
        self.url = 'http://127.0.0.1/api/'
        self._timeout = ClientTimeout(total=2 * 15, connect=None, sock_connect=5, sock_read=None)
        self._connector: Optional[ProxyConnector] = None

    async def _request(
            self,
            url: Optional[str] = None,
            get_json: bool = False,
            validate_django: bool = False,
            method: Literal['POST', 'GET'] = 'POST',
            set_timeout: bool = True,
            proxy: Optional[CredentialService] = None,
            data: Optional[Dict[str, Union[str, int, List[Union[str, int]]]]] = None,
            headers: Optional[Dict[str, Union[str, int]]] = None,
            session: Optional[ClientSession] = None,
            **client_kwargs) -> Response:
        """
        Send a single request.

        :param url: target URL; ``self.url`` is used when it is empty
        :param get_json: when True, a reply that is not ``application/json``
            raises RequestAuthError instead of being returned as raw bytes
        :param validate_django: when True, the secrets from ``_set_auth``
            are added to a copy of ``data`` before sending
        :param method: POST or GET (request type)
        :param set_timeout: use the parser's own timeout when True,
            aiohttp's DEFAULT_TIMEOUT otherwise
        :param proxy: SOCKS proxy credentials; ignored unless it is a
            CredentialService instance
        :param data: request payload
        :param headers: extra headers; they are merged over the defaults
            and win on conflict, the passed dict is not modified
        :param session: an existing aiohttp.ClientSession to reuse; it is
            left open, and when combined with ``proxy`` it must already have
            been created with a proxy-capable connector and request class
        :param client_kwargs: key/value for aiohttp.ClientSession
            initialization (only used when no session is supplied)
        :return: Response instance
        :raises RequestAuthError: if ``get_json`` is True and the reply is
            not ``application/json``
        """
        # dict.update() returns None, so its result must never be assigned;
        # build a fresh mapping instead and leave both inputs untouched.
        merged_headers = dict(self._base_headers)
        if isinstance(headers, dict):
            merged_headers.update(headers)

        use_proxy = isinstance(proxy, CredentialService)
        proxy_kwargs = proxy.get_proxy() if use_proxy else {}

        request_kwargs = dict(
            method=method,
            url=url if url else self.url,
            data=self._set_auth(data) if validate_django else data,
            headers=merged_headers,
            **proxy_kwargs
        )

        if isinstance(session, ClientSession):
            # The caller owns this session, so it is used but not closed.
            return await self._send(session, get_json, request_kwargs)

        # Connector and request class are per-call locals: a connector is
        # closed together with its session and must not be reused later.
        # NOTE(review): verify_ssl=False disables TLS certificate checks for
        # proxied requests - confirm this is intended.
        async with ClientSession(
                timeout=self._timeout if set_timeout else DEFAULT_TIMEOUT,
                connector=ProxyConnector(verify_ssl=False) if use_proxy else None,
                request_class=ProxyClientRequest if use_proxy else ClientRequest,
                **client_kwargs
        ) as own_session:
            return await self._send(own_session, get_json, request_kwargs)

    @staticmethod
    async def _send(
            session: ClientSession,
            get_json: bool,
            request_kwargs: Dict[str, Any]) -> Response:
        """Perform the request on ``session`` and wrap the reply in a Response."""
        # The body is read while the session is still open, and the context
        # manager releases the connection afterwards.
        async with session.request(**request_kwargs) as response:
            try:
                payload = await response.json(
                    content_type="application/json"
                )
            except ContentTypeError as ex:
                if get_json:
                    raise RequestAuthError() from ex
                payload = await response.read()
            return Response(
                status_code=response.status,
                response_data=payload
            )

    @staticmethod
    def _set_auth(
            data: Optional[
                Dict[str, Union[str, int, List[Union[str, int]], Tuple[Union[str, int], ...]]]
            ] = None) -> Dict[str, Any]:
        """
        Add the Django API secrets to a payload.

        :param data: dict with your headers or data; anything else is
            treated as an empty dict
        :return: a new dict holding the original items plus ``SECRET_KEY``
            and ``SECRET_CODE`` (both None when the settings module cannot
            be imported); the passed dict is not modified
        """
        try:
            from djangoProject.settings import SECRET_KEY, SECRET_CODE
        except ImportError:
            SECRET_KEY = None
            SECRET_CODE = None
        signed = dict(data) if isinstance(data, dict) else {}
        signed['SECRET_KEY'] = SECRET_KEY
        signed['SECRET_CODE'] = SECRET_CODE
        return signed

    async def fetch(self, *, times: int = 10, **kwargs) -> List[Response]:
        """
        Send the same request several times concurrently.

        :param times: number of requests to send
        :param kwargs: HttpXParser._request kwargs
        :return: the responses, in the order in which they completed
        """
        pending = [self._request(**kwargs) for _ in range(times)]
        return [await finished for finished in asyncio.as_completed(pending)]

    def fast(self) -> 'HttpXParser':
        """
        Switch asyncio to the faster uvloop event loop when it is installed.

        Without uvloop the current event loop policy is left untouched:
        AbstractEventLoopPolicy only defines the interface, so installing an
        instance of it would make every attempt to create a loop fail.

        :return: self, so the call can be chained
        """
        try:
            from uvloop import EventLoopPolicy
        except ImportError:
            return self
        asyncio.set_event_loop_policy(EventLoopPolicy())
        return self

    def __getattr__(self, item: str) -> Any:
        """
        Look up a default header by attribute name.

        Only called when normal attribute lookup fails.

        :param item: key name of _base_headers dict data
        :return: the header value, or None when there is no such header
        :raises AttributeError: for names starting with an underscore, so
            that a missing ``_base_headers`` cannot recurse endlessly and
            private or special attributes are not reported as headers
        """
        if item.startswith('_'):
            raise AttributeError(item)
        return self._base_headers.get(item)

    def __eq__(self, other: Any) -> bool:
        """
        Compare parsers by target URL and default headers.

        :param other: other object
        :return: bool, or NotImplemented for objects that are not parsers
            (``==`` then still evaluates to False)
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        return other.url == self.url and other._base_headers == self._base_headers

    def __setitem__(self, key, value) -> None:
        """
        Set a default header.

        :param key: key of base_headers dict
        :param value: value of base_headers dict
        :return: None
        """
        self._base_headers[key] = value
232
233
if __name__ == '__main__':
    import os

    parser = HttpXParser()
    parser['Product'] = 'True'

    # Proxy credentials are read from the environment so that they are not
    # stored in the source file.
    try:
        demo_proxy = CredentialService(
            login=os.environ['SOCKS_PROXY_LOGIN'],
            password=os.environ['SOCKS_PROXY_PASSWORD'],
            ip_address=os.environ['SOCKS_PROXY_ADDRESS']
        )
    except KeyError as missing:
        raise SystemExit(
            'Set the {} environment variable to run this example'.format(missing)
        )

    asyncio.run(parser.fast().fetch(times=1, url='https://ek.ua/', proxy=demo_proxy))
242
    # asyncio.run(parser.fast().fetch(times=1))
244