· 5 years ago · Oct 25, 2020, 06:14 PM
1/* eslint-disable prefer-const */
2
3const { utils: { log } } = require('apify');
4const {
5 isLegalUrl,
6 hasOwnProperty,
7} = require('../../utils');
8
9const ADAPTER_NAME = 'zenscrape';
10
11module.exports = ({
12 apiKey = process.env.ZENSCRAPE_API_KEY || null,
13 premium = !!process.env.ZENSCRAPE_PREMIUM || false,
14 country = process.env.ZENSCRAPE_COUNTRY || null,
15 useBrowser = false,
16 keepRequestHeaders = false,
17}) => ({
18 id: ADAPTER_NAME,
19 mogrify: (request, modifyInPlace = false) => {
20 let {
21 url,
22 method = 'GET',
23 payload = undefined,
24 } = request;
25
26 let headers = hasOwnProperty(request, 'headers')
27 ? { ...request.headers }
28 : {};
29
30 const disable = !!process.env.ZENSCRAPE_DISABLE;
31
32 if (!disable) {
33 const apiEndpoint = 'https://app.zenscrape.com/api/v1/get';
34 const mogrifiedUrl = new URL(apiEndpoint);
35
36 if (!isLegalUrl(url)) {
37 throw new Error(`Cannot mogrify request: malformed URL `
38 + `(request=${JSON.stringify(request)})`);
39 }
40
41 if (url.startsWith(apiEndpoint)) {
42 throw new Error(`Cannot mogrify an already mogrified request `
43 + `(request=${JSON.stringify(request)})`);
44 }
45
46 if (!apiKey) {
47 throw new Error('No zenscrape API key has been set '
48 + '(check your environment is properly configured)');
49 }
50
51 if (!keepRequestHeaders) {
52 headers = {};
53
54 // Headers Apify's requestAsBrowser() will add automatically if not present
55 const defaultHeaders = [
56 'User-Agent',
57 'Accept',
58 'Accept-Language',
59 'Accept-Encoding',
60 'Connection',
61 ];
62
63 // Setting a header to "undefined" causes the underlying http library used by requestAsBrowser()
64 // to strip it from the request (which is the behaviour we want in this case). Note: if debugging,
65 // remember that undefined object properties will not show in JSON.stringify() output.
66 for (const key of defaultHeaders) {
67 headers[key] = undefined;
68 }
69 }
70
71 // Try to set api key as a header
72 if (!hasOwnProperty(headers, 'apikey')) {
73 headers.apikey = apiKey;
74
75 // Fallback to setting the api key as part of the URL
76 } else {
77 mogrifiedUrl.searchParams.set('apikey', apiKey);
78 }
79
80 if (premium) mogrifiedUrl.searchParams.set('premium', 'true');
81 if (country) mogrifiedUrl.searchParams.set('location', country);
82 if (useBrowser) mogrifiedUrl.searchParams.set('render', 'true');
83 if (keepRequestHeaders) mogrifiedUrl.searchParams.set('keep_headers', 'true');
84
85 const originalUrl = url;
86 url = `${mogrifiedUrl.href}&url=${encodeURIComponent(originalUrl)}`;
87 log.debug(`Adapted request `
88 + `(from=${originalUrl}, to=${url}, adapter=${ADAPTER_NAME})`);
89 }
90
91 if (modifyInPlace) {
92 request.url = url;
93 request.headers = headers;
94 return undefined;
95 }
96
97 return {
98 url,
99 method,
100 headers,
101 payload,
102 };
103 },
104});
105