· 5 years ago · Oct 25, 2020, 05:08 PM
1/* eslint-disable prefer-const */
2
3const { utils: { log } } = require('apify');
4const {
5 isLegalUrl,
6 hasOwnProperty,
7} = require('../../utils');
8
9const ADAPTER_NAME = 'scrapingdog';
10
11module.exports = ({
12 apiKey = process.env.SCRAPINGDOG_API_KEY || null,
13 premium = !!process.env.SCRAPINGDOG_PREMIUM || false,
14 country = process.env.SCRAPINGDOG_COUNTRY || null,
15 useBrowser = false,
16 keepRequestHeaders = false,
17 sessionId = null,
18}) => ({
19 id: ADAPTER_NAME,
20 mogrify: (request, modifyInPlace = false) => {
21 let {
22 url,
23 method = 'GET',
24 payload = undefined,
25 } = request;
26
27 let headers = hasOwnProperty(request, 'headers')
28 ? { ...request.headers }
29 : {};
30
31 const disable = !!process.env.SCRAPINGDOG_DISABLE;
32
33 if (!disable) {
34 const apiEndpoint = 'https://api.scrapingdog.com/scrape';
35 const mogrifiedUrl = new URL(apiEndpoint);
36
37 if (!isLegalUrl(url)) {
38 throw new Error(`Cannot mogrify request: malformed URL `
39 + `(request=${JSON.stringify(request)})`);
40 }
41
42 if (url.startsWith(apiEndpoint)) {
43 throw new Error(`Cannot mogrify an already mogrified request `
44 + `(request=${JSON.stringify(request)})`);
45 }
46
47 if (!apiKey) {
48 throw new Error('No scrapingdog API key has been set '
49 + '(check your environment is properly configured)');
50 }
51
52 if (!keepRequestHeaders) {
53 headers = {};
54
55 // Headers Apify's requestAsBrowser() will add automatically if not present
56 const defaultHeaders = [
57 'User-Agent',
58 'Accept',
59 'Accept-Language',
60 'Accept-Encoding',
61 'Connection',
62 ];
63
64 // Setting a header to "undefined" causes the underlying http library used by requestAsBrowser()
65 // to strip it from the request (which is the behaviour we want in this case). Note: if debugging,
66 // remember that undefined object properties will not show in JSON.stringify() output.
67 for (const key of defaultHeaders) {
68 headers[key] = undefined;
69 }
70 }
71
72 mogrifiedUrl.searchParams.set('api_key', apiKey);
73 if (premium) mogrifiedUrl.searchParams.set('premium', 'true');
74 if (country) mogrifiedUrl.searchParams.set('country', country);
75 if (useBrowser) mogrifiedUrl.searchParams.set('dynamic', 'true');
76 if (keepRequestHeaders) mogrifiedUrl.searchParams.set('custom_headers', 'true');
77 if (sessionId && Number.isInteger(sessionId)) {
78 mogrifiedUrl.searchParams.set('session_number', sessionId);
79 }
80
81 const originalUrl = new URL(url);
82 url = `${mogrifiedUrl.href}&url=`
83 + `${originalUrl.origin}${originalUrl.pathname}${encodeURIComponent(originalUrl.search)}`;
84 log.debug(`Adapted request (from=${originalUrl.href}, to=${url}, adapter=${ADAPTER_NAME})`);
85 }
86
87 if (modifyInPlace) {
88 request.url = url;
89 request.headers = headers;
90 return undefined;
91 }
92
93 return {
94 url,
95 method,
96 headers,
97 payload,
98 };
99 },
100});
101