· 6 years ago · Jun 18, 2019, 06:40 PM
1'use strict';
2
3var crypto = require('crypto');
4var http = require('http');
5var url = require('url');
6// `bigJs` is used for number-precision when summing the bitFlag values
7var bigJs = require('big.js');
8var jsonexport = require('jsonexport');
9var fs = require('fs');
10var async = require('async');
11
// How far ahead (in seconds) the request expiry is set. Keep this to a few
// minutes: an expiry excessively far in the future is not honored by the
// Mozscape API.
const EXPIRES_IN_SECONDS = 300;

// Date.now() is in milliseconds; dividing by 1000 expresses `expires` in seconds.
const expires = Math.floor(Date.now() / 1000) + EXPIRES_IN_SECONDS;

// Credentials may be supplied through the MOZ_ACCESS_ID / MOZ_SECRET_KEY
// environment variables; the literals below are only used as a fallback.
// NOTE(review): the fallback values are credentials committed to source —
// rotate them and drop the fallback once the environment variables are in use.
const accessId = process.env.MOZ_ACCESS_ID || "mozscape-b57e9eef2f";
const secretKey = process.env.MOZ_SECRET_KEY || "485b572b854b2d3ce5f79ecf8a2a92ca";
18
// `bitFlagExampleValues` is the list of bit-flag values (as decimal strings)
// that get summed by `sumColumnValues` to build the `Cols` request parameter.
const bitFlagExampleValues = ['67108864', '68719476736', '34359738368', '16'];

/**
 * Sums bit-flag values with big.js so that totals beyond the range of exact
 * JavaScript integers keep full precision.
 *
 * @param {Array<string|number>} bitFlagValues - Bit flags to add together.
 * @returns {string} The sum as a string; "0" when the list is empty.
 */
const sumColumnValues = function (bitFlagValues) {
  // Accumulate as a big.js value and convert to a string once at the end, so
  // the return type is a string even for an empty list.
  return bitFlagValues
    .reduce(function (runningTotal, bitFlag) {
      return runningTotal.plus(new bigJs(bitFlag));
    }, new bigJs(0))
    .toString();
};

// 'cols' is the sum of the bit flags representing each field you want returned.
// Learn more here: https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
// For the flags listed above this evaluates to "103146323984".
const cols = sumColumnValues(bitFlagExampleValues);
36
// The string to sign holds the access ID and the expiry, one per line.
const stringToSign = `${accessId}\n${expires}`;

// Key an HMAC-SHA1 with the secret, feed it the string to sign, take the
// Base64 digest, and URL-encode that digest.
const hmac = crypto.createHmac('sha1', secretKey);
hmac.update(stringToSign);
const signature = encodeURIComponent(hmac.digest('base64'));
44
// Entries gathered from the input file; filled by the 'line' handler and
// reassigned once reading has finished.
let domains = [];

// Read the `links` file through a readline interface so it is consumed one
// line at a time.
const linksStream = fs.createReadStream('links');
const lineReader = require('readline').createInterface({ input: linksStream });
50
// Matches a leading http:// or https:// scheme in any letter case. Anchoring
// matters: a scheme-less line that merely contains a URL further along (for
// example in a query string) must still get the default scheme.
const LEADING_SCHEME = /^https?:\/\//i;

// For every line of the input file, queue an absolute URL in `domains`,
// prefixing "http://" when the line has no scheme of its own.
lineReader.on('line', function (line) {
  console.log(line);

  const entry = line.trim();
  // A blank line carries no URL; skip it instead of queueing a bare "http://".
  if (entry === '') {
    return;
  }

  domains.push(LEADING_SCHEME.test(entry) ? entry : 'http://' + entry);
});
61
// Log any error event emitted by the line reader.
lineReader.on('error', (readError) => {
  console.log(readError);
});
65
// Number of hosts sent to the API in one request.
const BATCH_SIZE = 10;
// Pause between consecutive requests, in milliseconds.
const REQUEST_DELAY_MS = 15000;
// Lifetime, in seconds, of the signature attached to each request.
const SIGNATURE_TTL_SECONDS = 300;
// Accepts a single 3-63 character label followed by one alphabetic TLD
// (e.g. "example.com"); hosts with subdomains or a port do not match.
const DOMAIN_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]\.[a-zA-Z]{2,}$/;

/**
 * Reduces the collected URLs to a list of unique, valid bare hostnames.
 *
 * @param {string[]} urls - Absolute URLs gathered from the input file.
 * @returns {string[]} Hostnames without a leading "www.", de-duplicated in
 *   first-seen order, keeping only those that match DOMAIN_PATTERN and do not
 *   end in ".html".
 */
function extractUniqueDomains(urls) {
  const hosts = urls
    .map(function (entry) {
      return url.parse(entry).host;
    })
    .filter(Boolean)
    .map(function (host) {
      // The dot is escaped so only a literal "www." prefix is removed.
      return host.replace(/^www\./, '');
    });

  // A Set keeps first-seen order and avoids an indexOf scan per element.
  return Array.from(new Set(hosts)).filter(function (host) {
    return !/\.html$/.test(host) && DOMAIN_PATTERN.test(host);
  });
}

/**
 * Splits a list into consecutive batches without mutating it.
 *
 * @param {Array} list - Items to split.
 * @param {number} size - Maximum number of items per batch.
 * @returns {Array<Array>} The batches, in order.
 */
function splitIntoBatches(list, size) {
  const batches = [];
  for (let start = 0; start < list.length; start += size) {
    batches.push(list.slice(start, start + size));
  }
  return batches;
}

/**
 * Builds the query string for one request, signed at call time.
 *
 * The batches are sent REQUEST_DELAY_MS apart, so a signature computed once at
 * start-up would pass its expiry part-way through a long run; signing per
 * request keeps every call inside its validity window.
 *
 * @returns {string} Query string (without the leading "?").
 */
function buildSignedQuery() {
  const requestExpires = Math.floor(Date.now() / 1000) + SIGNATURE_TTL_SECONDS;
  const requestSignature = crypto
    .createHmac('sha1', secretKey)
    .update(accessId + '\n' + requestExpires)
    .digest('base64');

  return 'Cols=' + cols +
    '&AccessID=' + accessId +
    '&Expires=' + requestExpires +
    '&Signature=' + encodeURIComponent(requestSignature);
}

/**
 * POSTs one batch of hosts to the url-metrics endpoint.
 *
 * @param {string[]} hosts - Hosts to look up.
 * @param {function(?Error, *=)} done - Called exactly once, with an error or
 *   with the parsed JSON response body.
 */
function fetchUrlMetrics(hosts, done) {
  const postData = JSON.stringify(hosts);
  let settled = false;

  // Request and response can both report a failure; make sure `done` is only
  // invoked for the first outcome.
  function finish(err, data) {
    if (settled) {
      return;
    }
    settled = true;
    done(err, data);
  }

  const options = {
    hostname: 'lsapi.seomoz.com',
    path: '/linkscape/url-metrics/?' + buildSignedQuery(),
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // Content-Length is a byte count, which differs from the string length
      // as soon as the body holds a non-ASCII character.
      'Content-Length': Buffer.byteLength(postData)
    }
  };

  const req = http.request(options, function (response) {
    let responseData = '';

    response.setEncoding('utf8');
    response.on('data', function (part) {
      responseData += part;
    });
    response.on('error', finish);
    response.on('end', function () {
      let data;
      try {
        data = JSON.parse(responseData);
      } catch (parseError) {
        finish(parseError);
        return;
      }
      finish(null, data);
    });
  });

  // Without this listener a connection failure is an unhandled 'error' event
  // and terminates the process.
  req.on('error', finish);
  req.write(postData);
  req.end();
}

// Once the whole input file has been read: look up the unique domains in
// batches, one request at a time, then write every returned row to result.csv.
lineReader.on('close', function () {
  const batches = splitIntoBatches(extractUniqueDomains(domains), BATCH_SIZE);
  let rows = [];

  async.eachLimit(batches, 1, function (batch, cb) {
    console.log("Bulk finding...");

    fetchUrlMetrics(batch, function (err, data) {
      if (err) {
        // Best effort: report the failed batch and carry on with the rest
        // instead of leaving the queue waiting forever.
        console.log(err);
      } else {
        rows = rows.concat(data);
      }
      setTimeout(function () {
        cb(null);
      }, REQUEST_DELAY_MS);
    });
  }, function (err) {
    if (err) {
      console.log(err);
    }

    // Convert all rows in one pass so the file gets a single header row, and
    // hand writeFileSync a string rather than an array.
    jsonexport(rows, function (exportError, csv) {
      if (exportError) {
        console.log(exportError);
        return;
      }
      fs.writeFileSync('result.csv', csv);
      console.log('mining completed...');
    });
  });
});