// Paste metadata: 6 years ago · Jun 18, 2019, 06:38 PM
'use strict';

var crypto = require('crypto');
var fs = require('fs');
var http = require('http');
var readline = require('readline');
var url = require('url');

var async = require('async');
// `bigJs` is used for number-precision when summing the bitFlag values
var bigJs = require('big.js');
var jsonexport = require('jsonexport');

// Set your expires time several minutes into the future.
// An expires time excessively far in the future will not be honored by the Mozscape API.
// Date.now() is in milliseconds, so divide by 1000 to express the value in seconds.
var expires = Math.floor(Date.now() / 1000) + 300;
// API credentials. Both are left empty in this file; they feed the request
// signature and the `AccessID` query parameter built further down.
var accessId = "";
var secretKey = "";

// `bitFlagExampleValues` is a list of bitFlag values as strings that get
// summed together by the helper function `sumColumnValues`.
var bitFlagExampleValues = ['67108864', '68719476736', '34359738368', '16'];
/**
 * Adds up a list of bit-flag values using big.js so that sums larger than
 * Number.MAX_SAFE_INTEGER keep full precision.
 *
 * @param {Array<string|number>} bitFlagValues - Bit-flag values to add together.
 * @returns {string} The sum as produced by big.js `toString()`; '0' for an empty list.
 */
var sumColumnValues = function (bitFlagValues) {
  // Seed with a big.js zero and stringify once at the end. This keeps the
  // return type a string even for an empty list and avoids converting the
  // running total to a string and back on every step.
  var total = bitFlagValues.reduce(function (runningTotal, bitFlag) {
    return runningTotal.plus(new bigJs(bitFlag));
  }, new bigJs(0));

  return total.toString();
};

// `cols` is the sum of the bit flags representing each field you want returned.
// Learn more here: https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
// For the values in `bitFlagExampleValues` this is "103146323984".
var cols = sumColumnValues(bitFlagExampleValues);

// The string to sign is the access ID and the expiry time, one per line.
var stringToSign = accessId + "\n" + expires;

// Create the HMAC-SHA1 hash of the string to sign and Base64-encode it.
var signature = crypto.createHmac('sha1', secretKey).update(stringToSign).digest('base64');
// URL-encode the result of the above so it can be placed in the query string.
signature = encodeURIComponent(signature);

// Links collected from the input file, one entry per line read.
var domains = [];

// Reads the file named `links` (resolved against the working directory) line by line.
var lineReader = readline.createInterface({
  input: fs.createReadStream('links')
});

// Matches a line that already contains an http:// or https:// URL. Built once
// here instead of on every 'line' event; it has no `g` flag, so sharing it
// between calls carries no state.
var linkWithSchemePattern = /(http|https):\/\/(\w+:{0,1}\w*)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%!\-\/]))?/;

// Echoes each input line and stores it in `domains`, prefixing 'http://' when
// the line does not already contain an http(s) URL.
lineReader.on('line', function (line) {
  console.log(line);

  var link = line.trim();
  // A blank line would otherwise be stored as the bare string 'http://'.
  if (link === '') {
    return;
  }

  if (!linkWithSchemePattern.test(link)) {
    link = 'http://' + link;
  }
  domains.push(link);
});

// Logs any 'error' event emitted by the line reader.
lineReader.on('error', function (err) {
  console.log(err);
});

// Runs once the input has been fully read: reduces the collected links to
// unique bare domains, posts them in batches to the url-metrics endpoint,
// converts each JSON response to CSV and writes the result to `result.csv`.
lineReader.on('close', function () {
  // Number of hosts sent per url-metrics request.
  var BATCH_SIZE = 10;
  // Only this many batches are built, so at most BATCH_SIZE * MAX_BATCHES
  // domains are looked up per run and the rest are dropped.
  // NOTE(review): a limit of 1 looks like a leftover test cap — confirm intent
  // before raising it.
  var MAX_BATCHES = 1;
  // Pause after each successful batch before the next one starts
  // (presumably to stay within API rate limits — confirm).
  var BATCH_DELAY_MS = 15000;

  var domainPattern = /^[a-zA-Z0-9][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]\.[a-zA-Z]{2,}$/;

  // Keep only links that parse to a host, and strip a leading "www." label.
  // The dot is escaped so that only a literal "www." prefix is removed.
  domains = domains
    .map(function (link) {
      return url.parse(link).host;
    })
    .filter(Boolean)
    .map(function (host) {
      return host.replace(/^www\./, '');
    });

  // First occurrence wins; a Set makes the duplicate check linear overall.
  var seenHosts = new Set();
  var uniqueDomains = domains.filter(function (host) {
    if (seenHosts.has(host)) {
      return false;
    }
    seenHosts.add(host);
    return !/\.html$/.test(host) && domainPattern.test(host);
  });

  var batches = [];
  while (uniqueDomains.length && batches.length < MAX_BATCHES) {
    batches.push(uniqueDomains.splice(0, BATCH_SIZE));
  }

  // One CSV string per successfully processed batch.
  var csvChunks = [];

  async.eachLimit(batches, 1, function (hostBatch, cb) {
    console.log("Bulk finding...");

    // Several failure events can fire for one request; make sure the
    // iteration callback is invoked exactly once.
    var finished = false;
    var finish = function (err) {
      if (finished) {
        return;
      }
      finished = true;
      cb(err || null);
    };

    var postData = JSON.stringify(hostBatch);
    var options = {
      hostname: 'lsapi.seomoz.com',
      path: '/linkscape/url-metrics/?Cols=' +
        cols + '&AccessID=' + accessId +
        '&Expires=' + expires + '&Signature=' + signature,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Content-Length is a byte count, not a count of UTF-16 code units.
        'Content-Length': Buffer.byteLength(postData)
      }
    };
    var responseData = "";

    var req = http.request(options, function (response) {
      response.setEncoding('utf8');

      response.on('data', function (chunk) {
        responseData += chunk;
      });

      response.on('end', function () {
        var data;
        try {
          data = JSON.parse(responseData);
        } catch (parseErr) {
          // A non-JSON body is reported through the callback instead of
          // throwing out of the event handler.
          finish(parseErr);
          return;
        }

        jsonexport(data, function (err, csv) {
          if (err) {
            finish(err);
            return;
          }
          csvChunks.push(csv);
          setTimeout(function () {
            finish(null);
          }, BATCH_DELAY_MS);
        });
      });

      response.on('error', finish);
    });

    // Without this handler a connection failure is an unhandled 'error' event.
    req.on('error', finish);

    req.write(postData);
    req.end();
  }, function (err) {
    if (err) {
      console.log(err);
    }
    // Whatever was collected is still written, even after a failed batch.
    // writeFileSync needs a string, so the per-batch chunks are joined first.
    fs.writeFileSync('result.csv', csvChunks.join('\n'));
    console.log('mining completed...');
  });
});
136})