· 8 years ago · Jan 18, 2018, 07:26 PM
1#!/usr/bin/env ruby
2require 'resolv'
3require 'set'
4require 'thread'
5require 'yaml'
6
# Maps a domain name to the mail domain(s) that count as "self" hosted DMARC
# report destinations for it. A value may be a single name or a list of
# names. For example, the xfinity.com DMARC record contains mailto addresses
# @comcast.net, and those are treated as self-hosted DMARC analysis.
SelfDomainMap = {
  'bitly.com' => 'dmarc.bitly.net',
  'commbank.com.au' => 'cba.com.au',
  'deutsche-bank.de' => 'db.com',
  'federalreserve.gov' => 'frb.gov',
  'hi5.com' => 'tagged.com',
  'userapi.com' => 'vk.com',
  'washingtonpost.com' => 'washpost.com',
  'wikipedia.org' => 'wikimedia.org',
  'wufoo.com' => 'surveymonkey.com',
  'xfinity.com' => %w[dmarctest.comcast.net comcast.net],
  'yinxiang.com' => 'evernote.com',
}

# Inverse of SelfDomainMap (mailto domain => domains that report to it), used
# to populate the mapping for properties that have many domain names all
# pointing at the same DMARC mailto domain.
SelfDomainReverseMap = {
  'abilenetx.com' => %w[abilenetx.gov],
  'airbnb.com' => %w[airbnb.ca airbnb.co.uk airbnb.com.au airbnb.de airbnb.es airbnb.fr airbnb.it airbnb.ru],
  'cfpb.gov' => %w[consumerfinance.gov],
  'citi.com' => %w[citibank.com citibank.co.in citibankonline.com banamex.com],
  'cns.gov' => %w[americorps.gov vistacampus.gov presidentialserviceawards.gov],
  'consumersentinel.gov' => %w[econsumer.gov ftccomplaintassistant.gov],
  'corp.mail.ru' => %w[ok.ru my.com],
  'cpsc.gov' => %w[recalls.gov],
  'doi.gov' => %w[usgs.gov fws.gov blm.gov usbr.gov nationalmap.gov nifc.gov nps.gov sciencebase.gov indianaffairs.gov boem.gov bia.gov bsee.gov mrlc.gov fgdc.gov geomac.gov volunteer.gov osmre.gov],
  'dol.gov' => %w[benefits.gov],
  'ebay.com' => %w[gumtree.pl gumtree.co.za],
  'ed.gov' => %w[studentloans.gov fafsa.gov g5.gov nationsreportcard.gov],
  'facebook.com' => %w[messenger.com oculus.com],
  'fcc.gov' => %w[broadbandmap.gov],
  'fda.hhs.gov' => %w[fda.gov],
  'fema.dhs.gov' => %w[fema.gov],
  'fhfa.gov' => %w[harp.gov],
  'frb.gov' => %w[ffiec.gov uscurrency.gov],
  'ftc.gov' => %w[consumer.gov consumidor.gov],
  'groupon.com' => %w[groupon.it groupon.co.uk groupon.fr groupon.de livingsocial.com],
  'gsa.gov' => %w[18f.gov acquisition.gov challenge.gov data.gov eac.gov everykidinapark.gov fedramp.gov fpds.gov fsd.gov govsales.gov gsaadvantage.gov gsaauctions.gov reginfo.gov sam.gov section508.gov usa.gov],
  'hq.dhs.gov' => %w[cbp.gov secretservice.gov],
  'mail.house.gov' => %w[jct.gov],
  'mail.nasa.gov' => %w[globe.gov scijinks.gov],
  'mecknc.gov' => %w[mecklenburgcountync.gov],
  'mercadolibre.com' => %w[mercadolivre.com.br mercadolibre.com.ar mercadolibre.com.mx mercadolibre.com.ve mercadolibre.com.co mercadolivre.com mercadopago.com mercadolibre.com.pe mercadolibre.com.uy],
  'nrel.gov' => %w[smartgrid.gov],
  'nsf.gov' => %w[science360.gov research.gov],
  'ofdp.irs.gov' => %w[tax.gov irsvideos.gov],
  'omb.gov' => %w[max.gov],
  'orau.org' => %w[orau.gov],
  'rambler-co.ru' => %w[rambler.ru],
  'service.alibaba.com' => %w[taobao.com tmall.com alipay.com aliexpress.com alibaba.com alibaba-inc.com],
  'ssa.gov' => %w[socialsecurity.gov],
  'state.gov' => %w[america.gov foreignassistance.gov osac.gov pepfar.gov usconsulate.gov usembassy.gov usmission.gov],
  'treasury.gov' => %w[cdfifund.gov eftps.gov financialresearch.gov fincen.gov helpwithmybank.gov moneyfactory.gov moneyfactorystore.gov mymoney.gov occ.gov treas.gov treasurydirect.gov ttb.gov ttbonline.gov usaspending.gov usmint.gov],
  'usdoj.gov' => %w[ada.gov],
  'yahoo-inc.com' => %w[flickr.com tumblr.com umblr.com staticflickr.com rivals.com yimg.com yahoo.net],
  'yandex.ru' => %w[yandex.ua yandex.kz yandex.com.tr yandex.by yandex.com ya.ru postila.ru],
  'yelp.com' => %w[yelp.ca],
}.freeze

# Fold the reverse map into SelfDomainMap. Array() normalises whatever is
# already stored (nothing, a single name, or a list) so that a domain with a
# direct single-name entry gains a second name instead of having the new name
# appended onto the existing String.
SelfDomainReverseMap.each_pair do |mailto_domain, domains|
  domains.each do |domain|
    SelfDomainMap[domain] = Array(SelfDomainMap[domain]) | [mailto_domain]
  end
end

# The mapping is complete; guard it against accidental mutation.
SelfDomainMap.each_value(&:freeze)
SelfDomainMap.freeze
76
# Resolves the _dmarc TXT records for a list of domains and produces a TSV
# report that classifies where each record sends its DMARC reports.
class DmarcAnalyzer
  # Classification label => report-receiving mail domains that map to it.
  PROVIDER_DOMAINS = {
    'agari' => %w[ruf.agari.com rua.agari.com],
    'returnpath' => %w[auth.returnpath.net],
    'dmarcian' => %w[ag.dmarcian.com fr.dmarcian.com ag.dmarcian-eu.com fr.dmarcian-eu.com],
    'cyberint' => %w[cyberint.com],
    '250ok.com' => %w[dmarc.250ok.net dmarc.250ok.com 250ok.com],
    'messagebus' => %w[labs.messagebus.com],
    'netcraft' => %w[ruf.netcraft.com rua.netcraft.com dmarc.netcraft.com],
    'proofpoint' => %w[emaildefense.proofpoint.com],
    'haspf' => %w[haspf.com],
    'easysolutions' => %w[dmeu.easysol.net easysol.net dm.easysol.net],
    'self:google' => %w[google.com],
    'postmarkapp' => %w[dmarc.postmarkapp.com],
    'dmarcanalyzer' => %w[rep.dmarcanalyzer.com for.dmarcanalyzer.com],
    'self:netease' => %w[qiye.163.com],
    'valimail' => %w[vali.email valimail.com],
    'sendinblue' => %w[mailinblue.com sendinblue.com],
    'phishlabs' => %w[datafeeds.phishlabs.com],
    'mxtoolbox' => %w[mxtoolbox.dmarc-report.com forensics.dmarc-report.com dmarc-report.com],
    'dhs-nppd' => %w[dmarc.cyber.dhs.gov],
  }.freeze

  # Flattened index: mail domain => classification label.
  PROVIDER_BY_DOMAIN = PROVIDER_DOMAINS.each_with_object({}) do |(label, hosts), index|
    hosts.each { |host| index[host] = label }
  end.freeze

  private_constant :PROVIDER_DOMAINS, :PROVIDER_BY_DOMAIN

  def initialize
    @resolver = Resolv::DNS.new
  end

  # Print one tab-separated row (domain, policy, comma-joined classifications)
  # to stdout for every entry of the YAML mapping stored in yaml_file.
  # A domain with no record is reported as 'DNE'; a record that does not look
  # like DMARC is reported as 'invalid'. An empty YAML document yields no rows.
  def generate_report(yaml_file)
    # safe_load gives nil/false for an empty document; treat that as no data.
    dmarc_data = YAML.safe_load(File.read(yaml_file)) || {}
    dmarc_data.each_pair do |domain, record|
      puts report_row(domain, record).join("\t")
    end
  end

  # Write a progress/diagnostic message to standard error.
  def log_info(message)
    STDERR.puts message
  end

  # Look up the TXT record at _dmarc.<domain> and return its strings joined
  # into one String. Returns nil when the resolver reports that the name has
  # no information; any other Resolv::ResolvError is re-raised.
  def resolve_dmarc(domain)
    @resolver.getresource("_dmarc.#{domain}",
                          Resolv::DNS::Resource::IN::TXT).strings.join('')
  rescue Resolv::ResolvError => err
    raise unless err.message.include?('DNS result has no information for')

    nil
  end

  # Read whitespace-separated domain names from filename, resolve each one's
  # DMARC record, and dump a YAML mapping of domain => record (nil when the
  # record is missing) to out_stream, in the order the domains were listed.
  def dns_lookup_from_file(filename, out_stream = STDOUT)
    domains = File.read(filename).split

    log_info("Looking up #{domains.length} domains")
    results = resolve_parallel(domains)
    log_info("Finished resolving DMARC records!")

    # Hashes preserve insertion order, so inserting by index keeps the
    # output in input order.
    ordered = results.sort_by { |row| row.fetch(:index) }
    output = ordered.each_with_object({}) do |row, acc|
      acc[row.fetch(:domain)] = row.fetch(:record)
    end

    YAML.dump(output, out_stream)
  end

  # Resolve the DMARC record of every domain using num_threads worker threads.
  # Returns an Array of {domain:, index:, record:} hashes in completion order;
  # index is the position of the domain in the input list.
  def resolve_parallel(domains, num_threads = 16)
    queue = Queue.new
    domains.each_with_index { |domain, index| queue.push([domain, index]) }

    lock = Mutex.new
    results = []

    workers = num_threads.times.map do
      Thread.new do
        loop do
          begin
            # Non-blocking pop: raises ThreadError once the queue is drained.
            domain, index = queue.pop(true)
          rescue ThreadError
            break
          end

          log_info("#{index} #{domain}:")
          resolved = resolve_dmarc(domain)
          log_info("#{index} #{domain} #{resolved.inspect}")

          lock.synchronize do
            results << {domain: domain, index: index, record: resolved}
          end
        end
      end
    end

    workers.each(&:join)

    results
  end

  # True when the TXT record text begins with a DMARC version tag
  # (compared case-insensitively).
  def is_dmarc_record?(record)
    record.downcase.start_with?('v=dmarc')
  end

  # Split a record into its ';'-separated tags with surrounding whitespace
  # removed.
  def dmarc_record_parts(record)
    record.split(';').map(&:strip)
  end

  # Return the value of the record's p= tag, or '' when there is none. If
  # several p tags are present their values are joined with '/'. Whitespace
  # around '=' is accepted, matching how the rua/ruf tags are parsed.
  def dmarc_record_policy(record)
    dmarc_record_parts(record)
      .map { |part| part[/\Ap\s*=\s*(.*)\z/m, 1] }
      .compact
      .join('/')
  end

  # Return every mailto address listed in the record's rua and ruf tags.
  # The address stops at ',', '!' (size-limit suffix) or whitespace, so
  # "mailto:a@x.com , mailto:b@y.com" yields clean addresses; the mailto
  # scheme is matched case-insensitively.
  def dmarc_mailtos(record)
    dmarc_record_parts(record)
      .grep(/^(rua|ruf)\s*=\s*/)
      .flat_map { |part| part.scan(/mailto:([^,!\s]+)/i).flatten }
  end

  # Classify each address in mailtos for the given domain.
  def classify_mailtos(domain, mailtos)
    mailtos.map { |mailto| classify_mailto(domain, mailto) }
  end

  # Classify a single report address for a domain. Returns a known provider
  # label, 'self' / 'self:<name>' for self-hosted reporting, or
  # "unknown:<address>" (lower-cased) when nothing matches.
  def classify_mailto(domain, mailto)
    domain = domain.downcase
    mailto = mailto.downcase
    mailto_domain = mailto.split('@', 2).last

    provider = PROVIDER_BY_DOMAIN[mailto_domain]
    return provider if provider

    # Amazon's bounce domain only counts as self-hosted for amazon.* domains.
    if mailto_domain == 'bounces.amazon.com'
      return domain.start_with?('amazon.') ? 'self:amazon' : "unknown:#{mailto}"
    end

    # this heuristic is inexact but still useful
    return 'self' if mailto_domain.include?(domain)

    # check explicit self domain mapping (value may be one name or a list)
    return 'self' if Array(SelfDomainMap[domain]).include?(mailto_domain)

    "unknown:#{mailto}"
  end

  private

  # Build the [domain, policy, classifications] columns for one report row.
  def report_row(domain, record)
    return [domain, 'DNE', ''] unless record
    return [domain, 'invalid', ''] unless is_dmarc_record?(record)

    classified = classify_mailtos(domain, dmarc_mailtos(record)).uniq
    [domain, dmarc_record_policy(record), classified.join(',')]
  end
end
254
# Print the command-line help text for both sub-commands to standard error.
def usage
  help_text = <<-EOM
DMARC TXT record slicer and analyzer.

usage: #{$0} report DNS_YAML_FILE
    Generate a TSV report on stdout analyzing the DMARC TXT records contained in
    DNS_YAML_FILE, which may be generated by the resolve sub command.

usage: #{$0} resolve DOMAIN_LIST

    Generate a YAML report on stdout getting the raw DMARC TXT records for each
    domain listed in DOMAIN_LIST, which should be a newline separated list of
    domain names.
  EOM

  STDERR.puts help_text
end
270
# Command-line entry point. Dispatches on the first argument:
#   report FILE  - print a TSV report for the DMARC records stored in FILE
#   resolve FILE - resolve the domains listed in FILE and print YAML
# A missing command, an unknown command, or a missing FILE argument prints
# the usage text to standard error and exits with status 1.
def main
  command, input_file = ARGV

  case command
  when 'report', 'resolve'
    # Report a missing file argument cleanly instead of failing with an
    # IndexError backtrace.
    if input_file.nil?
      STDERR.puts "Missing file argument for #{command}"
      usage
      exit 1
    end

    analyzer = DmarcAnalyzer.new
    if command == 'report'
      analyzer.generate_report(input_file)
    else
      analyzer.dns_lookup_from_file(input_file)
    end
  when nil
    usage
    exit 1
  else
    STDERR.puts "Unknown command #{command}"
    usage
    exit 1
  end
end
288
# Run the CLI only when this file is executed directly, not when it is loaded
# by another script.
main if __FILE__ == $PROGRAM_NAME