· 5 years ago · Sep 21, 2020, 08:16 AM
def main():
    """Build the per-URL Spamhaus feed configuration and run the generic HTTP feed entry point."""
    # Keep only the integration parameters that actually carry a value.
    params = {}
    for name, setting in demisto.params().items():
        if setting is not None:
            params[name] = setting

    # drop.txt and edrop.txt use the same indicator pattern: a leading IPv4 CIDR.
    cidr_regex = r'^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}/[0-9]{1,2}'

    # Extra attributes extracted from each asndrop.txt line.
    asn_fields = [
        {'asndrop_country': {'regex': r'^.*;\W([a-zA-Z]+)\W+', 'transform': r'\1'}},
        {'asndrop_org': {'regex': r'^.*\|\W+(.*)', 'transform': r'\1'}},
    ]

    params['feed_url_to_config'] = {
        # TODO: Add this service once we have an indicator type of ASN
        'https://www.spamhaus.org/drop/asndrop.txt': {
            'indicator_type': 'ASN',
            'indicator': {'regex': r'^AS[0-9]+'},
            'fields': asn_fields,
        },
        # Each URL gets its own 'indicator' dict; only the (immutable) regex string is shared.
        'https://www.spamhaus.org/drop/drop.txt': {
            'indicator_type': FeedIndicatorType.CIDR,
            'indicator': {'regex': cidr_regex},
        },
        'https://www.spamhaus.org/drop/edrop.txt': {
            'indicator_type': FeedIndicatorType.CIDR,
            'indicator': {'regex': cidr_regex},
        },
    }

    # Call the main execution of the HTTP API module.
    feed_main('Spamhaus Feed', params, 'spamhaus')
44
45
46
47### GENERATED CODE ###
48# This code was inserted in place of an API module.
49
50
51
52''' IMPORTS '''
53import urllib3
54import requests
55import traceback
56from dateutil.parser import parse
57from typing import Optional, Pattern, List
58
# Silence urllib3 warnings (the feed may be polled with certificate verification disabled).
urllib3.disable_warnings()

# --- GLOBALS ---
# Attribute key that Client.custom_fields_creator copies unchanged into an indicator's fields.
TAGS = 'feedTags'
64
65
class Client(BaseClient):
    """HTTP client that downloads plain-text indicator feeds and yields their lines."""

    def __init__(self, url: str, feed_name: str = 'http', insecure: bool = False, credentials: dict = None,
                 ignore_regex: str = None, encoding: str = None, indicator_type: str = '',
                 indicator: str = '', fields: str = '{}', feed_url_to_config: dict = None, polling_timeout: int = 20,
                 headers: dict = None, proxy: bool = False, custom_fields_mapping: dict = None, **kwargs):
        r"""Implements class for miners of plain text feeds over HTTP.

        **Config parameters**

        :param url: URL of the feed.
        :param polling_timeout: timeout of the polling request in seconds.
            Default: 20
        :param feed_name: The name of the feed.
        :param custom_fields_mapping: Dict, the "fields" to be used in the indicator - where the keys
            are the *current* keys of the fields returned feed data and the *values* are the
            *indicator fields in Demisto*.
        :param headers: dict, optional headers to send in the request.
        :param ignore_regex: Python regular expression for lines that should be
            ignored. Default: *null*
        :param insecure: boolean, if *false* feed HTTPS server certificate is
            verified. Default: *false*
        :param credentials: username and password used for basic authentication.
            Can be also used as API key header and value by specifying
            ``_header:<header_name>`` in the username field.
        :param encoding: encoding of the feed, if not UTF-8. See
            ``bytes.decode`` for options. Default: *null*, meaning UTF-8 is assumed.
        :param indicator_type: Default indicator type.
        :param indicator: JSON string of an *extraction dictionary* used to extract the
            indicator from the line. If empty, the text until the first whitespace
            is used as indicator. Only used when ``feed_url_to_config`` is not given.
        :param fields: JSON string of a dictionary of *extraction dictionaries* used to
            extract additional attributes from each line. Default: ``'{}'``.
            Only used when ``feed_url_to_config`` is not given.
        :param feed_url_to_config: For each service, a dictionary to process indicators by.
            For example, ASN feed::

                'https://www.spamhaus.org/drop/asndrop.txt': {
                    'indicator_type': ASN,
                    'indicator': {  (if omitted, the text until the first whitespace is used)
                        'regex': r'^AS[0-9]+',
                    },
                    'fields': [{  (See Extraction dictionary below)
                        'asndrop_country': {
                            'regex': r'^.*;\W([a-zA-Z]+)\W+',
                            'transform': r'\1'
                        }
                    }]
                }
        :param proxy: Use proxy in requests.
        :param kwargs: Any other integration parameters; accepted and ignored.
        :raises ValueError: if ``polling_timeout`` is not an integer, or the API key
            header name in ``credentials`` is empty.

        **Extraction dictionary**

        Extraction dictionaries contain the following keys:

        :regex: Python regular expression for searching the text.
        :transform: template to generate the final value from the result
            of the regular expression. Default: the entire match of the regex
            is used as extracted value.

        See Python `re <https://docs.python.org/3/library/re.html>`_ module for
        details about Python regular expressions and templates.

        Example config in YAML where extraction dictionaries are used to
        extract the indicator and additional fields::

            url: https://www.dshield.org/block.txt
            ignore_regex: "[#S].*"
            indicator:
                regex: '^([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\t([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})'
                transform: '\1-\2'
            fields:
                dshield_nattacks:
                    regex: '^.*\t.*\t[0-9]+\t([0-9]+)'
                    transform: '\1'
                dshield_name:
                    regex: '^.*\t.*\t[0-9]+\t[0-9]+\t([^\t]+)'
                    transform: '\1'
                dshield_country:
                    regex: '^.*\t.*\t[0-9]+\t[0-9]+\t[^\t]+\t([A-Z]+)'
                    transform: '\1'
                dshield_email:
                    regex: '^.*\t.*\t[0-9]+\t[0-9]+\t[^\t]+\t[A-Z]+\t(\S+)'
                    transform: '\1'

        Example config in YAML where the text in each line until the first
        whitespace is used as indicator::

            url: https://ransomwaretracker.abuse.ch/downloads/CW_C2_URLBL.txt
            ignore_regex: '^#'
        """
        super().__init__(base_url=url, verify=not insecure, proxy=proxy)
        try:
            self.polling_timeout = int(polling_timeout)
        except (ValueError, TypeError):
            raise ValueError('Please provide an integer value for "Request Timeout"')

        self.headers = headers
        self.encoding = encoding
        self.feed_name = feed_name
        self.username = None
        self.password = None

        credentials = credentials or {}
        # Tolerate an identifier that is present but None, so startswith() cannot fail.
        username = credentials.get('identifier') or ''
        if username.startswith('_header:'):
            # "_header:<name>" means: send the password as the value of header <name>.
            header_name: str = username.split(':')[1]
            if not header_name:
                raise ValueError('An incorrect value was provided for an API key header.'
                                 ' The correct value is "_header:<header_name>"')
            # Work on a copy so the dict passed in by the caller is not modified.
            self.headers = dict(self.headers) if self.headers else {}
            self.headers[header_name] = credentials.get('password', '')
        else:
            self.username = username
            self.password = credentials.get('password', None)

        self.indicator_type = indicator_type
        if feed_url_to_config:
            self.feed_url_to_config = feed_url_to_config
        else:
            # Single-URL feed: derive the configuration from the JSON string parameters.
            self.feed_url_to_config = {url: self.get_feed_config(fields, indicator)}

        self.ignore_regex: Optional[Pattern] = None
        if ignore_regex is not None:
            self.ignore_regex = re.compile(ignore_regex)

        if custom_fields_mapping is None:
            custom_fields_mapping = {}
        self.custom_fields_mapping = custom_fields_mapping

    @staticmethod
    def _compile_extractor(extractor: dict, missing_regex: str, groups_without_transform: str) -> None:
        """Compile an extraction dictionary in place and fill in its default transform.

        :param extractor: Extraction dictionary; its 'regex' is replaced by the compiled pattern.
        :param missing_regex: Error message used when the dictionary has no 'regex' key.
        :param groups_without_transform: Message logged when the pattern has groups but no
            'transform' was supplied (the whole match is then used).
        :raises ValueError: if the dictionary has no 'regex' key.
        """
        if 'regex' not in extractor:
            raise ValueError(missing_regex)
        extractor['regex'] = re.compile(extractor['regex'])
        if 'transform' not in extractor:
            if extractor['regex'].groups > 0:
                LOG(groups_without_transform)
            # Default template: the entire match.
            extractor['transform'] = r'\g<0>'

    def get_feed_config(self, fields_json: str = '', indicator_json: str = ''):
        r"""Get the feed configuration from the indicator and field JSON strings.

        :param fields_json: JSON string of fields to extract, for example:
            {
                'fieldname': {
                    'regex': regex,
                    'transform': r'\1'
                },
                'asndrop_org': {
                    'regex': regex,
                    'transform': r'\1'
                }
            }
        :param indicator_json: JSON string of the indicator to extract, for example:
            {'regex': regex}
        :return: The feed configuration: a dict with an optional 'indicator' extraction
            dictionary and an optional 'fields' list of single-key dicts, regexes compiled.
        :raises ValueError: if the indicator or any field has no 'regex'.
        """
        config = {}
        if indicator_json:
            indicator = json.loads(indicator_json)
            self._compile_extractor(
                indicator,
                f'{self.feed_name} - indicator stanza should have a regex',
                f'{self.feed_name} - no transform string for indicator but pattern contains groups',
            )
            config['indicator'] = indicator

        if fields_json:
            fields = json.loads(fields_json)
            config['fields'] = []
            for name, extractor in fields.items():
                self._compile_extractor(
                    extractor,
                    f'{self.feed_name} - {name} field does not have a regex',
                    f'{self.feed_name} - no transform string for field {name} but pattern contains groups',
                )
                config['fields'].append({name: extractor})

        return config

    def build_iterator(self, **kwargs):
        """
        For each URL (service), send an HTTP request to get indicators and return them after filtering by Regex

        :param kwargs: Arguments to send to the HTTP API endpoint
        :return: List of single-key dicts mapping each URL to a lazy iterator over its
            decoded (``str``) lines, with lines matching ``ignore_regex`` removed.
        :raises requests.ConnectionError: if a connection could not be established.
        """
        kwargs['stream'] = True
        kwargs['verify'] = self._verify
        kwargs['timeout'] = self.polling_timeout

        if self.headers is not None:
            kwargs['headers'] = self.headers

        if self.username is not None and self.password is not None:
            kwargs['auth'] = (self.username, self.password)

        urls = self._base_url
        if not isinstance(urls, list):
            urls = [urls]

        url_to_response_list: List[dict] = []
        try:
            for url in urls:
                r = requests.get(
                    url,
                    **kwargs
                )
                try:
                    r.raise_for_status()
                except Exception:
                    LOG(f'{self.feed_name!r} - exception in request:'
                        f' {r.status_code!r} {r.content!r}')
                    raise
                url_to_response_list.append({url: r})
        except requests.ConnectionError as err:
            raise requests.ConnectionError(
                'Failed to establish a new connection. Please make sure your URL is valid.') from err

        # Lines arrive as bytes. Decode them to str exactly once: the ignore regex and the
        # downstream extraction regexes are str patterns and cannot be applied to bytes.
        source_encoding = self.encoding if self.encoding is not None else 'utf_8'
        ignore_regex = self.ignore_regex

        results = []
        for url_to_response in url_to_response_list:
            for url, response in url_to_response.items():
                lines = (raw.decode(source_encoding) for raw in response.iter_lines())
                if ignore_regex is not None:
                    lines = (text for text in lines if ignore_regex.match(text) is None)
                results.append({url: lines})
        return results

    def custom_fields_creator(self, attributes: dict):
        """Build the custom fields of an indicator from its extracted attributes.

        :param attributes: Attributes extracted for one indicator.
        :return: Dict holding the TAGS attribute under its own key and every attribute listed
            in ``custom_fields_mapping`` under its mapped field name; other attributes are dropped.
        """
        created_custom_fields = {}
        for attribute, attribute_value in attributes.items():
            if attribute == TAGS:
                created_custom_fields[attribute] = attribute_value
            elif attribute in self.custom_fields_mapping:
                created_custom_fields[self.custom_fields_mapping[attribute]] = attribute_value

        return created_custom_fields
305
306
def datestring_to_millisecond_timestamp(datestring):
    """Parse a date given as a string (or anything str() can render) into epoch milliseconds.

    :param datestring: The date to convert; it is passed through str() before parsing.
    :return: The timestamp in whole milliseconds, truncated to int.
    """
    parsed = parse(str(datestring))
    epoch_seconds = parsed.timestamp()
    return int(epoch_seconds * 1000)
310
311
def get_indicator_fields(line, url, feed_tags: list, client: 'Client'):
    r"""
    Extract the indicator and its attributes from one feed line, according to the feed's config.

    The configuration for ``url`` is read from ``client.feed_url_to_config`` and is not modified.
    Patterns may be given as strings or as already-compiled patterns.

    :param line: The current line in the feed
    :param url: The feed URL
    :param feed_tags: The indicator tags.
    :param client: The client
    :return: Tuple ``(attributes, value)``. ``(None, '')`` when the line is blank or the
        indicator regex does not match; otherwise ``attributes`` holds the extracted fields
        plus 'value', 'type' and 'tags', and ``value`` is the indicator itself.
    :raises ValueError: if a non-empty indicator stanza or a field has no 'regex'.
    """
    attributes = None
    value: str = ''

    line = line.strip()
    if not line:
        return attributes, value

    feed_config = client.feed_url_to_config.get(url, {})

    indicator = feed_config.get('indicator')
    if indicator:
        if 'regex' not in indicator:
            raise ValueError(f'{client.feed_name} - indicator stanza should have a regex')
        # re.compile accepts both pattern strings (cached by `re`) and compiled patterns.
        match = re.compile(indicator['regex']).search(line)
        if match is None:
            return attributes, value
        # Without a transform the whole match is the indicator.
        extracted_indicator = match.expand(indicator.get('transform', r'\g<0>'))
    else:
        # No extraction rule: the indicator is the text up to the first whitespace.
        extracted_indicator = line.split()[0]

    attributes = {}
    for field in feed_config.get('fields', []):
        for name, extractor in field.items():
            if 'regex' not in extractor:
                raise ValueError(f'{client.feed_name} - {name} field does not have a regex')
            match = re.compile(extractor['regex']).search(line)
            if match is None:
                continue

            extracted = match.expand(extractor.get('transform', r'\g<0>'))
            # Purely numeric values are stored as int, everything else as text.
            try:
                attributes[name] = int(extracted)
            except ValueError:
                attributes[name] = extracted

    attributes['value'] = value = extracted_indicator
    attributes['type'] = feed_config.get('indicator_type', client.indicator_type)
    # NOTE(review): tags are stored under 'tags', while fetch_indicators_command looks for the
    # TAGS ('feedTags') key - confirm which key the platform expects.
    attributes['tags'] = feed_tags
    return attributes, value
376
377
def fetch_indicators_command(client, feed_tags, itype, **kwargs):
    """Download every configured feed and turn its lines into indicator objects.

    :param client: The feed client.
    :param feed_tags: Tags handed to get_indicator_fields for every indicator.
    :param itype: Indicator type used when the URL's feed config does not define one.
    :param kwargs: Passed through to client.build_iterator.
    :return: List of dicts with 'value', 'type', 'rawJSON' and, when applicable, 'fields'.
    """
    indicators = []
    for iterator in client.build_iterator(**kwargs):
        for url, lines in iterator.items():
            for line in lines:
                attributes, value = get_indicator_fields(line, url, feed_tags, client)
                if not value:
                    # Blank line, or a line the indicator regex did not match.
                    continue

                # Date attributes are converted from date strings to millisecond timestamps.
                for date_key in ('lastseenbysource', 'firstseenbysource'):
                    if date_key in attributes:
                        attributes[date_key] = datestring_to_millisecond_timestamp(attributes[date_key])

                url_config = client.feed_url_to_config.get(url, {})
                indicator_data = {
                    "value": value,
                    "type": url_config.get('indicator_type', itype),
                    "rawJSON": attributes,
                }

                if client.custom_fields_mapping or TAGS in attributes:
                    indicator_data["fields"] = client.custom_fields_creator(attributes)

                indicators.append(indicator_data)
    return indicators
406
407
def get_indicators_command(client: Client, args):
    """Fetch indicators from the feed and return the first ``limit`` of them for display.

    :param client: The feed client.
    :param args: Command arguments: 'limit' (required, integer), and optionally
        'indicator_type' (defaults to the client's indicator type) and 'feedTags'.
    :return: Tuple of (markdown table, empty outputs dict, list of indicators).
    :raises ValueError: if 'limit' is missing or is not an integer.
    """
    itype = args.get('indicator_type', client.indicator_type)
    try:
        limit = int(args.get('limit'))
    except (ValueError, TypeError):
        # int(None) would otherwise surface as an unhelpful TypeError.
        raise ValueError('Please provide an integer value for "limit"')
    feed_tags = args.get('feedTags')
    indicators_list = fetch_indicators_command(client, feed_tags, itype)[:limit]
    entry_result = camelize(indicators_list)
    hr = tableToMarkdown('Indicators', entry_result, headers=['Value', 'Type', 'Rawjson'])
    return hr, {}, indicators_list
416
417
def test_module(client: Client, args):
    """Validate the configured indicator type (when no feed config exists) and probe the feed.

    :param client: The feed client.
    :param args: Command arguments; 'indicator_type' overrides the integration parameter.
    :return: Tuple ('ok', {}, {}) when the feed request succeeds.
    :raises ValueError: if the indicator type is not one of the FeedIndicatorType values.
    """
    # NOTE(review): Client.__init__ always assigns a non-empty feed_url_to_config, so this
    # branch looks unreachable for clients built through the constructor - confirm.
    if not client.feed_url_to_config:
        indicator_type = args.get('indicator_type', demisto.params().get('indicator_type'))
        if not FeedIndicatorType.is_valid_type(indicator_type):
            # The supported types are the string-valued, non-dunder attributes of the class.
            supported_values = ', '.join(
                val for key, val in vars(FeedIndicatorType).items()
                if not key.startswith('__') and isinstance(val, str)
            )
            raise ValueError(f'Indicator type of {indicator_type} is not supported. Supported values are:'
                             f' {supported_values}')
    # Issues the HTTP request(s); any failure propagates to the caller.
    client.build_iterator()
    return 'ok', {}, {}
431
432
def feed_main(feed_name, params=None, prefix=''):
    """Entry point of the HTTP feed module: build the client and run the requested command.

    :param feed_name: Display name of the feed; used in error messages and as the default
        'feed_name' client parameter.
    :param params: Client parameters; when empty they are taken from the integration parameters.
    :param prefix: Command prefix, so that '<prefix>-get-indicators' is recognised.
    :return: None. Results are reported through return_outputs / demisto.createIndicators and
        every failure, including client configuration errors, through return_error.
    """
    if not params:
        params = assign_params(**demisto.params())
    if 'feed_name' not in params:
        params['feed_name'] = feed_name
    feed_tags = argToList(demisto.params().get('feedTags'))
    command = demisto.command()
    if command != 'fetch-indicators':
        demisto.info('Command being called is {}'.format(command))
    if prefix and not prefix.endswith('-'):
        prefix += '-'
    # Switch case
    commands: dict = {
        'test-module': test_module,
        f'{prefix}get-indicators': get_indicators_command
    }
    try:
        # Built inside the try block so invalid parameters are reported like any other error.
        client = Client(**params)
        if command == 'fetch-indicators':
            indicators = fetch_indicators_command(client, feed_tags, params.get('indicator_type'))
            # we submit the indicators in batches
            for b in batch(indicators, batch_size=2000):
                demisto.createIndicators(b)
        elif command in commands:
            args = demisto.args()
            args['feed_name'] = feed_name
            if feed_tags:
                args['feedTags'] = feed_tags
            readable_output, outputs, raw_response = commands[command](client, args)
            return_outputs(readable_output, outputs, raw_response)
        else:
            # Explicit message instead of an opaque KeyError from the dispatch table.
            raise NotImplementedError(f'Command "{command}" is not implemented.')
    except Exception as e:
        err_msg = f'Error in {feed_name} integration [{e}]\nTrace\n:{traceback.format_exc()}'
        return_error(err_msg)
466
467
468
# Run the integration when the module is executed under either of these module names.
if __name__ in ('__builtin__', 'builtins'):
    main()
471