· 6 years ago · Jul 17, 2019, 12:23 AM
1#!/usr/bin/env python
2
3from netaddr import IPNetwork, IPAddress
4from tqdm import tqdm
5import maxminddb
6import iso3166
7
8import sqlite3
9import argparse
10import common
11import json
12import os
13import base64
14
# SQL scripts executed against the SQLite database.
SCHEMA_FILE = "create_schema.sql"
# Spelled VIEWS_FILE (not "VIEwS_FILE") because that is the name the
# script's entry point passes to create_views().
VIEWS_FILE = "create_views.sql"

# NOTE(review): JSONLOG and CITY_LOCATIONS_CSV are not referenced by the
# code in this file -- confirm nothing else needs them before removing.
JSONLOG = "cowrie.json.2019-07-04"
CITY_LOCATIONS_CSV = "GeoLite2-City-Locations-en.csv"
# MaxMind database files opened with maxminddb.open_database().
GEOLITE2_CITY = "GeoLite2-City.mmdb"
GEOLITE2_ASN = "GeoLite2-ASN.mmdb"

# Default directory scanned for log files, and the SQLite database file.
COWRIE_LOG_DIR = "cowrie/"
DB_FILE = "db.sqlite"

# Shared reader for the city database; geolocate() reads it as a module
# global, so it is opened once at import time.
geolocation_db = maxminddb.open_database(GEOLITE2_CITY)
27
def connect_db(db_file):
    """Open a connection to the SQLite database at *db_file*.

    Args:
        db_file: Path of the database file, passed to ``sqlite3.connect``.

    Returns:
        The open ``sqlite3.Connection``, or ``None`` when SQLite reports an
        error while connecting (the error is printed). Callers must check
        for ``None`` before using the result.
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as error:
        # Only database errors are treated as "could not connect"; anything
        # else (e.g. a wrong argument type) is a programming error and is
        # allowed to propagate instead of being hidden behind ``None``.
        print(error)
        return None
37
def create_schema(conn, schema_file):
    """Execute the SQL script stored in *schema_file* on *conn*.

    Args:
        conn: An open ``sqlite3.Connection``.
        schema_file: Path of a text file containing one or more SQL
            statements.

    The script is handed to ``executescript`` as a whole so that SQLite
    itself finds the statement boundaries. Splitting the text on ``;`` by
    hand breaks any statement that contains a semicolon inside a string
    literal or a trigger body.
    """
    with open(schema_file) as f:
        script = f.read()
    conn.cursor().executescript(script)
44
def create_views(conn, VIEWS_FILE):
    """Execute the SQL script stored in *VIEWS_FILE* on *conn* and commit.

    Args:
        conn: An open ``sqlite3.Connection``.
        VIEWS_FILE: Path of a text file containing one or more SQL
            statements (the parameter keeps its original upper-case name).

    The script is run with ``executescript``. Splitting the file contents
    with a bare ``str.split()`` would cut on whitespace and try to execute
    every single word as its own statement.
    """
    with open(VIEWS_FILE) as f:
        script = f.read()
    conn.cursor().executescript(script)
    conn.commit()
52
def geolocate(src_ip):
    """Look up *src_ip* in the module-level ``geolocation_db`` reader.

    Args:
        src_ip: IP address passed to ``geolocation_db.get``.

    Returns:
        A list of 12 values, ``None`` for every attribute that is missing
        from the record, in this fixed order:

        country ISO code, country name, subdivision name, subdivision ISO
        code, city name, postal code, continent name, continent code,
        latitude, longitude, time zone, accuracy radius.

        Note that for subdivisions and continents the *name* comes before
        the *code*; callers that map the list onto named columns must use
        the same order.
    """
    lookup_paths = (
        ("country", "iso_code"),
        ("country", "names", "en"),
        ("subdivisions", 0, "names", "en"),
        ("subdivisions", 0, "iso_code"),
        ("city", "names", "en"),
        ("postal", "code"),
        ("continent", "names", "en"),
        ("continent", "code"),
        ("location", "latitude"),
        ("location", "longitude"),
        ("location", "time_zone"),
        ("location", "accuracy_radius"),
    )

    # One database lookup per address; every path below walks this record.
    record = geolocation_db.get(src_ip)

    geolocation_results = []
    for path in lookup_paths:
        value = record
        try:
            for key in path:
                # Integer keys index into a list, string keys into a dict.
                value = value[key] if isinstance(key, int) else value.get(key)
        except (AttributeError, TypeError, IndexError):
            # A missing record, a missing intermediate level or an empty
            # list all mean "attribute not available".
            value = None
        geolocation_results.append(value)
    return geolocation_results
83
def process_log(conn, log_directory, jsonlog):
    """Load the login and command events of one log file into ``attack_log``.

    Args:
        conn: An open ``sqlite3.Connection``.
        log_directory: Directory that contains the log file.
        jsonlog: File name of the log; each non-blank line is parsed as one
            JSON object.

    Only events whose ``eventid`` is ``cowrie.login.success``,
    ``cowrie.login.failed`` or ``cowrie.command.input`` are stored. Each
    stored row carries the ISP name from the ASN database and the values
    returned by ``geolocate()`` for the source address. All rows of the file
    are inserted and committed together at the end.
    """
    tracked_events = (
        "cowrie.login.success",
        "cowrie.login.failed",
        "cowrie.command.input",
    )

    # geolocate() returns the subdivision and continent *names* before their
    # codes, so the geolocation columns are listed in that same order.
    insertion_statement = """ INSERT INTO attack_log(
        src_ip,
        isp_name,
        event_timestamp,
        event_id,
        attempted_username,
        attempted_password,
        credential_signature,
        command,
        country_code,
        country_name,
        subdivision_name,
        subdivision_code,
        city_name,
        postal_code,
        continent_name,
        continent_code,
        latitude,
        longitude,
        time_zone,
        accuracy_radius)
        VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
    """

    c = conn.cursor()
    entries = []
    # Cache the per-address lookups: src_ip -> (isp_name, geolocation list).
    # The same address usually appears on many lines of a log.
    lookups_by_ip = {}

    # MaxMind ASN database, used to resolve the ISP of an address.
    isp_geolocation_db = maxminddb.open_database(GEOLITE2_ASN)
    try:
        with open(os.path.join(log_directory, jsonlog)) as logfile:
            for line in tqdm(logfile):
                if not line.strip():
                    continue  # tolerate blank lines in the log
                log_entry = json.loads(line)

                # Event IDs are identifiers that Cowrie uses to signify what
                # type of action an attacker is doing on a honeypot.
                event_id = log_entry.get("eventid")
                if event_id not in tracked_events:
                    continue

                if event_id == "cowrie.command.input":
                    attempted_username = ""
                    attempted_password = ""
                    command = log_entry.get("message")
                else:
                    attempted_username = log_entry.get("username")
                    attempted_password = log_entry.get("password")
                    command = None

                src_ip = log_entry.get("src_ip")
                event_timestamp = log_entry.get("timestamp")
                # Base64 of the username bytes followed by the password bytes.
                credential_signature = base64.b64encode(
                    attempted_username.encode() + attempted_password.encode()
                )

                if src_ip not in lookups_by_ip:
                    # The reader returns None for addresses it does not know.
                    asn_record = isp_geolocation_db.get(src_ip)
                    isp_name = (
                        asn_record.get("autonomous_system_organization")
                        if asn_record
                        else None
                    )
                    lookups_by_ip[src_ip] = (isp_name, geolocate(src_ip))
                isp_name, geolocation_results = lookups_by_ip[src_ip]

                entry = [
                    src_ip,
                    isp_name,
                    event_timestamp,
                    event_id,
                    attempted_username,
                    attempted_password,
                    credential_signature,
                    command,
                ]
                entry.extend(geolocation_results)
                entries.append(entry)
    finally:
        isp_geolocation_db.close()

    c.executemany(insertion_statement, entries)
    conn.commit()
174
def profile_attackers(conn):
    """Summarise ``attack_log`` into one ``attacker_profiles`` row per address.

    Args:
        conn: An open ``sqlite3.Connection`` whose ``attack_log`` table has
            the ``src_ip``, ``event_timestamp`` and ``event_id`` columns that
            ``process_log()`` fills.

    For every distinct source address the profile holds the ISP name from the
    ASN database, the timestamp of its first and last row in ``attack_log``
    (in the order the rows are returned), the number of login events
    (``cowrie.login.success`` / ``cowrie.login.failed``) and the values
    returned by ``geolocate()``.
    """
    print("Profiling attackers ...")

    login_events = ("cowrie.login.success", "cowrie.login.failed")

    c = conn.cursor()
    # Name the columns explicitly instead of relying on SELECT * positions,
    # which depend on a table definition that lives outside this file.
    rows = c.execute(
        "SELECT src_ip, event_timestamp, event_id FROM attack_log"
    ).fetchall()

    # Single pass over the log; the dict keeps addresses in first-seen order.
    profiles = {}
    for src_ip, event_timestamp, event_id in rows:
        profile = profiles.get(src_ip)
        if profile is None:
            profile = profiles[src_ip] = {
                "first_seen": event_timestamp,
                "last_seen": event_timestamp,
                "attack_count": 0,
            }
        profile["last_seen"] = event_timestamp
        if event_id in login_events:
            profile["attack_count"] += 1

    entries = []
    isp_geolocation_db = maxminddb.open_database(GEOLITE2_ASN)
    try:
        for src_ip, profile in tqdm(profiles.items()):
            # The reader returns None for addresses it does not know.
            asn_record = isp_geolocation_db.get(src_ip)
            isp = (
                asn_record.get("autonomous_system_organization")
                if asn_record
                else None
            )
            new_item = [
                src_ip,
                isp,
                profile["first_seen"],
                profile["last_seen"],
                profile["attack_count"],
            ]
            new_item.extend(geolocate(src_ip))
            entries.append(new_item)
    finally:
        isp_geolocation_db.close()

    # geolocate() returns the subdivision and continent *names* before their
    # codes, so the geolocation columns are listed in that same order.
    insertion_statement = """INSERT INTO attacker_profiles(
        src_ip,isp,first_seen,last_seen,attack_count,
        country_code, country_name,
        subdivision_name, subdivision_code,
        city_name, postal_code, continent_name,
        continent_code, latitude, longitude,
        time_zone, accuracy_radius)
        VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""
    c.executemany(insertion_statement, entries)
    conn.commit()
244
245
def country_stats(conn):
    """Aggregate ``attacker_profiles`` into per-country attack counts.

    Args:
        conn: An open ``sqlite3.Connection`` with an ``attacker_profiles``
            table that has ``country_code`` and ``attack_count`` columns.

    Creates the ``country_stats`` table if it does not exist and inserts one
    row per ISO 3166 alpha-3 code, including a row with count 0 for every
    country without attacks. Profiles whose country code is not known to
    ``iso3166`` are summed under the empty string.
    """
    print("Generating country statistics ...")

    c = conn.cursor()

    # Create the country_stats table
    c.execute(
        "CREATE TABLE IF NOT EXISTS country_stats(country_code text, attack_count int)"
    )

    attacker_profiles = c.execute(
        "SELECT country_code, attack_count FROM attacker_profiles"
    ).fetchall()

    # Populate the table with a 0 value for every country. Our map gets wonky
    # unless every country has a value.
    country_data = {alpha3: 0 for alpha3 in iso3166.countries_by_alpha3}

    for country_code, attack_count in tqdm(attacker_profiles):
        # The GeoJSON file folium uses only works with ISO 3166, so we must
        # convert our country codes to the alpha-3 keys seeded above.
        try:
            country = iso3166.countries.get(country_code)
        except KeyError:
            country_code = ""
        else:
            if country is not None:
                # Store the alpha-3 string, not the Country record itself,
                # which SQLite cannot bind as a parameter.
                country_code = country.alpha3
        country_data[country_code] = country_data.get(country_code, 0) + attack_count

    c.executemany(
        "INSERT INTO country_stats(country_code,attack_count) VALUES(?,?)",
        list(country_data.items()),
    )
    conn.commit()
291
292
293
if __name__ == "__main__":
    # Entry point: parse the command line, create the schema, load every log
    # file found in the log directories, then create the views.

    parser = argparse.ArgumentParser(description="ip")
    parser.add_argument(
        "--log-dir",
        dest="log_directory",
        type=str,
        nargs="+",
        help="The log directory for cowrie data",
    )
    parser.add_argument(
        "--no-processing", dest="no_processing", default=False, action="store_true"
    )
    args = parser.parse_args()

    # nargs="+" yields a list, so the directories are always handled as one.
    if args.log_directory is None:
        log_directories = [COWRIE_LOG_DIR]
        print("Assuming default log directory: " + COWRIE_LOG_DIR)
    else:
        log_directories = args.log_directory
        print("Using log directory: " + ", ".join(log_directories))

    print("Connecting to database ....")
    conn = connect_db(DB_FILE)
    if conn is None:
        # connect_db() has already printed the underlying error.
        raise SystemExit("Could not connect to database " + DB_FILE)
    print("Connected ...")

    try:
        print("Creating schema ...")
        create_schema(conn, SCHEMA_FILE)

        if not args.no_processing:
            for log_directory in log_directories:
                # Guarantee a trailing separator so the directory can be
                # combined with a file name however process_log() joins them.
                log_directory = os.path.join(log_directory, "")
                # Sorted for a reproducible order; sub-directories are skipped.
                logs = sorted(
                    name
                    for name in os.listdir(log_directory)
                    if os.path.isfile(os.path.join(log_directory, name))
                )
                for index, log in enumerate(logs, start=1):
                    print(
                        "({}/{}) Processing logfile {} ...".format(
                            index, len(logs), log
                        )
                    )
                    process_log(conn, log_directory, log)

        print("Creating views ...")
        create_views(conn, VIEWS_FILE)
    finally:
        conn.close()