· 5 years ago · Nov 08, 2020, 01:20 PM
1import os
2import pefile
3import numpy as np
4import pandas as ps
5import csv
6
# Well-known Windows DLLs; each one becomes a 0/1 "is imported" feature.
# This is a real list (not a set literal) so that iteration order -- and with
# it the key order of every feature row -- is identical from run to run
# instead of depending on string-hash randomisation.
# Names must be upper case: imported names are upper-cased before the lookup.
__LIST_OF_DLLS = [
    'ADVAPI32.DLL',  # Advanced Win32 application programming interfaces
    'AWFAXP32.DLL',  # Mail API fax transport
    'AWFXAB32.DLL',  # Address book
    'AWPWD32.DLL',  # Security support
    'AWRESX32.DLL',  # Resource Executor
    'AWUTIL32.DLL',  # At Work Security Support
    'BHNETB.DLL',  # Network monitor SMS client
    'BHSUPP.DLL',  # Network monitor SMS client
    'CCAPI.DLL',  # Microsoft Network component
    'CCEI.DLL',  # Microsoft Network component
    'CCPSH.DLL',  # Microsoft Network component
    'CCTN20.DLL',  # Microsoft Network component
    'CMC.DLL',  # Common messaging calls for Mail API 1.0
    'COMCTL32.DLL',  # User Experience Controls Library
    'COMDLG32.DLL',  # Common Dialogue Library
    'CRTDLL.DLL',  # Microsoft C Runtime Library
    'DCIMAN.DLL',  # Display Control Interface Manager
    'DCIMAN32.DLL',  # Display Control Interface Manager
    'DSKMAINT.DLL',  # Disk Utilities engine
    'GDI32.DLL',  # GDI Client DLL
    'GROUP.DLL',  # policy support
    'HYPERTERM.DLL',  # Terminal DLL
    'KERNEL32.DLL',  # Windows NT BASE API Client DLL
    'LZ32.DLL',  # LZ Expand/Compress API DLL
    'MAPI.DLL',  # Mail / Exchange component
    'MAPI32.DLL',  # Extended MAPI 1.0 for Windows NT
    'MFC30.DLL',  # Shared MFC DLL
    'MPR.DLL',  # Multiple Provider Router DLL
    'MSPST32.DLL',  # Microsoft Personal Folder/Address Book Service Provider
    'MSFS32.DLL',  # MAPI 1.0 Service Providers for Microsoft Mail
    'MSNDUI.DLL',  # Microsoft Network component
    'MSNET32.DLL',  # Microsoft 32-bit Network API Library
    'MSSHRUI.DLL',  # Shell extensions for sharing
    'MSVIEWUT.DLL',  # Service data-link libraries for display engines
    'NAL.DLL',  # Network monitor SMS client
    'NDIS30.DLL',  # Network monitor SMS client
    'NETAPI.DLL',  # Network API
    'NETAPI32.DLL',  # Net Win32 API DLL
    'NETBIOS.DLL',  # NetBIOS API Library
    'NETDI.DLL',  # Net Device installer
    'NETSETUP.DLL',  # Network server-based setup
    'NWAB32.DLL',  # Address book provider
    'NWNET32.DLL',  # NetWare client
    'NWNP32.DLL',  # NetWare component
    'OLEDLG.DLL',  # Microsoft Windows OLE 2.0 User Interface Support
    'POWERCFG.DLL',  # Advanced Power Management Control Panel
    'RASPI.DLL',  # Automated Software Profile, Analysis, Removal and Signature Information
    'RASAPI16.DLL',  # Remote Access Services 16-bit API Library
    'RASAPI32.DLL',  # Remote Access 32-bit API Library
    'RPCRT4.DLL',  # Remote Procedure Call Runtime
    'RPCLTC1.DLL',  # Remote Procedure Call libraries
    'RPCTLC3.DLL',  # Remote Procedure Call libraries
    'RPCTLC5.DLL',  # Remote Procedure Call libraries
    'RPCTLC6.DLL',  # Remote Procedure Call libraries
    'RPCTLS3.DLL',  # Remote Procedure Call libraries
    'RPCTLS5.DLL',  # Remote Procedure Call libraries
    'RPCTLS6.DLL',  # Remote Procedure Call libraries
    'RPCNS4.DLL',  # Remote Procedure Call Name Service Client
    'RSRC32.DLL',  # Resource Meter
    'SAPNSP.DLL',  # Winsock data-link library
    'SECUR32.DLL',  # Security Support Provider Interface
    'SHELL32.DLL',  # Windows Shell Common DLL
    'SLENH.DLL',  # Advanced Power Management options
    'SHLWAPI.DLL',  # Library for UNC and URL Paths, Registry Entries and Color Settings
    'UMDM32.DLL',  # Universal Modem Driver component
    'USER32.DLL',  # USER API Client DLL
    'VERSION.DLL',  # Version Checking and File Installation Libraries
    'WININET.DLL',  # Internet Extensions for Win32
    'WINMM.DLL',  # MCI API DLL
    'WINREG.DLL',  # Remote Registry support
    'WINSOCK.DLL',  # Socket API for Windows
    'WS2_32.DLL',  # Windows Socket 2.0 32-Bit DLL
    'WSOCK32.DLL',  # Windows Socket 32-Bit DLL
]

# Same names as a set, for O(1) membership tests on imported DLL names.
__SET_OF_DLLS = set(__LIST_OF_DLLS)
84
# Members of the COFF file header (IMAGE_FILE_HEADER), in structure order.
# Reference: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-_image_file_header
__FILE_HEADER = [
    # Architecture type of the target computer.
    'Machine',
    # Number of sections in the image.
    'NumberOfSections',
    # Low 32 bits of the image's time stamp.
    'TimeDateStamp',
    # Byte offset of the COFF symbol table, or zero if there is none.
    'PointerToSymbolTable',
    # Number of symbols in the symbol table.
    'NumberOfSymbols',
    # Size of the optional header, in bytes.
    'SizeOfOptionalHeader',
    # Characteristics flags of the image.
    'Characteristics',
]
95
# Members of the optional header (standard and Windows-specific fields),
# in structure order.
# Reference: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32
__OPTIONAL_HEADER = [
    # State of the image file.
    'Magic',
    # Major / minor version number of the linker.
    'MajorLinkerVersion',
    'MinorLinkerVersion',
    # Size of the code section in bytes (sum over all code sections).
    'SizeOfCode',
    # Size of the initialized data section in bytes (sum over all such sections).
    'SizeOfInitializedData',
    # Size of the uninitialized data section in bytes (sum over all such sections).
    'SizeOfUninitializedData',
    # Pointer to the entry point function, relative to the image base address.
    'AddressOfEntryPoint',
    # Pointer to the beginning of the code section, relative to the image base.
    'BaseOfCode',
    # Pointer to the beginning of the data section, relative to the image base.
    'BaseOfData',
    # Preferred address of the first byte of the image when loaded in memory.
    'ImageBase',
    # Alignment of sections loaded in memory, in bytes.
    'SectionAlignment',
    # Alignment of the raw data of sections in the image file, in bytes.
    'FileAlignment',
    # Major / minor version number of the required operating system.
    'MajorOperatingSystemVersion',
    'MinorOperatingSystemVersion',
    # Major / minor version number of the image.
    'MajorImageVersion',
    'MinorImageVersion',
    # Major / minor version number of the subsystem.
    'MajorSubsystemVersion',
    'MinorSubsystemVersion',
    # (Win32VersionValue) Reserved, must be 0.
    'Reserved1',
    # Size of the image in bytes, including all headers.
    'SizeOfImage',
    # Combined size of the headers, rounded to a multiple of FileAlignment.
    'SizeOfHeaders',
    # Image file checksum.
    'CheckSum',
    # Subsystem required to run this image.
    'Subsystem',
    # DLL characteristics of the image.
    'DllCharacteristics',
    # Number of bytes to reserve / commit for the stack.
    'SizeOfStackReserve',
    'SizeOfStackCommit',
    # Number of bytes to reserve / commit for the local heap.
    'SizeOfHeapReserve',
    'SizeOfHeapCommit',
    # This member is obsolete.
    'LoaderFlags',
    # Number of directory entries in the remainder of the optional header.
    'NumberOfRvaAndSizes',
]
133
# Data-directory entry names mapped to their index in the optional header's
# data-directory array. The index is simply the position in the tuple below,
# and every name carries the common 'IMAGE_DIRECTORY_ENTRY_' prefix.
__DIRECTORY_ENTRY_TYPES = {
    f'IMAGE_DIRECTORY_ENTRY_{suffix}': index
    for index, suffix in enumerate((
        'EXPORT',
        'IMPORT',
        'RESOURCE',
        'EXCEPTION',
        'SECURITY',
        'BASERELOC',
        'DEBUG',
        'COPYRIGHT',
        'GLOBALPTR',
        'TLS',
        'LOAD_CONFIG',
        'BOUND_IMPORT',
        'IAT',
        'DELAY_IMPORT',
        'COM_DESCRIPTOR',
        'RESERVED',
    ))
}
# Numeric members of a section header (section-table entry) that are read
# from each section of interest.
# Reference: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#section-table-section-headers
__SECTION_HEADER = [
    # Size and address fields.
    'VirtualSize', 'VirtualAddress',
    'SizeOfRawData', 'PointerToRawData',
    # Relocation / line-number pointers and their entry counts.
    'PointerToRelocations', 'PointerToLinenumbers',
    'NumberOfRelocations', 'NumberOfLinenumbers',
    # Section flags.
    'Characteristics',
]
164
# Members of the root resource directory table that are exported as
# 'Resource:<member>' features.
__RESOURCE_DIRECTORY_TABLE = [
    'Characteristics',
    'MajorVersion', 'MinorVersion',
    'NumberOfIdEntries', 'NumberOfNamedEntries',
    'TimeDateStamp',
]
173
174
# Raw 8-byte section names mapped to the prefix used in the feature names.
# Key order matters: it fixes the order in which default features are created.
_SECTION_FEATURE_PREFIXES = {
    b'.text\x00\x00\x00': 'text',
    b'.data\x00\x00\x00': 'data',
    b'.rsrc\x00\x00\x00': 'resource',
}

# Resource type id mapped to the name of the counter feature it feeds.
# Value order matters: it fixes the order in which the counters are created.
_RESOURCE_TYPE_FEATURES = {
    1: 'Cursors',
    2: 'Bitmaps',
    3: 'Icons',
    4: 'Menus',
    5: 'Dialogs',
    6: 'Strings',
    8: 'Fonts',
    12: 'Group Cursors',
    14: 'Group Icons',
}


def extract_feature(file_path):
    """Build a flat feature dictionary for one PE file.

    The features are, in insertion order:
      * one 0/1 flag per DLL in ``__LIST_OF_DLLS`` (1 if the file imports it);
      * every member of ``__FILE_HEADER`` and ``__OPTIONAL_HEADER``;
      * ``<directory>:VirtualAddress`` / ``<directory>:Size`` for each data directory;
      * ``text:``, ``data:`` and ``resource:`` section-header members;
      * per-type resource counters and ``Resource:<member>`` root-table members.

    A value of -1 means the field or structure is absent from the file.

    :param file_path: path of the file to parse.
    :return: the feature dictionary, or ``None`` if the file could not be
        parsed (the error is printed together with the file's base name).
    :raises OSError: if the file cannot be opened or read.
    """
    with open(file_path, 'rb') as f:
        file_data = f.read()
    try:
        pe = pefile.PE(data=file_data)
        ret = dict()

        # Feature From DLLs referred
        for dll in __LIST_OF_DLLS:
            ret[dll] = 0
        for lib in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', ()):
            if lib.dll is None:
                continue
            # Decode once; a name that is not valid UTF-8 cannot match a
            # known DLL and must not discard the whole sample.
            dll_name = lib.dll.decode('utf-8', errors='replace').upper()
            if dll_name in __SET_OF_DLLS:
                ret[dll_name] = 1

        # Feature From COFF file header
        for member in __FILE_HEADER:
            ret[member] = getattr(pe.FILE_HEADER, member, -1)

        # Feature From Optional header: standard fields and Windows specific fields
        for member in __OPTIONAL_HEADER:
            ret[member] = getattr(pe.OPTIONAL_HEADER, member, -1)

        # Feature From Optional header: data directories
        for key in __DIRECTORY_ENTRY_TYPES:
            ret[f'{key}:VirtualAddress'] = -1
            ret[f'{key}:Size'] = -1
        for structure in pe.OPTIONAL_HEADER.DATA_DIRECTORY:
            ret[f'{structure.name}:VirtualAddress'] = structure.VirtualAddress
            ret[f'{structure.name}:Size'] = structure.Size

        # Feature From Section headers
        for prefix in _SECTION_FEATURE_PREFIXES.values():
            for member in __SECTION_HEADER:
                ret[f'{prefix}:{member}'] = -1
        for section in pe.sections:
            prefix = _SECTION_FEATURE_PREFIXES.get(section.Name)
            if prefix is None:
                continue
            for member in __SECTION_HEADER:
                ret[f'{prefix}:{member}'] = getattr(section, member, -1)

        # Feature From Resource directory table & resources
        for feature in _RESOURCE_TYPE_FEATURES.values():
            ret[feature] = 0
        for member in __RESOURCE_DIRECTORY_TABLE:
            ret[f'Resource:{member}'] = -1
        resource_root = getattr(pe, 'DIRECTORY_ENTRY_RESOURCE', None)
        if resource_root is not None:
            for member in __RESOURCE_DIRECTORY_TABLE:
                ret[f'Resource:{member}'] = getattr(resource_root.struct, member, -1)
            for entry in resource_root.entries:
                feature = _RESOURCE_TYPE_FEATURES.get(entry.struct.Id)
                # A type entry without a sub-directory has nothing to count;
                # skip it instead of failing the whole file.
                directory = getattr(entry, 'directory', None)
                if feature is not None and directory is not None:
                    ret[feature] += len(directory.entries)

        print(ret)
        return ret
    except pefile.PEFormatError as pefe:
        print("PEFormatError", pefe, os.path.basename(file_path))
        return None
    except Exception as e:
        # Best effort: any other parsing problem skips this file only.
        print("Exception", e, os.path.basename(file_path))
        return None
263
264
265# extract_feature("D:/studying/Rhein-Waal Uni/1st semester/Project research A/vxheaven-windows-virus-collection/Virus.Win/Virus.Win32.Xorer.ci")
266
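# Collects the names of the DLLs listed in each file's import directory
# (pefile's DIRECTORY_ENTRY_IMPORT, i.e. data-directory index 1 -- not the
# Import Address Table, which is data-directory index 12).
# The import tables are conventionally stored in the .idata section.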
def find_libraries(directory_path,
                   output_path='D:/studying/Rhein-Waal Uni/1st semester/Project research A/tables/dlls_number_for_malware.csv'):
    """Count how often each DLL is imported by the PE files in a directory.

    Every regular file directly inside ``directory_path`` is parsed with
    pefile. Each import-directory entry adds one to the count of the DLL it
    names (upper-cased). Files that cannot be parsed are reported on stdout
    and skipped. The result is printed and written to ``output_path`` as a
    CSV file with the columns ``name`` and ``count``; with no imports found,
    only the header row is written.

    :param directory_path: directory containing the files to inspect.
    :param output_path: destination of the CSV report.
    :return: list of ``{"name": ..., "count": ...}`` dictionaries, ordered
        by first appearance of each DLL.
    :raises OSError: if the directory, a sample or the output file cannot
        be accessed.
    """
    # name -> number of occurrences; a dict keeps first-seen order and makes
    # both the lookup and the increment hit the right DLL.
    dll_counts = {}
    for filename in os.listdir(directory_path):
        sample_path = os.path.join(directory_path, filename)
        if not os.path.isfile(sample_path):
            # Sub-directories cannot be opened as samples.
            continue
        with open(sample_path, 'rb') as f:  # open in readonly mode
            file_data = f.read()
        try:
            pe = pefile.PE(data=file_data)
            if not hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
                print('\t There are no imports for this PE-File')
                continue
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                if entry.dll is None:
                    continue
                name = entry.dll.decode('utf-8', errors='replace').upper()
                dll_counts[name] = dll_counts.get(name, 0) + 1
        except pefile.PEFormatError as pefe:
            print("PEFormatError", pefe, filename)
        except Exception as e:
            # Best effort: report the offending file and carry on.
            print("Exception", e, filename)

    dll_list = [{"name": name, "count": count} for name, count in dll_counts.items()]
    print(dll_list)
    print("Number of DLLs {}".format(len(dll_list)))

    # Convert dll_list to csv file. The column names are fixed so that an
    # empty result still yields a valid file; UTF-8 keeps unusual DLL names
    # writable regardless of the platform's default encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, ["name", "count"])
        dict_writer.writeheader()
        dict_writer.writerows(dll_list)
    return dll_list
302
303
# Script entry point: tally the DLL imports of every file in the virus collection.
# NOTE(review): this is a bare top-level call, so it also runs whenever the
# module is imported -- consider an `if __name__ == "__main__":` guard.
find_libraries("D:/studying/Rhein-Waal Uni/1st semester/Project research A/vxheaven-windows-virus-collection/Virus.Win")
305
306