# Paste metadata: 7 years ago, Sep 25, 2018, 02:56 AM
r"""
PyInstaller Extractor v1.9 (Supports pyinstaller 3.3, 3.2, 3.1, 3.0, 2.1, 2.0)
Author : Extreme Coders
E-mail : extremecoders(at)hotmail(dot)com
Web    : https://0xec.blogspot.com
Date   : 29-November-2017
Url    : https://sourceforge.net/projects/pyinstallerextractor/

For any suggestions, leave a comment on
https://forum.tuts4you.com/topic/34455-pyinstaller-extractor/

This script extracts a pyinstaller generated executable file.
Pyinstaller installation is not needed. The script has it all.

For best results, it is recommended to run this script in the
same version of python as was used to create the executable.
This is just to prevent unmarshalling errors (if any) while
extracting the PYZ archive.

Usage : Just copy this script to the directory where your exe resides
        and run the script with the exe file name as a parameter

C:\path\to\exe\>python pyinstxtractor.py <filename>
$ /path/to/exe/python pyinstxtractor.py <filename>

Licensed under GNU General Public License (GPL) v3.
You are free to modify this source.

CHANGELOG
================================================

Version 1.1 (Jan 28, 2014)
-------------------------------------------------
- First Release
- Supports only pyinstaller 2.0

Version 1.2 (Sept 12, 2015)
-------------------------------------------------
- Added support for pyinstaller 2.1 and 3.0 dev
- Cleaned up code
- Script is now more verbose
- Executable extracted within a dedicated sub-directory

(Support for pyinstaller 3.0 dev is experimental)

Version 1.3 (Dec 12, 2015)
-------------------------------------------------
- Added support for pyinstaller 3.0 final
- Script is compatible with both python 2.x & 3.x (Thanks to Moritz Kroll @ Avira Operations GmbH & Co. KG)

Version 1.4 (Jan 19, 2016)
-------------------------------------------------
- Fixed a bug when writing pyc files >= version 3.3 (Thanks to Daniello Alto: https://github.com/Djamana)

Version 1.5 (March 1, 2016)
-------------------------------------------------
- Added support for pyinstaller 3.1 (Thanks to Berwyn Hoyt for reporting)

Version 1.6 (Sept 5, 2016)
-------------------------------------------------
- Added support for pyinstaller 3.2
- Extractor will use a random name while extracting unnamed files.
- For encrypted pyz archives it will dump the contents as is. Previously, the tool would fail.

Version 1.7 (March 13, 2017)
-------------------------------------------------
- Made the script compatible with python 2.6 (Thanks to Ross for reporting)

Version 1.8 (April 28, 2017)
-------------------------------------------------
- Support for sub-directories in .pyz files (Thanks to Moritz Kroll @ Avira Operations GmbH & Co. KG)

Version 1.9 (November 29, 2017)
-------------------------------------------------
- Added support for pyinstaller 3.3
- Display the scripts which are run at entry (Thanks to Michael Gillespie @ malwarehunterteam for the feature request)

"""
79
from __future__ import print_function

import marshal
import os
import struct
import sys
import types
import zlib
from uuid import uuid4 as uniquename

try:
    import imp
except ImportError:  # the imp module was removed in Python 3.12
    imp = None
89
90
class CTOCEntry:
    """One record of the CArchive table of contents.

    A plain value holder: every constructor argument is stored unchanged
    on the instance under an attribute of the same name.
    """

    def __init__(self, position, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name):
        # Location of the payload and its stored / expanded sizes.
        self.position, self.cmprsdDataSize, self.uncmprsdDataSize = (
            position, cmprsdDataSize, uncmprsdDataSize)
        # Compression flag and the entry's type code.
        self.cmprsFlag, self.typeCmprsData = cmprsFlag, typeCmprsData
        # Name of the entry.
        self.name = name
99
100
class PyInstArchive:
    """Reader/extractor for the CArchive appended to a PyInstaller executable.

    Typical call order (as used by ``main``): ``open`` -> ``checkFile`` ->
    ``getCArchiveInfo`` -> ``parseTOC`` -> ``extractFiles`` -> ``close``.

    The archive is treated as untrusted input: names taken from it are
    sanitised before being used as file system paths, and malformed size or
    offset fields are reported instead of aborting with an assertion.
    """

    PYINST20_COOKIE_SIZE = 24           # For pyinstaller 2.0
    PYINST21_COOKIE_SIZE = 24 + 64      # For pyinstaller 2.1+
    MAGIC = b'MEI\014\013\012\013\016'  # Magic number which identifies pyinstaller

    def __init__(self, path):
        """Remember the path of the executable; nothing is opened yet."""
        self.filePath = path
        self.fPtr = None

    def open(self):
        """Open the executable for binary reading and record its size.

        Returns True on success, False (after printing an error) otherwise.
        """
        handle = None
        try:
            handle = open(self.filePath, 'rb')
            # Size of the handle we actually hold, not of whatever the path
            # may point to a moment later.
            self.fileSize = os.fstat(handle.fileno()).st_size
        except EnvironmentError:
            if handle is not None:
                handle.close()
            print('[*] Error: Could not open {0}'.format(self.filePath))
            return False
        self.fPtr = handle
        return True

    def close(self):
        """Close the executable if it is open; safe to call repeatedly."""
        handle = getattr(self, 'fPtr', None)
        if handle is None:
            return
        try:
            handle.close()
        except EnvironmentError:
            # Best effort: there is nothing useful to do if closing fails.
            pass

    def checkFile(self):
        """Detect the archive flavour by looking for MAGIC at the cookie position.

        Sets ``self.pyinstVer`` to 20 (pyinstaller 2.0) or 21 (2.1+) and
        returns True when the magic is found, otherwise prints an error and
        returns False.
        """
        print('[*] Processing {0}'.format(self.filePath))

        candidates = (
            (self.PYINST20_COOKIE_SIZE, 20, '2.0'),
            (self.PYINST21_COOKIE_SIZE, 21, '2.1+'),
        )
        for cookieSize, version, label in candidates:
            # A file shorter than the cookie cannot contain it; seeking to a
            # negative offset would raise instead of reporting the problem.
            if self.fileSize < cookieSize:
                continue
            self.fPtr.seek(self.fileSize - cookieSize, os.SEEK_SET)
            if self.fPtr.read(len(self.MAGIC)) == self.MAGIC:
                self.pyinstVer = version
                print('[*] Pyinstaller version: {0}'.format(label))
                return True

        print('[*] Error : Unsupported pyinstaller version or not a pyinstaller archive')
        return False

    def getCArchiveInfo(self):
        """Read the CArchive cookie and derive overlay / TOC positions.

        Sets ``pyver``, ``overlaySize``, ``overlayPos``,
        ``tableOfContentsPos`` and ``tableOfContentsSize``.
        Returns True on success, False (after printing an error) otherwise.
        """
        version = getattr(self, 'pyinstVer', None)
        if version == 20:
            cookieSize, cookieFormat = self.PYINST20_COOKIE_SIZE, '!8siiii'
        elif version == 21:
            # The 2.1+ cookie carries a trailing 64 byte field (unused here).
            cookieSize, cookieFormat = self.PYINST21_COOKIE_SIZE, '!8siiii64s'
        else:
            print('[*] Error : The file is not a pyinstaller archive')
            return False

        try:
            self.fPtr.seek(self.fileSize - cookieSize, os.SEEK_SET)
            cookie = struct.unpack(cookieFormat, self.fPtr.read(cookieSize))
        except (struct.error, EnvironmentError, ValueError):
            print('[*] Error : The file is not a pyinstaller archive')
            return False

        (lengthofPackage, toc, tocLen, self.pyver) = cookie[1:5]

        # Reject cookies whose fields would place the overlay or the TOC
        # outside the file; they would only fail later with a raw seek error.
        if not (0 < lengthofPackage <= self.fileSize) or toc < 0 or tocLen < 0:
            print('[*] Error : The file is not a pyinstaller archive')
            return False

        print('[*] Python version: {0}'.format(self.pyver))

        # Overlay is the data appended at the end of the PE
        self.overlaySize = lengthofPackage
        self.overlayPos = self.fileSize - self.overlaySize
        self.tableOfContentsPos = self.overlayPos + toc
        self.tableOfContentsSize = tocLen

        print('[*] Length of package: {0} bytes'.format(self.overlaySize))
        return True

    def parseTOC(self):
        """Parse the table of contents into ``self.tocList`` (CTOCEntry objects).

        Parsing stops with a warning at the first truncated or malformed
        entry; entries read up to that point are kept.
        """
        # Go to the table of contents
        self.fPtr.seek(self.tableOfContentsPos, os.SEEK_SET)

        self.tocList = []
        parsedLen = 0
        # Size of the fixed part of an entry, including the leading size field.
        fixedLen = struct.calcsize('!iiiiBc')

        while parsedLen < self.tableOfContentsSize:
            sizeField = self.fPtr.read(4)
            if len(sizeField) < 4:
                print('[!] Warning: Table of contents is truncated')
                break
            (entrySize, ) = struct.unpack('!i', sizeField)

            # An entry smaller than its fixed part is corrupt and would also
            # keep parsedLen from advancing sensibly.
            if entrySize < fixedLen:
                print('[!] Warning: Invalid entry size {0} in table of contents'.format(entrySize))
                break

            body = self.fPtr.read(entrySize - 4)
            if len(body) < entrySize - 4:
                print('[!] Warning: Table of contents is truncated')
                break

            (entryPos, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name) = \
                struct.unpack('!iiiBc{0}s'.format(entrySize - fixedLen), body)

            # 'replace' keeps going on names that are not valid UTF-8.
            name = name.decode('utf-8', 'replace').rstrip('\0')
            if len(name) == 0:
                name = str(uniquename())
                print('[!] Warning: Found an unamed file in CArchive. Using random name {0}'.format(name))

            self.tocList.append(
                CTOCEntry(
                    self.overlayPos + entryPos,
                    cmprsdDataSize,
                    uncmprsdDataSize,
                    cmprsFlag,
                    typeCmprsData,
                    name
                ))

            parsedLen += entrySize
        print('[*] Found {0} files in CArchive'.format(len(self.tocList)))

    @staticmethod
    def _safeRelPath(name):
        """Turn an archive-supplied name into a relative path that stays put.

        Drive letters, leading separators and '.' components are dropped and
        '..' components become '__', so joining the result onto a directory
        can never escape that directory. A name with no usable component is
        replaced by a random one.
        """
        tail = os.path.splitdrive(name)[1]
        if os.altsep:
            tail = tail.replace(os.altsep, os.sep)

        parts = []
        for part in tail.split(os.sep):
            if part in ('', '.'):
                continue
            parts.append('__' if part == '..' else part)

        if not parts:
            return str(uniquename())
        return os.path.join(*parts)

    @staticmethod
    def _currentPycMagic():
        """Return the pyc magic number of the running interpreter."""
        try:
            from importlib.util import MAGIC_NUMBER
            return MAGIC_NUMBER
        except ImportError:
            # Older interpreters without importlib.util.MAGIC_NUMBER.
            import imp
            return imp.get_magic()

    def extractFiles(self):
        """Write every CArchive entry below ``<cwd>/<exe name>_extracted``.

        NOTE: changes the process working directory to the extraction
        directory and leaves it there. Entries of type 'z'/'Z' are
        additionally unpacked with ``_extractPyz``.
        """
        print('[*] Beginning extraction...please standby')
        extractionDir = os.path.join(os.getcwd(), os.path.basename(self.filePath) + '_extracted')

        if not os.path.exists(extractionDir):
            os.mkdir(extractionDir)

        os.chdir(extractionDir)

        for entry in self.tocList:
            # Entry names come from the (possibly hostile) archive; never let
            # them point outside the extraction directory.
            destPath = self._safeRelPath(entry.name)
            if destPath != os.path.normpath(entry.name):
                print('[!] Warning: Unsafe path {0} in CArchive. Extracting to {1}'.format(entry.name, destPath))

            basePath = os.path.dirname(destPath)
            if basePath != '' and not os.path.exists(basePath):
                os.makedirs(basePath)

            self.fPtr.seek(entry.position, os.SEEK_SET)
            data = self.fPtr.read(entry.cmprsdDataSize)

            if entry.cmprsFlag == 1:
                try:
                    data = zlib.decompress(data)
                except zlib.error:
                    print('[!] Error: Failed to decompress {0}. Extracting as is.'.format(entry.name))
                else:
                    # Malware may tamper with the uncompressed size, so a
                    # mismatch is reported rather than treated as fatal.
                    if len(data) != entry.uncmprsdDataSize:
                        print('[!] Warning: Size mismatch for {0}: expected {1} bytes, got {2}'.format(
                            entry.name, entry.uncmprsdDataSize, len(data)))

            with open(destPath, 'wb') as f:
                f.write(data)

            if entry.typeCmprsData == b's':
                print('[+] Possible entry point: {0}'.format(entry.name))

            elif entry.typeCmprsData == b'z' or entry.typeCmprsData == b'Z':
                self._extractPyz(destPath)

    def _extractPyz(self, name):
        """Unpack the PYZ archive stored in file ``name`` into ``name + '_extracted'``.

        Each module is written as ``<module>.pyc`` with a header built from
        the archive's pyc magic. Modules that cannot be decompressed
        (probably encrypted) are dumped unchanged as ``<module>.pyc.encrypted``.
        """
        dirName = name + '_extracted'
        # Create a directory for the contents of the pyz
        if not os.path.exists(dirName):
            os.mkdir(dirName)

        with open(name, 'rb') as f:
            pyzMagic = f.read(4)
            if pyzMagic != b'PYZ\0':
                print('[!] Error: {0} is not a PYZ archive. Extracting remaining files.'.format(name))
                return

            pycHeader = f.read(4)  # Python magic value

            if self._currentPycMagic() != pycHeader:
                print('[!] Warning: The script is running in a different python version than the one used to build the executable')
                print('    Run this script in Python{0} to prevent extraction errors(if any) during unmarshalling'.format(self.pyver))

            # NOTE(security): the marshal module is documented as not being
            # hardened against erroneous or malicious data. Analyse untrusted
            # samples in a sandbox.
            try:
                (tocPosition, ) = struct.unpack('!i', f.read(4))
                f.seek(tocPosition, os.SEEK_SET)
                toc = marshal.load(f)
            except (struct.error, EOFError, ValueError, TypeError, EnvironmentError):
                print('[!] Unmarshalling FAILED. Cannot extract {0}. Extracting remaining files.'.format(name))
                return

            if not isinstance(toc, (list, dict)):
                print('[!] Unmarshalling FAILED. Cannot extract {0}. Extracting remaining files.'.format(name))
                return

            print('[*] Found {0} files in PYZ archive'.format(len(toc)))

            # From pyinstaller 3.1+ toc is a list of tuples
            if isinstance(toc, list):
                toc = dict(toc)

            for key, (ispkg, pos, length) in toc.items():
                fileName = key
                if isinstance(key, bytes):
                    # for Python > 3.3 some keys are bytes object some are str object
                    fileName = key.decode('utf-8', 'replace')

                # Make sure destination directory exists, ensuring we keep inside dirName
                # (replacing '..' alone does not stop absolute paths).
                destName = os.path.join(dirName, self._safeRelPath(fileName.replace("..", "__")))
                destDirName = os.path.dirname(destName)
                if not os.path.exists(destDirName):
                    os.makedirs(destDirName)

                f.seek(pos, os.SEEK_SET)
                data = f.read(length)
                try:
                    data = zlib.decompress(data)
                except zlib.error:
                    print('[!] Error: Failed to decompress {0}, probably encrypted. Extracting as is.'.format(fileName))
                    with open(destName + '.pyc.encrypted', 'wb') as rawFile:
                        rawFile.write(data)
                    continue

                with open(destName + '.pyc', 'wb') as pycFile:
                    pycFile.write(pycHeader)      # Write pyc magic
                    if self.pyver >= 37:
                        pycFile.write(b'\0' * 4)  # Flags field added in Python 3.7 (PEP 552)
                    pycFile.write(b'\0' * 4)      # Write timestamp
                    if self.pyver >= 33:
                        pycFile.write(b'\0' * 4)  # Size parameter added in Python 3.3
                    pycFile.write(data)
317
318
def main():
    """Command line entry point: extract the executable named in sys.argv[1].

    Prints a usage line when no file name is given. Failures in any stage
    are reported by the archive object itself; this function then simply
    returns. The archive handle is closed on every path, including when an
    extraction step raises.
    """
    if len(sys.argv) < 2:
        print('[*] Usage: pyinstxtractor.py <filename>')
        return

    arch = PyInstArchive(sys.argv[1])
    if not arch.open():
        return

    try:
        if not (arch.checkFile() and arch.getCArchiveInfo()):
            return
        arch.parseTOC()
        arch.extractFiles()
    finally:
        # Release the file handle even if parsing or extraction blew up.
        arch.close()

    print('[*] Successfully extracted pyinstaller archive: {0}'.format(sys.argv[1]))
    print('')
    print('You can now use a python decompiler on the pyc files within the extracted directory')


if __name__ == '__main__':
    main()