Added support for packages.

We have a whole new module finder that uses the actual Python
parser and scans the bytecode for IMPORT_NAME and IMPORT_FROM.
This requires some support in import.c (that hasn't been checked in).
New command line options for this: -d, -q, -m.
This commit is contained in:
Guido van Rossum 1998-03-05 03:42:00 +00:00
parent 3455edcbc8
commit 75dc4969ab
4 changed files with 437 additions and 198 deletions

View file

@ -1,127 +0,0 @@
# Determine the names and filenames of the modules imported by a
# script, recursively. This is done by scanning for lines containing
# import statements. (The scanning has only superficial knowledge of
# Python syntax and no knowledge of semantics, so in theory the result
# may be incorrect -- however this is quite unlikely if you don't
# intentionally obscure your Python code.)
import os
import regex
import string
import sys
# Top-level interface.
# First argument is the main program (script).
# Second optional argument is list of modules to be searched as well.
def findmodules(scriptfile, modules = [], path = sys.path):
todo = {}
todo['__main__'] = scriptfile
for name in modules:
mod = os.path.basename(name)
if mod[-3:] == '.py': mod = mod[:-3]
elif mod[-4:] == '.pyc': mod = mod[:-4]
todo[mod] = name
done = closure(todo)
return done
# Compute the closure of scanfile() and findmodule().
# Return a dictionary mapping module names to filenames.
# Writes to stderr if a file can't be or read.
def closure(todo):
done = {}
while todo:
newtodo = {}
for modname in todo.keys():
if not done.has_key(modname):
filename = todo[modname]
if filename is None:
filename = findmodule(modname)
done[modname] = filename
if filename in ('<builtin>', '<unknown>'):
continue
try:
modules = scanfile(filename)
except IOError, msg:
sys.stderr.write("%s: %s\n" %
(filename, str(msg)))
continue
for m in modules:
if not done.has_key(m):
newtodo[m] = None
todo = newtodo
return done
# Scan a file looking for import statements.
# A line matches when "import a, b" or "from a import ..." appears at
# its start or right after a colon; backslash-continued lines are
# joined before matching.
# Return list of module names (each name once, in no particular order).
# Can raise IOError.
importstr = '\(^\|:\)[ \t]*import[ \t]+\([a-zA-Z0-9_, \t]+\)'
fromstr = '\(^\|:\)[ \t]*from[ \t]+\([a-zA-Z0-9_]+\)[ \t]+import[ \t]+'
isimport = regex.compile(importstr)
isfrom = regex.compile(fromstr)

def scanfile(filename):
    found = {}                          # used as a set of module names
    fp = open(filename, 'r')
    try:
        line = fp.readline()
        while line:                     # empty string means EOF
            # Glue continuation lines onto the current one.
            while line[-2:] == '\\\n':
                line = line[:-2] + ' ' + fp.readline()
            if isimport.search(line) >= 0:
                pieces = string.splitfields(isimport.group(2), ',')
                names = map(string.strip, pieces)
            elif isfrom.search(line) >= 0:
                names = [isfrom.group(2)]
            else:
                names = []
            for name in names:
                found[name] = None
            line = fp.readline()
    finally:
        fp.close()
    return found.keys()
# Find the file containing a module, given its name.
# Built-in modules are recognized first; otherwise every directory in
# 'path' is tried in order, preferring '.py' over '.pyc' within a
# directory.  A candidate counts as found when it can be opened for
# reading.
# Return filename, or '<builtin>', or '<unknown>'.
builtins = sys.builtin_module_names
tails = ['.py', '.pyc']

def findmodule(modname, path = sys.path):
    if modname in builtins:
        return '<builtin>'
    for dirname in path:
        for tail in tails:
            candidate = os.path.join(dirname, modname + tail)
            try:
                fp = open(candidate, 'r')
            except IOError:
                pass                    # not here; try the next candidate
            else:
                fp.close()
                return candidate
    return '<unknown>'
# Test the above functions.
def test():
if not sys.argv[1:]:
print 'usage: python findmodules.py scriptfile [morefiles ...]'
sys.exit(2)
done = findmodules(sys.argv[1], sys.argv[2:])
items = done.items()
items.sort()
for mod, file in [('Module', 'File')] + items:
print "%-15s %s" % (mod, file)
if __name__ == '__main__':
test()

View file

@ -26,6 +26,12 @@
-o dir: Directory where the output files are created; default '.'.
-m: Additional arguments are module names instead of filenames.
-d: Debugging mode for the module finder.
-q: Make the module finder totally quiet.
-h: Print this help message.
-w: Toggle Windows (NT or 95) behavior.
@ -42,7 +48,8 @@
module ...: Additional Python modules (referenced by pathname)
that will be included in the resulting binary. These
may be .py or .pyc files.
may be .py or .pyc files. If -m is specified, these are
module names that are searched for in the path instead.
NOTES:
@ -67,7 +74,7 @@
# Import the freeze-private modules
import checkextensions
import findmodules
import modulefinder
import makeconfig
import makefreeze
import makemakefile
@ -82,6 +89,8 @@ def main():
exec_prefix = None # settable with -P option
extensions = []
path = sys.path
modargs = 0
debug = 1
odir = ''
win = sys.platform[:3] == 'win'
@ -97,7 +106,7 @@ def main():
# parse command line
try:
opts, args = getopt.getopt(sys.argv[1:], 'he:o:p:P:s:w')
# -e takes an argument (it is appended to 'extensions') and -h takes
# none, so the colon belongs after 'e', not after 'h'.
opts, args = getopt.getopt(sys.argv[1:], 'de:hmo:p:P:qs:w')
except getopt.error, msg:
usage('getopt error: ' + str(msg))
@ -106,14 +115,20 @@ def main():
if o == '-h':
print __doc__
return
if o == '-d':
debug = debug + 1
if o == '-e':
extensions.append(a)
if o == '-m':
modargs = 1
if o == '-o':
odir = a
if o == '-p':
prefix = a
if o == '-P':
exec_prefix = a
if o == '-q':
debug = 0
if o == '-w':
win = not win
if o == '-s':
@ -220,18 +235,30 @@ def main():
target = os.path.join(odir, target)
makefile = os.path.join(odir, makefile)
for mod in implicits:
modules.append(findmodules.findmodule(mod))
# Actual work starts here...
dict = findmodules.findmodules(scriptfile, modules, path)
names = dict.keys()
names.sort()
print "Modules being frozen:"
for name in names:
print '\t', name
# collect all modules of the program
mf = modulefinder.ModuleFinder(path, debug)
for mod in implicits:
mf.import_hook(mod)
for mod in modules:
if mod == '-m':
modargs = 1
continue
if modargs:
if mod[-2:] == '.*':
mf.import_hook(mod[:-2], None, ["*"])
else:
mf.import_hook(mod)
else:
mf.load_file(mod)
mf.run_script(scriptfile)
if debug > 0:
mf.report()
print
dict = mf.modules
# generate output for frozen modules
backup = frozen_c + '~'
try:
os.rename(frozen_c, backup)
@ -239,7 +266,7 @@ def main():
backup = None
outfp = open(frozen_c, 'w')
try:
makefreeze.makefreeze(outfp, dict)
makefreeze.makefreeze(outfp, dict, debug)
if win and subsystem == 'windows':
import winmakemakefile
outfp.write(winmakemakefile.WINMAINTEMPLATE)
@ -251,6 +278,7 @@ def main():
frozen_c)
os.rename(backup, frozen_c)
# windows gets different treatment
if win:
# Taking a shortcut here...
import winmakemakefile
@ -264,14 +292,17 @@ def main():
outfp.close()
return
# generate config.c and Makefile
builtins = []
unknown = []
mods = dict.keys()
mods.sort()
for mod in mods:
if dict[mod] == '<builtin>':
if dict[mod].__code__:
continue
if not dict[mod].__file__:
builtins.append(mod)
elif dict[mod] == '<unknown>':
else:
unknown.append(mod)
addfiles = []

View file

@ -1,4 +1,5 @@
import marshal
import string
# Write a file containing frozen code for the modules in the dictionary.
@ -23,51 +24,31 @@
"""
def makefreeze(outfp, dict):
def makefreeze(outfp, dict, debug=0):
done = []
mods = dict.keys()
mods.sort()
for mod in mods:
modfn = dict[mod]
try:
str = makecode(modfn)
except IOError, msg:
sys.stderr.write("%s: %s\n" % (modfn, str(msg)))
continue
if str:
done.append(mod, len(str))
writecode(outfp, mod, str)
m = dict[mod]
mangled = string.join(string.split(mod, "."), "__")
if m.__code__:
if debug:
print "freezing", mod, "..."
str = marshal.dumps(m.__code__)
size = len(str)
if m.__path__:
# Indicate package by negative size
size = -size
done.append((mod, mangled, size))
writecode(outfp, mangled, str)
if debug:
print "generating table of frozen modules"
outfp.write(header)
for mod, size in done:
outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mod, size))
for mod, mangled, size in done:
outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size))
outfp.write(trailer)
# Return code string for a given module -- either a .py or a .pyc
# file.  A .py file is compiled and the code object marshalled; for a
# .pyc file everything after the first 8 bytes is returned unchanged.
# Return either a string or None (if it's not Python code).
# May raise IOError.
def makecode(filename):
    is_source = filename[-3:] == '.py'
    is_compiled = filename[-4:] == '.pyc'
    if not (is_source or is_compiled):
        # Can't generate code for this extension
        return None
    if is_source:
        fp = open(filename, 'r')
        try:
            codeobj = compile(fp.read(), filename, 'exec')
        finally:
            fp.close()
        return marshal.dumps(codeobj)
    fp = open(filename, 'rb')
    try:
        fp.seek(8)                      # skip the 8-byte header
        return fp.read()
    finally:
        fp.close()
# Write a C initializer for a module containing the frozen python code.
# The array is called M_<mod>.
@ -78,22 +59,3 @@ def writecode(outfp, mod, str):
for c in str[i:i+16]:
outfp.write('%d,' % ord(c))
outfp.write('\n};\n')
# Test for the above functions.
def test():
import os
import sys
if not sys.argv[1:]:
print 'usage: python freezepython.py file.py(c) ...'
sys.exit(2)
dict = {}
for arg in sys.argv[1:]:
base = os.path.basename(arg)
mod, ext = os.path.splitext(base)
dict[mod] = arg
makefreeze(sys.stdout, dict)
if __name__ == '__main__':
test()

View file

@ -0,0 +1,373 @@
"""Find modules used by a script, using introspection."""
import dis
import imp
import marshal
import os
import re
import string
import sys
# Numeric opcodes of the two import instructions, found by looking up
# their names in the dis module's opcode-name table.
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
IMPORT_FROM = dis.opname.index('IMPORT_FROM')
class Module:

    """Record describing one module located by the finder.

    __name__ is the module name, __file__ the file it came from (or
    None), __path__ a directory list (or None) and __code__ its code
    object, which starts out as None.
    """

    def __init__(self, name, file=None, path=None):
        self.__code__ = None
        self.__path__ = path
        self.__file__ = file
        self.__name__ = name

    def __repr__(self):
        """Return "Module(name[, file][, path])", omitting None fields."""
        text = "Module(" + repr(self.__name__)
        for extra in (self.__file__, self.__path__):
            if extra is not None:
                text = text + ", " + repr(extra)
        return text + ")"
class ModuleFinder:
def __init__(self, path=None, debug=0):
if path is None:
path = sys.path
self.path = path
self.modules = {}
self.badmodules = {}
self.debug = debug
self.indent = 0
def msg(self, level, str, *args):
if level <= self.debug:
for i in range(self.indent):
print " ",
print str,
for arg in args:
print repr(arg),
print
def msgin(self, *args):
level = args[0]
if level <= self.debug:
self.indent = self.indent + 1
apply(self.msg, args)
def msgout(self, *args):
level = args[0]
if level <= self.debug:
self.indent = self.indent - 1
apply(self.msg, args)
def run_script(self, pathname):
self.msg(2, "run_script", pathname)
fp = open(pathname)
stuff = ("", "r", imp.PY_SOURCE)
self.load_module('__main__', fp, pathname, stuff)
def load_file(self, pathname):
dir, name = os.path.split(pathname)
name, ext = os.path.splitext(name)
fp = open(pathname)
stuff = (ext, "r", imp.PY_SOURCE)
self.load_module(name, fp, pathname, stuff)
def import_hook(self, name, caller=None, fromlist=None):
self.msg(3, "import_hook", name, caller, fromlist)
parent = self.determine_parent(caller)
q, tail = self.find_head_package(parent, name)
m = self.load_tail(q, tail)
if not fromlist:
return q
if m.__path__:
self.ensure_fromlist(m, fromlist)
def determine_parent(self, caller):
self.msgin(4, "determine_parent", caller)
if not caller:
self.msgout(4, "determine_parent -> None")
return None
pname = caller.__name__
if caller.__path__:
parent = self.modules[pname]
assert caller is parent
self.msgout(4, "determine_parent ->", parent)
return parent
if '.' in pname:
i = string.rfind(pname, '.')
pname = pname[:i]
parent = self.modules[pname]
assert parent.__name__ == pname
self.msgout(4, "determine_parent ->", parent)
return parent
self.msgout(4, "determine_parent -> None")
return None
def find_head_package(self, parent, name):
self.msgin(4, "find_head_package", parent, name)
if '.' in name:
i = string.find(name, '.')
head = name[:i]
tail = name[i+1:]
else:
head = name
tail = ""
if parent:
qname = "%s.%s" % (parent.__name__, head)
else:
qname = head
q = self.import_module(head, qname, parent)
if q:
self.msgout(4, "find_head_package ->", (q, tail))
return q, tail
if parent:
qname = head
parent = None
q = self.import_module(head, qname, parent)
if q:
self.msgout(4, "find_head_package ->", (q, tail))
return q, tail
self.msgout(4, "raise ImportError: No module named", qname)
raise ImportError, "No module named " + qname
def load_tail(self, q, tail):
self.msgin(4, "load_tail", q, tail)
m = q
while tail:
i = string.find(tail, '.')
if i < 0: i = len(tail)
head, tail = tail[:i], tail[i+1:]
mname = "%s.%s" % (m.__name__, head)
m = self.import_module(head, mname, m)
if not m:
self.msgout(4, "raise ImportError: No module named", mname)
raise ImportError, "No module named " + mname
self.msgout(4, "load_tail ->", m)
return m
def ensure_fromlist(self, m, fromlist, recursive=0):
self.msg(4, "ensure_fromlist", m, fromlist, recursive)
for sub in fromlist:
if sub == "*":
if not recursive:
all = self.find_all_submodules(m)
if all:
self.ensure_fromlist(m, all, 1)
elif not hasattr(m, sub):
subname = "%s.%s" % (m.__name__, sub)
submod = self.import_module(sub, subname, m)
if not submod:
raise ImportError, "No module named " + subname
def find_all_submodules(self, m):
if not m.__path__:
return
modules = {}
suffixes = [".py", ".pyc", ".pyo"]
for dir in m.__path__:
try:
names = os.listdir(dir)
except os.error:
self.msg(2, "can't list directory", dir)
continue
for name in names:
mod = None
for suff in suffixes:
n = len(suff)
if name[-n:] == suff:
mod = name[:-n]
break
if mod and mod != "__init__":
modules[mod] = mod
return modules.keys()
def import_module(self, partname, fqname, parent):
self.msgin(3, "import_module", partname, fqname, parent)
try:
m = self.modules[fqname]
except KeyError:
pass
else:
self.msgout(3, "import_module ->", m)
return m
if self.badmodules.has_key(fqname):
self.msgout(3, "import_module -> None")
return None
try:
fp, pathname, stuff = self.find_module(partname,
parent and parent.__path__)
except ImportError:
self.msgout(3, "import_module ->", None)
return None
try:
m = self.load_module(fqname, fp, pathname, stuff)
finally:
if fp: fp.close()
if parent:
setattr(parent, partname, m)
self.msgout(3, "import_module ->", m)
return m
def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
self.msgin(2, "load_module", fqname, fp and "fp", pathname)
if type == imp.PKG_DIRECTORY:
m = self.load_package(fqname, pathname)
self.msgout(2, "load_module ->", m)
return m
if type == imp.PY_SOURCE:
co = compile(fp.read(), pathname, 'exec')
elif type == imp.PY_COMPILED:
if fp.read(4) != imp.get_magic():
self.msgout(2, "raise ImportError: Bad magic number", pathname)
raise ImportError, "Bad magic number in %s", pathname
fp.read(4)
co = marshal.load(fp)
else:
co = None
m = self.add_module(fqname)
if co:
m.__file__ = pathname
m.__code__ = co
code = co.co_code
n = len(code)
i = 0
lastname = None
while i < n:
c = code[i]
i = i+1
op = ord(c)
if op >= dis.HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
if op == IMPORT_NAME:
name = lastname = co.co_names[oparg]
if not self.badmodules.has_key(lastname):
try:
self.import_hook(name, m)
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
self.badmodules[name] = None
elif op == IMPORT_FROM:
name = co.co_names[oparg]
assert lastname is not None
if not self.badmodules.has_key(lastname):
try:
self.import_hook(lastname, m, [name])
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
fullname = lastname + "." + name
self.badmodules[fullname] = None
else:
lastname = None
self.msgout(2, "load_module ->", m)
return m
def load_package(self, fqname, pathname):
self.msgin(2, "load_package", fqname, pathname)
m = self.add_module(fqname)
m.__file__ = pathname
m.__path__ = [pathname]
fp, buf, stuff = self.find_module("__init__", m.__path__)
self.load_module(fqname, fp, buf, stuff)
self.msgout(2, "load_package ->", m)
return m
def add_module(self, fqname):
if self.modules.has_key(fqname):
return self.modules[fqname]
self.modules[fqname] = m = Module(fqname)
return m
def find_module(self, name, path):
if path is None:
if name in sys.builtin_module_names:
return (None, None, ("", "", imp.C_BUILTIN))
path = self.path
return imp.find_module(name, path)
def report(self):
print
print " %-25s %s" % ("Name", "File")
print " %-25s %s" % ("----", "----")
# Print modules found
keys = self.modules.keys()
keys.sort()
for key in keys:
m = self.modules[key]
if m.__path__:
print "P",
else:
print "m",
print "%-25s" % key, m.__file__ or ""
# Print missing modules
keys = self.badmodules.keys()
keys.sort()
for key in keys:
print "?", key
def test():
# Parse command line
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], "dmp:q")
except getopt.error, msg:
print msg
return
# Process options
debug = 1
domods = 0
addpath = []
for o, a in opts:
if o == '-d':
debug = debug + 1
if o == '-m':
domods = 1
if o == '-p':
addpath = addpath + string.split(a, os.pathsep)
if o == '-q':
debug = 0
# Provide default arguments
if not args:
script = "hello.py"
else:
script = args[0]
# Set the path based on sys.path and the script directory
path = sys.path[:]
path[0] = os.path.dirname(script)
path = addpath + path
if debug > 1:
print "path:"
for item in path:
print " ", `item`
# Create the module finder and turn its crank
mf = ModuleFinder(path, debug)
for arg in args[1:]:
if arg == '-m':
domods = 1
continue
if domods:
if arg[-2:] == '.*':
mf.import_hook(arg[:-2], None, ["*"])
else:
mf.import_hook(arg)
else:
mf.load_file(arg)
mf.run_script(script)
mf.report()
if __name__ == '__main__':
try:
test()
except KeyboardInterrupt:
print "\n[interrupt]"