Simulating subpackages with Python import hook
I'm going to hack into the Python import system. Let's assume we have the following directory structure:
.
├── main
│   ├── main.py
│   └── parent
│       └── __init__.py
└── pkg1
    ├── __init__.py
    ├── sub
    │   ├── __init__.py
    │   └── import_global.py
    └── success.py
The script will run `main.py`, so there must be a top-level package `parent`. Now I want to simulate a subpackage whose full name is `parent.intermediate.pkg1`, which actually refers to the directory `pkg1`.
No module named `intermediate` actually exists; however, I really need to simulate one (in my real project, the name of this intermediate module will be generated dynamically). So I decided to use Python import hooks.
First, let me show the contents of `pkg1`.
pkg1/sub/import_global.py:
# A standard-library import made from inside the simulated package; the
# import hook has to let an ordinary import like this one resolve.
from operator import add
# Checked by test_import_global, which expects the value 3.
Value = add(1, 2)
pkg1/success.py:
# Marker value checked by test_success.
Value = 'Success'
And in `main.py` (excerpt), I wrote some test cases:
class MainTestCase(unittest.TestCase):
    # Comments are used instead of docstrings on the test methods so that
    # unittest keeps reporting the method names in its output.

    def test_success(self):
        # A plain module directly inside the simulated package is importable.
        from parent.intermediate.pkg1 import success
        self.assertEqual(success.Value, "Success")

    def test_import_global(self):
        # A nested module that itself imports from the standard library.
        from parent.intermediate.pkg1.sub import import_global
        self.assertEqual(import_global.Value, 3)

    def test_not_found(self):
        # Importing a name that does not exist must raise ImportError.
        with self.assertRaises(ImportError):
            from parent.intermediate.pkg1 import not_found


unittest.main()
All `__init__.py` files are empty. Now I will implement the import hooks. I have developed two versions, each with some problems.
First version:
class PkgLoader(object):
    """First attempt at a meta-path hook for ``parent.intermediate.*``.

    The object acts as both finder and loader.  ``find_module`` accepts every
    name that starts with ``parent.`` without checking that it exists, so
    ``load_module`` can be asked for names it is unable to locate and then
    raises ``ImportError``.
    """

    def install(self):
        """Put this hook at the end of ``sys.meta_path``, without duplicates."""
        hooks = [hook for hook in sys.meta_path if self != hook]
        hooks.append(self)
        sys.meta_path[:] = hooks

    def find_module(self, fullname, path=None):
        """Return ``self`` for names under ``parent.``, otherwise ``None``."""
        return self if fullname.startswith('parent.') else None

    def load_module(self, fullname):
        """Load ``fullname`` and every package between it and the namespace.

        Returns the cached module when ``fullname`` is already in
        ``sys.modules``.  Raises ``ImportError`` when the name is not under
        ``parent.intermediate`` or when a component cannot be found on disk.
        """
        if fullname in sys.modules:
            return sys.modules[fullname]

        # Components after the leading ``parent``.
        tail = fullname.split('.')[1:]
        # Directory that holds the real packages: one level above this file.
        search_dir = os.path.join(os.path.dirname(__file__), '..')

        # The intermediate module has no file; reuse it or build it by hand.
        ns = 'parent.intermediate'
        if ns in sys.modules:
            module = sys.modules[ns]
        else:
            if tail[0] != 'intermediate':
                raise ImportError("Module %s not found." % fullname)
            module = imp.new_module(ns)
            module.__name__ = ns
            module.__path__ = [ns]
            module.__package__ = ns.rpartition('.')[0]

        # Walk down the remaining components, loading each one in turn.
        for part in tail[1:]:
            ns = '%s.%s' % (ns, part)
            fp, filename, options = imp.find_module(part, [search_dir])
            if ns in sys.modules:
                module = sys.modules[ns]
            else:
                module = imp.load_module(ns, fp, filename, options)
                sys.modules[ns] = module
            # The next component is looked up inside what was just found.
            search_dir = filename
        return module


loader = PkgLoader()
loader.install()
This version fails `test_import_global`:
E..
======================================================================
ERROR: test_import_global (__main__.MainTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "main.py", line 54, in test_import_global
from parent.intermediate.pkg1.sub import import_global
File "main.py", line 39, in load_module
m = imp.load_module(ns, fp, filename, options)
File "../pkg1/sub/import_global.py", line 1, in <module>
from operator import add
File "main.py", line 35, in load_module
fp, filename, options = imp.find_module(p, [path])
ImportError: No module named operator
----------------------------------------------------------------------
Ran 3 tests in 0.005s
FAILED (errors=1)
For the second version, I changed `load_module`:
def load_module(self, fullname):
    """Second attempt: like the first, but a failed lookup yields ``None``.

    Returns the cached module when ``fullname`` is already in
    ``sys.modules``.  Raises ``ImportError`` only when the name is not under
    ``parent.intermediate``; when a component cannot be found on disk the
    method returns ``None`` instead of raising.
    """
    if fullname in sys.modules:
        return sys.modules[fullname]

    # Components after the leading ``parent``.
    tail = fullname.split('.')[1:]
    # Directory that holds the real packages: one level above this file.
    search_dir = os.path.join(os.path.dirname(__file__), '..')

    # The intermediate module has no file; reuse it or build it by hand.
    ns = 'parent.intermediate'
    if ns in sys.modules:
        module = sys.modules[ns]
    else:
        if tail[0] != 'intermediate':
            raise ImportError("Module %s not found." % fullname)
        module = imp.new_module(ns)
        module.__name__ = ns
        module.__path__ = [ns]
        module.__package__ = ns.rpartition('.')[0]

    # Walk down the remaining components, loading each one in turn.
    for part in tail[1:]:
        ns = '%s.%s' % (ns, part)
        try:
            found = imp.find_module(part, [search_dir])
        except ImportError:
            # The change in this version: swallow the failed lookup.
            return None
        fp, filename, options = found
        if ns in sys.modules:
            module = sys.modules[ns]
        else:
            module = imp.load_module(ns, fp, filename, options)
            sys.modules[ns] = module
        # The next component is looked up inside what was just found.
        search_dir = filename
    return module
This version fails `test_not_found`:
.F.
======================================================================
FAIL: test_not_found (__main__.MainTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "main.py", line 65, in test_not_found
self.assertRaises(ImportError, F)
AssertionError: ImportError not raised
----------------------------------------------------------------------
Ran 3 tests in 0.004s
FAILED (failures=1)
So now the question is: how can I implement the import hook so that all three test cases pass?
Oh, I have a solution, although more test cases may be needed for my real project. The main idea is to call `imp.find_module` in the `find_module` stage rather than in the `load_module` stage, so that the import system never asks our custom loader to load modules that do not exist.
Here's the solution:
class ModuleImportUtility(object):
    """Stateless helpers for working with dotted module names."""

    @staticmethod
    def in_namespace(namespace, fullname):
        """Tell whether ``fullname`` equals ``namespace`` or lies beneath it.

        A bare prefix match is not enough: ``a.bc`` is not inside ``a.b``,
        so the character right after the prefix has to be a dot.
        """
        if fullname == namespace:
            return True
        return fullname.startswith(namespace + '.')

    @staticmethod
    def parent_name(fullname):
        """Return everything before the last dot of ``fullname``.

        A name without a dot yields the empty string.
        """
        return fullname.rpartition('.')[0]

    @staticmethod
    def find_modules(namespace, name_parts, root_path):
        """Locate each entry of ``name_parts`` in turn, starting at ``root_path``.

        Every component is searched for inside the location found for the
        previous one.  Returns a list of ``(fullname, file, filename,
        options)`` tuples, where the last three items are what
        ``imp.find_module`` returned, or ``None`` as soon as one component
        cannot be found.
        """
        located = []
        qualified = namespace
        search_dir = root_path
        try:
            for part in name_parts:
                qualified = '%s.%s' % (qualified, part)
                fp, filename, options = imp.find_module(part, [search_dir])
                located.append((qualified, fp, filename, options))
                search_dir = filename
        except ImportError:
            return None
        return located
class NamespaceSplitter(object):
    """Drop a namespace prefix from dotted names and return the rest as parts."""

    def __init__(self, namespace):
        self.namespace = namespace
        # Number of dotted components in the prefix, i.e. how many leading
        # parts ``cut`` has to skip.
        self.cutoff = namespace.count(".") + 1

    def cut(self, fullname):
        """Return the components of ``fullname`` after the first ``cutoff`` ones.

        The prefix is skipped by position only; it is not compared against
        ``namespace``.
        """
        pieces = fullname.split('.')
        return pieces[self.cutoff:]
class DirModuleFinder(object):
    """
    Find a module under a particular namespace in a given directory.

    We assume that :attr:`root_path` is not a package, and that it contains
    the packages to be imported.
    """

    def __init__(self, namespace, root_path):
        """Remember the dotted ``namespace`` and the directory backing it."""
        self.namespace = namespace
        self.root_path = root_path
        self.ns_splitter = NamespaceSplitter(namespace)

    def install(self):
        """Put this finder at the end of ``sys.meta_path``, without duplicates."""
        sys.meta_path[:] = [x for x in sys.meta_path if self != x] + [self]

    def find_module(self, fullname, path=None):
        """Return a loader for ``fullname``, or ``None`` if it is not ours.

        Names that are the namespace itself or one of its parents get a
        :class:`DefaultNewModuleLoader`.  Names below the namespace get a
        :class:`DirModuleLoader`, but only when every component can actually
        be located under :attr:`root_path`.
        """
        # We should deal with all the parent packages of namespace, because
        # some of the intermediate packages may not exist, and need to be
        # created manually
        if ModuleImportUtility.in_namespace(fullname, self.namespace):
            return DefaultNewModuleLoader()

        # If not a parent of the namespace, we try to find the requested
        # module under the given :attr:`root_path`
        if not ModuleImportUtility.in_namespace(self.namespace, fullname):
            return None

        ns = self.namespace
        root = self.root_path
        found = ModuleImportUtility.find_modules(
            ns, self.ns_splitter.cut(fullname), root)
        if not found:
            return None

        # This lookup is only an existence probe.  The loader performs its
        # own lookup later, so the file objects opened here must be closed
        # (the file slot is None for packages).
        for _name, fp, _filename, _options in found:
            if fp is not None:
                fp.close()
        return DirModuleLoader(ns, root)
class DefaultNewModuleLoader(object):
    """
    Load the requested module via standard import, or create a new module if
    not exist.
    """

    def load_module(self, fullname):
        """Return the module ``fullname``, importing or fabricating it.

        Order of attempts:

        1. a module already cached in ``sys.modules``;
        2. a regular import, walking the dotted name one component at a time;
        3. a freshly created dummy package when the regular import raises
           ``ImportError``.

        The result is stored in ``sys.modules[fullname]`` before returning.
        """
        import sys

        # If the module has already been loaded, then we just fetch this
        # module from the import cache
        if fullname in sys.modules:
            return sys.modules[fullname]

        # Only needed past the cache check, so import it here.
        import imp

        class _Root(object):
            """Stand-in parent for the first component of the dotted name."""

            def __init__(self, path):
                self.__path__ = path

        # Otherwise we try perform a standard import first, and if not found,
        # we create a new package as the required module
        try:
            # A ``__path__`` of None makes imp.find_module search sys.path.
            m = _Root(None)
            parts = fullname.split('.')
            for i, p in enumerate(parts, 1):
                ns = '.'.join(parts[:i])
                if ns in sys.modules:
                    m = sys.modules[ns]
                    continue
                # Only packages can contain further modules.
                if not hasattr(m, '__path__'):
                    raise ImportError()
                fp, filename, options = imp.find_module(p, m.__path__)
                try:
                    # Load under the full dotted name so that __name__ and
                    # the sys.modules key agree; loading under the short
                    # name would register a stray top-level module.
                    m = imp.load_module(ns, fp, filename, options)
                finally:
                    # The caller of imp.find_module owns the file object.
                    if fp is not None:
                        fp.close()
                sys.modules[ns] = m
        except ImportError:
            m = imp.new_module(fullname)
            m.__name__ = fullname
            m.__path__ = [fullname]
            m.__loader__ = self
            m.__file__ = '<dummy package "%s">' % fullname
            m.__package__ = ModuleImportUtility.parent_name(fullname)

        # Now insert the loaded module into the cache, and return the result
        sys.modules[fullname] = m
        return m
class DirModuleLoader(object):
    """
    Load the requested module under a directory (simulate the system import),
    all the intermediate modules will also be loaded.
    """

    def __init__(self, namespace, root_path):
        """Remember the dotted ``namespace`` and the directory backing it."""
        self.namespace = namespace
        self.root_path = root_path
        self.ns_splitter = NamespaceSplitter(namespace)

    def load_module(self, fullname):
        """Load ``fullname`` and every not-yet-loaded package above it.

        Returns ``sys.modules[fullname]``.  Raises ``ImportError`` when a
        component of the name cannot be located under :attr:`root_path`.
        """
        import imp

        name_parts = self.ns_splitter.cut(fullname)
        found = ModuleImportUtility.find_modules(
            self.namespace, name_parts, self.root_path)
        if found is None:
            # find_modules signals a failed lookup with None; report it the
            # way the import system expects instead of failing on iteration.
            raise ImportError("No module named %s" % fullname)

        try:
            for (ns, fp, filename, options) in found:
                if ns not in sys.modules:
                    sys.modules[ns] = imp.load_module(
                        ns, fp, filename, options)
        finally:
            # The caller of imp.find_module owns the file objects, whether or
            # not loading succeeded (the file slot is None for packages).
            for (_ns, fp, _filename, _options) in found:
                if fp is not None:
                    fp.close()
        return sys.modules[fullname]
# Map the `parent.intermediate` namespace onto the directory one level above
# this file.
loader = DirModuleFinder(
'parent.intermediate',
os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
)
# Activate the finder.
loader.install()
Feel free to comment on my solution and if you guys find any potential bugs, feel free to notify me.
You can create modules at runtime and register them in the `sys.modules` dictionary.
So, if you have a directory structure like:
project-root/main.py
project-root/sub/
project-root/sub/__init__.py
You could, of course, do something like:
import sub # Regular import of the package under its real name
sf1 = sub.SubFoo(1) # Sanity check: the import worked
But if you want to "pretend" that `sub` is actually a subpackage inside another package, you can do something like:
import sys, types
import sub # Regular import of the package under its real name
sf1 = sub.SubFoo(1) # Sanity check: the import worked
fake = types.ModuleType('fake') # Create an empty module object named "fake"
fake.sub = sub # Expose "sub" as an attribute of "fake"
sys.modules['fake'] = fake # Register "fake" in the module cache
# NOTE(review): only 'fake' is registered here; a statement such as
# `import fake.sub` presumably also needs a sys.modules['fake.sub'] entry --
# confirm before relying on it.
sf2 = fake.sub.SubFoo(2) # Attribute access through "fake" reaches the same class
In my test code, `sub/__init__.py` contained only:
class SubFoo:
    """Tiny demo class that announces each instance it creates."""

    def __init__(self, x=None):
        # Print a line so the caller can see which instance was built.
        message = "Created SubFoo(%s)" % x
        print(message)
And if you run `main.py`, you get:
Created SubFoo(1)
Created SubFoo(2)
I think this approach would be much easier than what you are trying to do with import hooks.