Simulating subpackages with a Python import hook
I'm trying to hook into the Python import system. Let's assume we have the following directory structure:
.
├── main
│   ├── main.py
│   └── parent
│       └── __init__.py
└── pkg1
    ├── __init__.py
    ├── sub
    │   ├── __init__.py
    │   └── import_global.py
    └── success.py
The script that gets run is main.py, so parent is importable as a top-level package. Now I want to simulate a subpackage whose full name is parent.intermediate.pkg1, which actually refers to the directory pkg1.
No module named intermediate actually exists; however, I really need to simulate one (in my real project, the name of this intermediate module will be dynamically generated). So I decided to use Python import hooks.
First, let me show the contents of pkg1.
pkg1 / sub / import_global.py:
# Fixture module: pulls in a standard-library module from inside the
# simulated subpackage; Value evaluates to 3.
from operator import add
Value = add(1, 2)
pkg1 / success.py:
# Fixture module: sentinel value that the test case compares against.
Value = 'Success'
And in main.py (excerpt) I wrote some test cases:
class MainTestCase(unittest.TestCase):
    """Checks the behaviour expected from the simulated subpackage."""

    def test_success(self):
        """A plain module inside the simulated package can be imported."""
        from parent.intermediate.pkg1 import success

        self.assertEqual(success.Value, "Success")

    def test_import_global(self):
        """A nested module that itself imports a stdlib module still loads."""
        from parent.intermediate.pkg1.sub import import_global

        self.assertEqual(import_global.Value, 3)

    def test_not_found(self):
        """Importing a name that does not exist must raise ImportError."""
        with self.assertRaises(ImportError):
            from parent.intermediate.pkg1 import not_found  # noqa: F401
# Run the unittest test runner when the script is executed.
unittest.main()
All __init__.py files are empty. Now to implement the import hook. I have developed two versions, each with some problems.
First version:
class PkgLoader(object):
    """
    Combined finder/loader that exposes packages stored one directory above
    this file under the simulated namespace ``parent.intermediate``.
    """

    def install(self):
        """Put this hook at the end of ``sys.meta_path`` (at most once)."""
        sys.meta_path[:] = [x for x in sys.meta_path if self != x] + [self]

    def find_module(self, fullname, path=None):
        """
        Claim every dotted name below ``parent``.

        NOTE: this does not check that the module exists on disk, so
        :meth:`load_module` is also asked for names it cannot load and
        raises ImportError for them.

        :return: ``self`` for names starting with ``parent.``, else ``None``.
        """
        if fullname.startswith('parent.'):
            return self

    def load_module(self, fullname):
        """
        Load :param:`fullname`, creating the intermediate package on demand.

        :return: the loaded (or already cached) module.
        :raises ImportError: if the name is not below ``parent.intermediate``
            or a component cannot be found by :func:`imp.find_module`.
        """
        if fullname in sys.modules:
            return sys.modules[fullname]
        parts = fullname.split('.')[1:]
        path = os.path.join(os.path.dirname(__file__), '..')
        # intermediate module
        ns = 'parent.intermediate'
        if ns in sys.modules:
            m = sys.modules[ns]
        elif parts[0] == 'intermediate':
            m = imp.new_module(ns)
            m.__name__ = ns
            m.__path__ = [ns]
            m.__package__ = '.'.join(ns.rsplit('.', 1)[:-1])
            # A loader has to register what it creates; otherwise a fresh
            # dummy package would be built on every call.
            sys.modules[ns] = m
        else:
            raise ImportError("Module %s not found." % fullname)
        # submodules
        for p in parts[1:]:
            ns = '%s.%s' % (ns, p)
            fp, filename, options = imp.find_module(p, [path])
            try:
                if ns in sys.modules:
                    m = sys.modules[ns]
                else:
                    m = imp.load_module(ns, fp, filename, options)
                    sys.modules[ns] = m
            finally:
                # imp.find_module hands back an open file for plain modules
                # (None for packages); closing it is the caller's job.
                if fp is not None:
                    fp.close()
            path = filename
        return m
# Create the hook and register it in sys.meta_path.
loader = PkgLoader()
loader.install()
It fails test_import_global:
E..
======================================================================
ERROR: test_import_global (__main__.MainTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "main.py", line 54, in test_import_global
from parent.intermediate.pkg1.sub import import_global
File "main.py", line 39, in load_module
m = imp.load_module(ns, fp, filename, options)
File "../pkg1/sub/import_global.py", line 1, in <module>
from operator import add
File "main.py", line 35, in load_module
fp, filename, options = imp.find_module(p, [path])
ImportError: No module named operator
----------------------------------------------------------------------
Ran 3 tests in 0.005s
FAILED (errors=1)
Now for the second version I changed load_module:
def load_module(self, fullname):
    """
    Load :param:`fullname`, creating the intermediate package on demand.

    Unlike a strict loader, a component that cannot be found on disk does
    not raise: ``None`` is returned instead.

    :return: the loaded (or cached) module, or ``None`` if a component of
        the name cannot be found by :func:`imp.find_module`.
    :raises ImportError: if the name is not below ``parent.intermediate``.
    """
    if fullname in sys.modules:
        return sys.modules[fullname]
    parts = fullname.split('.')[1:]
    path = os.path.join(os.path.dirname(__file__), '..')
    # intermediate module
    ns = 'parent.intermediate'
    if ns in sys.modules:
        m = sys.modules[ns]
    elif parts[0] == 'intermediate':
        m = imp.new_module(ns)
        m.__name__ = ns
        m.__path__ = [ns]
        m.__package__ = '.'.join(ns.rsplit('.', 1)[:-1])
        # Register the synthesized package so it is created only once.
        sys.modules[ns] = m
    else:
        raise ImportError("Module %s not found." % fullname)
    # submodules
    for p in parts[1:]:
        ns = '%s.%s' % (ns, p)
        # ======> The modification starts here <======
        try:
            fp, filename, options = imp.find_module(p, [path])
        except ImportError:
            return None
        # ======> The modification ends here <======
        try:
            if ns in sys.modules:
                m = sys.modules[ns]
            else:
                m = imp.load_module(ns, fp, filename, options)
                sys.modules[ns] = m
        finally:
            # The file opened by imp.find_module (None for packages) must
            # be closed by the caller.
            if fp is not None:
                fp.close()
        path = filename
    return m
It fails test_not_found:
.F.
======================================================================
FAIL: test_not_found (__main__.MainTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "main.py", line 65, in test_not_found
self.assertRaises(ImportError, F)
AssertionError: ImportError not raised
----------------------------------------------------------------------
Ran 3 tests in 0.004s
FAILED (failures=1)
So now the question is: how can I implement the import hook so that all three test cases pass?
source to share
Oh, I have a solution, although more test cases may be needed for my real project. The main idea is to call imp.find_module in the find_module stage rather than in the load_module stage, so that the import system never asks our custom loader to load a module that does not exist.
Here's the solution:
class ModuleImportUtility(object):
    """Stateless helpers for working with dotted module names."""

    @staticmethod
    def in_namespace(namespace, fullname):
        """
        Whether the given :param:`fullname` is or within the :attr:`namespace`.

        The match is done on whole dotted components, so ``a.bc`` is not
        considered to be inside ``a.b``.
        """
        if not fullname.startswith(namespace):
            return False
        nslen = len(namespace)
        return len(fullname) == nslen or fullname[nslen] == '.'

    @staticmethod
    def parent_name(fullname):
        """
        Get the parent name of :param:`fullname` (empty string for a
        top-level name).
        """
        return fullname.rpartition('.')[0]

    @staticmethod
    def find_modules(namespace, name_parts, root_path):
        """
        Find the modules along :param:`name_parts` according to
        :param:`root_path`.

        Every file object in the returned tuples is open (it is ``None`` for
        packages); the caller is responsible for closing it.

        :return :class:`list` of (fullname, file, filename, options) as
            :method:`imp.find_module`, or :value:`None` if not found.
        """
        found = []
        ns = namespace
        path = root_path
        try:
            for n in name_parts:
                ns = '%s.%s' % (ns, n)
                fp, filename, options = imp.find_module(n, [path])
                found.append((ns, fp, filename, options))
                # The next component is searched inside what was just found.
                path = filename
        except ImportError:
            # Nothing is handed back to the caller on failure, so the files
            # opened for the earlier components have to be closed here.
            for _ns, fp, _filename, _options in found:
                if fp is not None:
                    fp.close()
            return None
        return found
class NamespaceSplitter(object):
    """
    Drops the leading components that belong to a fixed namespace from a
    dotted name and returns the remaining components.
    """

    def __init__(self, namespace):
        self.namespace = namespace
        # A dotted name has one more component than it has dots.
        self.cutoff = namespace.count('.') + 1

    def cut(self, fullname):
        """Return the components of :param:`fullname` after the namespace."""
        pieces = fullname.split('.')
        return pieces[self.cutoff:]
class DirModuleFinder(object):
    """
    Find a module under particular namespace in a given directory.

    We assume that :attr:`root_path` is not a package, and that it contains
    the packages to be imported.
    """

    def __init__(self, namespace, root_path):
        self.namespace = namespace
        self.root_path = root_path
        self.ns_splitter = NamespaceSplitter(namespace)

    def install(self):
        """Put this finder at the end of ``sys.meta_path`` (at most once)."""
        sys.meta_path[:] = [x for x in sys.meta_path if self != x] + [self]

    def find_module(self, fullname, path=None):
        """
        Pick the loader responsible for :param:`fullname`.

        :return: a :class:`DefaultNewModuleLoader` when the name is the
            namespace itself or one of its parents, a
            :class:`DirModuleLoader` when the name lies below the namespace
            and exists under :attr:`root_path`, otherwise ``None``.
        """
        # We should deal with all the parent packages of namespace, because
        # some of the intermediate packages may not exist, and need to be
        # created manually
        if ModuleImportUtility.in_namespace(fullname, self.namespace):
            return DefaultNewModuleLoader()

        # If not a parent of the namespace, we try to find the requested
        # module under the given :attr:`root_path`
        if not ModuleImportUtility.in_namespace(self.namespace, fullname):
            return None

        parts = self.ns_splitter.cut(fullname)
        found = ModuleImportUtility.find_modules(
            self.namespace, parts, self.root_path)
        if not found:
            return None

        # The lookup was only an existence check; the loader searches again,
        # so the files opened by imp.find_module are closed right away.
        for _ns, fp, _filename, _options in found:
            if fp is not None:
                fp.close()
        return DirModuleLoader(self.namespace, self.root_path)
class DefaultNewModuleLoader(object):
    """
    Load the requested module via standard import, or create a new module if
    not exist.
    """

    def load_module(self, fullname):
        """
        Return the module called :param:`fullname`.

        The import cache is consulted first. Otherwise each component of the
        dotted name is located and loaded through :mod:`imp`; if that raises
        ImportError an empty dummy package is created instead. The result is
        stored in ``sys.modules`` before it is returned.
        """
        import sys

        # If the module has already been loaded, then we just fetch this module
        # from the import cache
        if fullname in sys.modules:
            return sys.modules[fullname]

        # Only needed past this point, so a cache hit does not depend on it.
        import imp

        # Otherwise we try perform a standard import first, and if not found,
        # we create a new package as the required module
        try:
            m = None
            parts = fullname.split('.')
            for i, p in enumerate(parts, 1):
                ns = '.'.join(parts[:i])
                if ns in sys.modules:
                    m = sys.modules[ns]
                    continue
                if i == 1:
                    # Top-level name: a path of None lets imp.find_module
                    # run its default search.
                    search_path = None
                elif hasattr(m, '__path__'):
                    search_path = m.__path__
                else:
                    # The parent is not a package, so it has no children.
                    raise ImportError()
                fp, filename, options = imp.find_module(p, search_path)
                try:
                    # Load under the full dotted name so that __name__ and
                    # the sys.modules key written by imp agree with ns.
                    m = imp.load_module(ns, fp, filename, options)
                finally:
                    # The file from imp.find_module (None for packages)
                    # must be closed by the caller.
                    if fp is not None:
                        fp.close()
                sys.modules[ns] = m
        except ImportError:
            m = imp.new_module(fullname)
            m.__name__ = fullname
            m.__path__ = [fullname]
            m.__loader__ = self
            m.__file__ = '<dummy package "%s">' % fullname
            m.__package__ = ModuleImportUtility.parent_name(fullname)

        # Now insert the loaded module into the cache, and return the result
        sys.modules[fullname] = m
        return m
class DirModuleLoader(object):
    """
    Load the requested module under a directory (simulate the system import),
    all the intermediate modules will also be loaded.
    """

    def __init__(self, namespace, root_path):
        self.namespace = namespace
        self.root_path = root_path
        self.ns_splitter = NamespaceSplitter(namespace)

    def load_module(self, fullname):
        """
        Load :param:`fullname` and every not-yet-loaded package on the way
        to it, storing each one in ``sys.modules``.

        :return: the module registered as :param:`fullname`.
        :raises ImportError: if a component of the name cannot be found
            under :attr:`root_path`.
        """
        import imp
        import sys

        name_parts = self.ns_splitter.cut(fullname)
        found = ModuleImportUtility.find_modules(
            self.namespace, name_parts, self.root_path)
        if found is None:
            # find_modules signals "not found" with None; report it the way
            # the import system expects instead of failing on iteration.
            raise ImportError("No module named %s" % fullname)

        try:
            for ns, fp, filename, options in found:
                if ns not in sys.modules:
                    sys.modules[ns] = imp.load_module(
                        ns, fp, filename, options)
        finally:
            # Close every file opened by imp.find_module, including those of
            # modules that were already cached or never reached.
            for _ns, fp, _filename, _options in found:
                if fp is not None:
                    fp.close()
        return sys.modules[fullname]
# Serve the namespace 'parent.intermediate' from the directory one level
# above this file, then register the finder in sys.meta_path.
loader = DirModuleFinder(
'parent.intermediate',
os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
)
loader.install()
Feel free to comment on my solution, and if you find any potential bugs, please let me know.
source to share
You can create modules at runtime and work with the sys.modules dictionary directly.
So, if you have a directory structure like:
project-root/main.py
project-root/sub/
project-root/sub/__init__.py
You could, of course, do something like:
# Plain import of the package that sits next to main.py.
import sub # Import child package
sf1 = sub.SubFoo(1) # Test that import worked
But if you want to "pretend" that sub is actually a subpackage inside another package, you can do something like:
import sys
import types

import sub                          # Import child package

sf1 = sub.SubFoo(1)                 # Test that import worked
fake = types.ModuleType('fake')     # Create empty "fake" module
fake.sub = sub                      # Add "sub" module to the "fake" module
sys.modules['fake'] = fake          # Add "fake" to sys.modules
# Register the dotted name as well: the import system looks names up in
# sys.modules first, so "import fake.sub" now resolves to the same module
# object (whose __name__ is still "sub").
sys.modules['fake.sub'] = sub
sf2 = fake.sub.SubFoo(2)            # Test that calling works through "fake" module
In my test code, sub/__init__.py contained only:
class SubFoo:
    """Demo class that announces each instance it creates."""

    def __init__(self, x=None):
        # Format first, then print, so the message is built in one place.
        message = "Created SubFoo(%s)" % x
        print(message)
And if you run main.py you get:
Created SubFoo(1)
Created SubFoo(2)
I think this approach would be much easier than what you are trying to do with import hooks.
source to share