Wrangling Namespaces in Python
Published 2011-04-25
Fooling around with a loader in Python.
Introduction
I’ve found Python namespaces to be problematic. If you don’t use your own namespace, then collisions are frequent. If you do use your own namespace, then you’ve got other issues to worry about, mostly stemming from __init__.py
and Python’s module search strategy. Here are a couple of options I’ve found so far (school me if you found a better way):
Develop at the level of your package, then bury it in your namespace on install
This is the way Django apps work. For mediaonfire.foo
, the development directory would look like:
/development-root/
    Makefile            # or whatever mechanism you use
    foo/
        __init__.py
        # package files
    test/
        __init__.py
        # tests

/install-root/
    mediaonfire/
        __init__.py
        # other packages and modules
        foo/
            __init__.py
            # package files
When you’re ready to install, your install
target plants foo
as a subdirectory of mediaonfire
in your directory tree. As long as your PYTHONPATH
is set correctly, your tests can find foo
in your development directory and everything works. Since the Django test harness requires the app be structured as above, this has served me well in writing Django apps, with the only changes being that the tests are put into foo/tests.py
, and that I have to be careful of namespace collisions.
Use a magic __init__.py
to fool Python’s module search mechanism into ignoring your toplevel namespace directory
My first stab at this, before finding out about this magic __init__.py
, was to write a PEP 302 finder/loader. It worked most of the time, but it broke on the edge cases:
"""Provides a search mechanism (PEP 302) to find modules in the build directory. When testing modules, distutils puts the "built" modules in build/lib/..., but if the package path conflicts with an installed path, then one or the other module will not be found. This module provides the Finder class and the Loader class to implement PEP 302 (http://www.python.org/dev/peps/pep-0302/) and use 'sys.meta_path' to solve conflicting modules problem. """ # imports import imp import os import re import sys import traceback # constants BUILDER_PATH = "build/lib" # This is the base path, in the development directory, where the # packages are 'built'. DUNDER_INIT = "__init__.py" # Standard package identifier file. PY_SUFFIX = ".py" RE_DOT = re.compile('\.') RE_FIRST_PKG = re.compile('^([^.]+)') # exception classes # interface functions # classes class BuildPathFinder(object): def __init__(self, packages, builder_path = BUILDER_PATH, debug = False): self.packages = dict([(package, True) for package in packages]) self.module_path = None self.builder_path = builder_path self.debug = debug def install(self): found = any( finder for finder in sys.meta_path if finder.__class__ == self.__class__ ) if (not found): sys.meta_path.append(self) def _get_build_path(self, module_import_spec, root_path): module_path_parts = module_import_spec.split('.') module_path = root_path + '/' + '/'.join( module_path_parts[0:-1] ) module_name = module_path_parts[-1] return (module_path, module_name) def find_module(self, fullname, path = None): self._debug_print("find_module fullname=", fullname) self._debug_print("find_module path=", path) if not self._is_finder_package(fullname): self._debug_print("wrong package") return None # for key in sys.modules.iterkeys(): # self._debug_print(" sys.modules=", sys.modules[key]) # module is a singleton, so need to re-initialize self.module_path = None module_tuple = self._get_build_path(fullname, self.builder_path) filepath = '/'.join(module_tuple) 
self._debug_print("find_module filepath=",filepath) self._debug_print("before self.module_path ", self.module_path) self._set_module_path(filepath) if self.module_path is None: self._debug_print("Trying the sys.path") # look in the usual places module_tuple = self._get_build_path(fullname, '') self._find_module_syspath(module_tuple) if self.module_path is not None: return BuildPathLoader(self.module_path, self.debug) else: return None def _set_module_path(self, filepath): if os.path.exists(filepath): self._debug_print("path exists") if os.path.isdir(filepath): if os.path.exists("/".join([filepath, DUNDER_INIT])): # found the package self._debug_print("found the package") self.module_path = filepath else: # found the code self._debug_print("found the code") self.module_path = filepath elif os.path.exists(filepath + PY_SUFFIX): # found the code self._debug_print("found the code " + PY_SUFFIX) self.module_path = filepath + PY_SUFFIX def _find_module_syspath(self, module_tuple, sys_path = None): if sys_path is None: sys_path = sys.path for path in sys_path: if path == '': # skip the cwd -- it would always succeed in testing continue path_tuple = self._get_build_path(module_tuple[0], path) filepath = ''.join(path_tuple) + '/' + module_tuple[1] self._debug_print("_find_module_syspath filepath=",filepath) self._set_module_path(filepath) self._debug_print("after self._set_module_path ", self.module_path) if self.module_path: return self.module_path = None return def _is_finder_package(self, fullname): if re.search(RE_DOT, fullname): package = re.search(RE_FIRST_PKG, fullname).group(1) else: package = fullname return self.packages.get(package) def _debug_print(self, *args): if not self.debug: return my_args = [ str(arg) for arg in args ] print ''.join(my_args)
class BuildPathLoader(object):
    """PEP 302 loader for a module or package located by the finder.

    ``module_path`` is either a package directory or a source file.  The
    source is executed into a fresh module object that is registered in
    ``sys.modules`` under the requested name.
    """

    def __init__(self, module_path, debug = False):
        """Store the filesystem location to load and the debug flag."""
        self.module_path = module_path
        self.debug = debug

    def load_module(self, fullname):
        """PEP 302 entry point: return the module object for ``fullname``.

        A module already present in ``sys.modules`` is returned unchanged.
        Raises ``ImportError`` when no source can be located; a loader must
        never hand ``None`` back to the import machinery.
        """
        self._debug_print("load_module fullname=", fullname)
        self._debug_print("load_module self.module_path=", self.module_path)
        if fullname in sys.modules:
            self._debug_print("sys.modules=", sys.modules[fullname])
            return sys.modules[fullname]

        if os.path.exists(self.module_path):
            full_module_path = self.module_path
        else:
            leaf_name = fullname.split(".")[-1]
            last_path_element = self.module_path.split("/")[-1]
            if leaf_name != last_path_element:
                # module_path names the parent directory: add '<leaf>.py'.
                full_module_path = self.module_path + "/" + leaf_name + PY_SUFFIX
            else:
                full_module_path = self.module_path
        self._debug_print("load_module full_module_path=", full_module_path)

        if os.path.isdir(full_module_path):
            module = self.import_package(full_module_path, fullname)
        elif os.path.isfile(full_module_path):
            module = self.import_module(full_module_path, fullname)
        else:
            module = None

        if module is None:
            raise ImportError(
                "No module named %s (looked at %s)" % (fullname, full_module_path)
            )
        return module

    def import_package(self, pkg_path, pkg_spec):
        """Load the package in directory ``pkg_path`` as ``pkg_spec``.

        Returns the module, or ``None`` when the directory has no
        ``__init__.py``.
        """
        self._debug_print("import_package pkg_path=", pkg_path)
        filepath = "/".join((pkg_path, DUNDER_INIT))
        self._debug_print("import_package filepath=", filepath)
        if not os.path.exists(filepath):
            return None
        return self.build_module(filepath, pkg_spec, is_pkg = True)

    def import_module(self, mod_path, mod_spec):
        """Load the source file ``mod_path`` as ``mod_spec``.

        Returns the module, or ``None`` when the file does not exist.
        """
        self._debug_print("import_module mod_path=", mod_path)
        if not os.path.exists(mod_path):
            return None
        return self.build_module(mod_path, mod_spec, is_pkg = False)

    def build_module(self, filepath, spec, is_pkg = False):
        """Execute the source at ``filepath`` as the module named ``spec``.

        The module is placed in ``sys.modules`` before its code runs.  For a
        package, ``__path__`` is the one-element list holding the directory
        that contains ``filepath``.

        If the module body raises, the half-initialised module is removed
        from ``sys.modules``, the traceback is written to stderr with the
        anonymous ``<string>`` file name replaced by ``spec``, and the
        process exits with status 1.  That exit is the original, deliberate
        behaviour of this loader and is kept as is.
        """
        # Function-scope import: 'types' is not among the file-level imports.
        import types

        with open(filepath, "rb") as fd:
            code = fd.read()

        module = sys.modules.setdefault(spec, types.ModuleType(spec))
        module.__file__ = filepath
        # PEP 302: __loader__ is the loader object itself.
        module.__loader__ = self
        if is_pkg:
            # PEP 302: __path__ is a list of directories, not name parts.
            module.__path__ = [os.path.dirname(filepath)]
        try:
            exec(code, module.__dict__)
        except Exception:
            sys.modules.pop(spec, None)
            exstr = traceback.format_exc()
            sys.stderr.write(
                exstr.replace('File "<string>"', 'File "' + spec + '"') + "\n"
            )
            sys.exit(1)
        return module

    def _debug_print(self, *args):
        """Print the concatenated arguments when debugging is enabled."""
        if not self.debug:
            return
        my_args = [str(arg) for arg in args]
        # Parenthesised single argument: same output as a print statement.
        print(''.join(my_args))
So, instead of fighting Python, I found this post that showed me the magic – you can structure your development directory similarly to your install path, but in the __init__.py
you stick the following code:
# Namespace-package boilerplate for the top-level __init__.py.
import pkg_resources
pkg_resources.declare_namespace(__name__)
/development-root/ mediaonfire/ __init__.py # with magic foo/ __init__.py # package files tests/ __init__.py # tests
Since I have good test coverage, the refactor to use this development structure hasn’t been too bad. I’ll keep banging away until I learn to code the Python way – I doubt, though, I’ll ever really understand why __init__.py
was needed…