Wrangling Namespaces in Python

Introduction

I’ve found Python namespaces to be problematic. If you don’t use your own namespace, then collisions are frequent. If you do use your own namespace, then you’ve got other issues to worry about, mostly stemming from __init__.py and Python’s module search strategy. Here are a couple of options I’ve found so far (school me if you found a better way):

Develop at the level of your package, then bury it in your namespace on install

This is the way Django apps work. For mediaonfire.foo, the development directory would look like:

/development-root/
    Makefile # or whatever mechanism you use
    foo/
        __init__.py
        # package files
    test/
        __init__.py
        # tests

/install-root/
    mediaonfire/
        __init__.py
        # other packages and modules
        foo/
            __init__.py
            # package files

When you’re ready to install, your install target plants foo as a subdirectory of mediaonfire in your directory tree. As long as your PYTHONPATH is set correctly, your tests can find foo in your development directory and everything works. Since the Django test harness requires the app be structured as above, this has served me well in writing Django apps, with the only changes being that the tests are put into foo/tests.py, and that I have to be careful of namespace collisions.

Use a magic __init__.py to fool Python’s module search mechanism into ignoring your toplevel namespace directory

My first stab at this, before finding out about this magic __init__.py, was to write a PEP 302 finder/loader. It worked most of the time, but it broke on the edge cases:

"""Provides a search mechanism (PEP 302) to find modules in the build
directory.

When testing modules, distutils puts the "built" modules in
build/lib/..., but if the package path conflicts with an installed
path, then one or the other module will not be found.

This module provides the BuildPathFinder and BuildPathLoader classes,
which implement PEP 302 (http://www.python.org/dev/peps/pep-0302/) and
use 'sys.meta_path' to solve the conflicting-modules problem.

"""

# imports
import imp
import os
import re
import sys
import traceback

# constants
BUILDER_PATH = "build/lib"
    # This is the base path, in the development directory, where the
    # packages are 'built'.
DUNDER_INIT = "__init__.py"
    # Standard package identifier file.
PY_SUFFIX = ".py"
    # File extension appended to a module name to locate its source file.

# Raw strings keep the backslash literal; '\.' in a normal string is an
# invalid escape sequence that newer interpreters warn about.
RE_DOT = re.compile(r'\.')
    # Matches a literal dot, i.e. the separator in a dotted module name.
RE_FIRST_PKG = re.compile(r'^([^.]+)')
    # Captures everything before the first dot (the top-level package).

# exception classes

# interface functions

# classes
class BuildPathFinder(object):
    """PEP 302 finder that resolves selected packages from the build path.

    Only modules whose top-level package was named in `packages` are
    handled; for every other name `find_module` returns None so the
    remaining import machinery takes over.  A handled module is looked
    up under `builder_path` first and, failing that, under each entry
    of `sys.path`.
    """

    def __init__(self, packages, builder_path = BUILDER_PATH, debug = False):
        """Remember which top-level packages this finder is responsible for.

        packages     -- iterable of top-level package names to intercept
        builder_path -- directory holding the 'built' packages
        debug        -- when true, trace the search on stdout
        """
        # Kept as a {name: True} dict so the attribute has the same shape
        # callers may already rely on.
        self.packages = dict.fromkeys(packages, True)
        self.module_path = None
        self.builder_path = builder_path
        self.debug = debug

    def install(self):
        """Append this finder to sys.meta_path unless one of the same
        class is already registered."""
        for finder in sys.meta_path:
            if finder.__class__ == self.__class__:
                return

        sys.meta_path.append(self)

    def _get_build_path(self, module_import_spec, root_path):
        """Split a dotted name into (directory under root_path, last part).

        For 'a.b.c' and root 'build/lib' this returns
        ('build/lib/a/b', 'c'); for a single-part name the directory is
        root_path followed by a trailing '/'.
        """
        parts = module_import_spec.split('.')
        module_path = root_path + '/' + '/'.join(parts[:-1])

        return (module_path, parts[-1])

    def find_module(self, fullname, path = None):
        """PEP 302 entry point: return a BuildPathLoader or None.

        `path` is accepted for protocol compatibility but is not used
        to drive the search.
        """
        self._debug_print("find_module fullname=", fullname)
        self._debug_print("find_module path=", path)

        if not self._is_finder_package(fullname):
            self._debug_print("wrong package")
            return None

        # The finder instance is shared across imports, so the result of
        # the previous lookup must be cleared first.
        self.module_path = None

        module_tuple = self._get_build_path(fullname, self.builder_path)

        filepath = '/'.join(module_tuple)
        self._debug_print("find_module filepath=",filepath)
        self._debug_print("before self.module_path ", self.module_path)

        self._set_module_path(filepath)

        if self.module_path is None:
            self._debug_print("Trying the sys.path")
            # Not in the build directory: look in the usual places.
            module_tuple = self._get_build_path(fullname, '')
            self._find_module_syspath(module_tuple)

        if self.module_path is None:
            return None

        return BuildPathLoader(self.module_path, self.debug)

    def _set_module_path(self, filepath):
        """Record in self.module_path where `filepath` resolves to, if anywhere.

        Checked in order: a package directory (contains __init__.py), an
        existing non-directory file at `filepath`, then `filepath` + '.py'.
        self.module_path is left untouched when none of them exists.
        """
        if os.path.isdir(filepath):
            self._debug_print("path exists")
            if os.path.exists("/".join([filepath, DUNDER_INIT])):
                # found the package
                self._debug_print("found the package")
                self.module_path = filepath
                return
            # A directory without __init__.py is not a package.  Fall
            # through so a sibling '<name>.py' is still found instead of
            # being hidden by the directory.
        elif os.path.exists(filepath):
            # found the code
            self._debug_print("path exists")
            self._debug_print("found the code")
            self.module_path = filepath
            return

        source_path = filepath + PY_SUFFIX
        if os.path.exists(source_path):
            # found the code
            self._debug_print("found the code " + PY_SUFFIX)
            self.module_path = source_path

    def _find_module_syspath(self, module_tuple, sys_path = None):
        """Search each entry of `sys_path` (default: sys.path) for the module.

        `module_tuple` is the (directory, name) pair produced by
        `_get_build_path(fullname, '')`.  On success self.module_path
        holds the match; otherwise it is reset to None.
        """
        if sys_path is None:
            sys_path = sys.path

        for path in sys_path:
            if path == '':
                # skip the cwd -- it would always succeed in testing
                continue

            # module_tuple[0] already starts with '/', so the joined
            # candidate contains doubled slashes; the string is left
            # as-is so the recorded path keeps its original form.
            path_tuple = self._get_build_path(module_tuple[0], path)
            filepath = ''.join(path_tuple) + '/' + module_tuple[1]
            self._debug_print("_find_module_syspath filepath=",filepath)

            self._set_module_path(filepath)
            self._debug_print("after self._set_module_path ", self.module_path)
            if self.module_path:
                return

        self.module_path = None
        return

    def _is_finder_package(self, fullname):
        """Return True when the top-level package of `fullname` is one
        this finder handles, else None."""
        # partition() never fails, unlike a regex match on a name that
        # starts with a dot.
        package = fullname.partition('.')[0]

        return self.packages.get(package)

    def _debug_print(self, *args):
        """Print the concatenated arguments when debugging is enabled."""
        if not self.debug:
            return

        my_args = [ str(arg) for arg in args ]
        # Single-argument call form prints identically on Python 2 and 3.
        print(''.join(my_args))
class BuildPathLoader(object):
    """PEP 302 loader that executes a module or package found on disk.

    `module_path` is either a source file or a package directory; the
    loaded module is registered in sys.modules under the name passed to
    `load_module`.
    """

    def __init__(self, module_path, debug = False):
        """Store the location to load from and the debug flag."""
        self.module_path = module_path
        self.debug = debug

    def load_module(self, fullname):
        """Return the module for `fullname`, loading it if necessary.

        A module already present in sys.modules is returned unchanged.
        Raises ImportError when nothing loadable exists at the resolved
        path.
        """
        self._debug_print("load_module fullname=",fullname)
        self._debug_print("load_module self.module_path=", self.module_path)

        if fullname in sys.modules:
            self._debug_print("sys.modules=", sys.modules[fullname])
            return sys.modules[fullname]

        if os.path.exists(self.module_path):
            full_module_path = self.module_path
        else:
            # The stored path does not exist as given: if it does not
            # already end in the module's own name, treat it as the
            # containing directory and append '<module>.py'.
            module = fullname.split(".")[-1]
            last_path_element = self.module_path.split("/")[-1]
            if module != last_path_element:
                full_module_path = self.module_path + "/" + module + PY_SUFFIX
            else:
                full_module_path = self.module_path
        self._debug_print("load_module full_module_path=", full_module_path)

        if os.path.isdir(full_module_path):
            return self.import_package(full_module_path, fullname)
        if os.path.isfile(full_module_path):
            return self.import_module(full_module_path, fullname)

        raise ImportError(
            "No module named %s (nothing loadable at %s)"
            % (fullname, full_module_path)
            )

    def import_package(self, pkg_path, pkg_spec):
        """Load the package in directory `pkg_path` as `pkg_spec`.

        Raises ImportError when the directory has no __init__.py, so the
        import fails cleanly instead of yielding None.
        """
        self._debug_print("import_package pkg_path=", pkg_path)
        filepath = "/".join((pkg_path, DUNDER_INIT))
        self._debug_print("import_package filepath=", filepath)
        if not os.path.exists(filepath):
            raise ImportError(
                "No module named %s (%s is missing)" % (pkg_spec, filepath)
                )

        return self.build_module(filepath, pkg_spec, is_pkg = True)

    def import_module(self, mod_path, mod_spec):
        """Load the source file `mod_path` as module `mod_spec`.

        Raises ImportError when the file does not exist.
        """
        self._debug_print("import_module mod_path=",mod_path)
        if not os.path.exists(mod_path):
            raise ImportError(
                "No module named %s (%s is missing)" % (mod_spec, mod_path)
                )

        return self.build_module(mod_path, mod_spec, is_pkg = False)

    def build_module(self, filepath, spec, is_pkg = False):
        """Create module `spec`, execute the code in `filepath` inside it,
        and return it.

        The module is placed in sys.modules before its code runs.  If
        the code raises an Exception, the module is removed again, the
        traceback is written to stderr and the process exits with
        status 1.
        """
        # Local import: types.ModuleType builds the same empty module
        # object as imp.new_module without relying on the imp module.
        import types

        # 'with' guarantees the handle is closed even if read() fails.
        with open(filepath, "rb") as fd:
            code = fd.read()

        module = sys.modules.setdefault(spec, types.ModuleType(spec))
        module.__file__ = filepath
        # PEP 302: __loader__ must be the loader object itself.
        module.__loader__ = self

        if is_pkg:
            # __path__ lists the directories searched for submodules, so
            # it must be the package directory, not the dotted-name parts.
            module.__path__ = [os.path.dirname(filepath)]

        try:
            # The call form of exec is valid on both Python 2 and 3.
            exec(code, module.__dict__)
        except Exception:
            sys.modules.pop(spec, None)
            exstr = traceback.format_exc()
            # The code was executed from a string, so the traceback says
            # 'File "<string>"'; substitute the module name.
            sys.stderr.write(
                re.sub(r'File "<string>"', 'File "' + spec + '"', exstr) + "\n"
                )
            # NOTE(review): exiting here hides the failure from callers
            # that guard imports with try/except; kept as-is to preserve
            # existing behaviour -- confirm whether re-raising is wanted.
            sys.exit(1)

        return module

    def _debug_print(self, *args):
        """Print the concatenated arguments when debugging is enabled."""
        if not self.debug:
            return

        my_args = [ str(arg) for arg in args ]
        # Single-argument call form prints identically on Python 2 and 3.
        print(''.join(my_args))

So, instead of fighting Python, I found this post that showed me the magic – you can structure your development directory similar to your install path, but in the __init__.py you stick the following code:

# Contents of the namespace directory's __init__.py (mediaonfire/__init__.py
# in the layout below): declare the package named by __name__ as a
# namespace through setuptools' pkg_resources.
import pkg_resources
pkg_resources.declare_namespace(__name__)

/development-root/
    mediaonfire/
        __init__.py # with magic
        foo/
            __init__.py
            # package files
    tests/
        __init__.py
        # tests

Since I have good test coverage, the refactor to use this development structure hasn’t been too bad. I’ll keep banging away until I learn to code the Python way – I doubt, though, I’ll ever really understand why __init__.py was needed…