details: http://www.bx.psu.edu/hg/galaxy/rev/ac60740712de changeset: 3533:ac60740712de user: Nate Coraor <nate@bx.psu.edu> date: Mon Mar 15 14:35:35 2010 -0400 description: Add numpy egg, update bx_python egg (now with 100% more numpy) and include code that allows scrambling to depend on other eggs existing and being importable. diffstat: eggs.ini | 6 +- lib/galaxy/eggs/dist.py | 5 + lib/galaxy/eggs/scramble.py | 12 ++- lib/pkg_resources.py | 148 ++++++++++++++++++++++------------ scripts/dist-scramble.py | 13 +++ scripts/scramble.py | 12 ++- scripts/scramble/lib/scramble_lib.py | 36 +++----- scripts/scramble/scripts/generic.py | 1 + scripts/test_dist_egg.py | 53 ++++++++++++ 9 files changed, 207 insertions(+), 79 deletions(-) diffs (592 lines): diff -r cfb0776875c0 -r ac60740712de eggs.ini --- a/eggs.ini Mon Mar 15 14:05:25 2010 -0400 +++ b/eggs.ini Mon Mar 15 14:35:35 2010 -0400 @@ -16,6 +16,7 @@ Cheetah = 2.2.2 DRMAA_python = 0.2 MySQL_python = 1.2.3c1 +numpy = 1.3.0 pbs_python = 2.9.4 psycopg2 = 2.0.13 pycrypto = 2.0.1 @@ -59,7 +60,7 @@ psycopg2 = _8.4.2_static pysqlite = _3.6.17_static MySQL_python = _5.1.41_static -bx_python = _dev_3b9d30e47619 +bx_python = _dev_f74aec067563 GeneTrack = _dev_48da9e998f0caf01c5be731e926f4b0481f658f0 SQLAlchemy = _dev_r6498 pysam = _kanwei_90e03180969d @@ -70,3 +71,6 @@ MySQL_python = mysql-5.1.41 psycopg2 = postgresql-8.4.2 pysqlite = sqlite-amalgamation-3_6_17 + +[dependencies] +bx_python = numpy diff -r cfb0776875c0 -r ac60740712de lib/galaxy/eggs/dist.py --- a/lib/galaxy/eggs/dist.py Mon Mar 15 14:05:25 2010 -0400 +++ b/lib/galaxy/eggs/dist.py Mon Mar 15 14:35:35 2010 -0400 @@ -72,6 +72,10 @@ sources = self.config.get( 'source', name ).split() except: sources = [] + try: + dependencies = self.config.get( 'dependencies', name ).split() + except: + dependencies = [] if full_platform: platforms = self.platforms else: @@ -83,4 +87,5 @@ host_info = self.hosts[platform].split() egg.build_host, egg.python = host_info[:2] egg.sources = sources + egg.dependencies = dependencies self.eggs[name].append( egg ) diff -r cfb0776875c0 -r ac60740712de lib/galaxy/eggs/scramble.py --- a/lib/galaxy/eggs/scramble.py Mon Mar 15 14:05:25 2010 -0400 +++ b/lib/galaxy/eggs/scramble.py Mon Mar 15 14:35:35 2010 -0400 @@ -3,7 +3,7 @@ """ import os, sys, shutil, tempfile, subprocess, urlparse, urllib -from __init__ import Egg, Crate, URLRetriever, galaxy_dir, py, unpack_zipfile +from __init__ import Egg, Crate, URLRetriever, galaxy_dir, py, unpack_zipfile, EggNotFetchable from distutils.sysconfig import get_config_var import tarfile, zipfile, zlib @@ -37,6 +37,7 @@ def __init__( self, *args, **kwargs ): Egg.__init__( self, *args, **kwargs ) self.sources = [] + self.dependencies = [] self.buildpath = None self.source_path = None self.py = py @@ -184,6 +185,11 @@ tagfile = open( os.path.join( self.buildpath, ".galaxy_tag" ), "w" ) tagfile.write( self.tag + '\n' ) tagfile.close() + if self.dependencies: + depfile = open( os.path.join( self.buildpath, ".galaxy_deps" ), "w" ) + for dependency in self.dependencies: + depfile.write( dependency + '\n' ) + depfile.close() def run_scramble_script( self ): log.warning( "%s(): Beginning build" % sys._getframe().f_code.co_name ) # subprocessed to sterilize the env @@ -211,6 +217,10 @@ egg.sources = self.config.get( "source", egg.name ).split() except: egg.sources = [] + try: + egg.dependencies = self.config.get( "dependencies", egg.name ).split() + except: + egg.dependencies = [] def parse_egg_section( self, *args, **kwargs ): kwargs['egg_class'] = ScrambleEgg Crate.parse_egg_section( self, *args, **kwargs ) diff -r cfb0776875c0 -r ac60740712de lib/pkg_resources.py --- a/lib/pkg_resources.py Mon Mar 15 14:05:25 2010 -0400 +++ b/lib/pkg_resources.py Mon Mar 15 14:35:35 2010 -0400 @@ -13,26 +13,67 @@ method. """ -import sys, os, zipimport, time, re, imp, new +import sys, os, zipimport, time, re, imp try: frozenset except NameError: from sets import ImmutableSet as frozenset -from os import utime, rename, unlink # capture these to bypass sandboxing +# capture these to bypass sandboxing +from os import utime, rename, unlink, mkdir from os import open as os_open - - - - - - - - - - - +from os.path import isdir, split + + +def _bypass_ensure_directory(name, mode=0777): + # Sandbox-bypassing version of ensure_directory() + dirname, filename = split(name) + if dirname and filename and not isdir(dirname): + _bypass_ensure_directory(dirname) + mkdir(dirname, mode) + + + + + + + +_state_vars = {} + +def _declare_state(vartype, **kw): + g = globals() + for name, val in kw.iteritems(): + g[name] = val + _state_vars[name] = vartype + +def __getstate__(): + state = {} + g = globals() + for k, v in _state_vars.iteritems(): + state[k] = g['_sget_'+v](g[k]) + return state + +def __setstate__(state): + g = globals() + for k, v in state.iteritems(): + g['_sset_'+_state_vars[k]](k, g[k], v) + return state + +def _sget_dict(val): + return val.copy() + +def _sset_dict(key, ob, state): + ob.clear() + ob.update(state) + +def _sget_object(val): + return val.__getstate__() + +def _sset_object(key, ob, state): + ob.__setstate__(state) + +_sget_none = _sset_none = lambda *args: None @@ -164,14 +205,8 @@ def _macosx_vers(_cache=[]): if not _cache: - info = os.popen('/usr/bin/sw_vers').read().splitlines() - for line in info: - key, value = line.split(None, 1) - if key == 'ProductVersion:': - _cache.append(value.strip().split(".")) - break - else: - raise ValueError, "What?!" + from platform import mac_ver + _cache.append(mac_ver()[0].split('.')) return _cache[0] def _macosx_arch(machine): @@ -203,6 +238,12 @@ + + + + + + def compatible_platforms(provided,required): """Can code for the `provided` platform run on the `required` platform? @@ -387,7 +428,7 @@ def add_entry(self, entry): """Add a path item to ``.entries``, finding any distributions on it - ``find_distributions(entry,False)`` is used to find distributions + ``find_distributions(entry, True)`` is used to find distributions corresponding to the path entry, and they are added. `entry` is always appended to ``.entries``, even if it is already present. (This is because ``sys.path`` can contain the same value more than @@ -622,7 +663,6 @@ activated to fulfill the requirements; all relevant distributions are included, even if they were already activated in this working set. """ - needed = self.resolve(parse_requirements(requirements)) for dist in needed: @@ -630,7 +670,6 @@ return needed - def subscribe(self, callback): """Invoke `callback` for all distributions (including existing ones)""" if callback in self.callbacks: @@ -639,19 +678,21 @@ for dist in self: callback(dist) - def _added_new(self, dist): for callback in self.callbacks: callback(dist) - - - - - - - - + def __getstate__(self): + return ( + self.entries[:], self.entry_keys.copy(), self.by_key.copy(), + self.callbacks[:] + ) + + def __setstate__(self, (entries, keys, by_key, callbacks)): + self.entries = entries[:] + self.entry_keys = keys.copy() + self.by_key = by_key.copy() + self.callbacks = callbacks[:] class Environment(object): @@ -916,7 +957,7 @@ extract_path = self.extraction_path or get_default_cache() target_path = os.path.join(extract_path, archive_name+'-tmp', *names) try: - ensure_directory(target_path) + _bypass_ensure_directory(target_path) except: self.extraction_error() @@ -1188,7 +1229,9 @@ ) def _fn(self, base, resource_name): - return os.path.join(base, *resource_name.split('/')) + if resource_name: + return os.path.join(base, *resource_name.split('/')) + return base def _get(self, path): if hasattr(self.loader, 'get_data'): @@ -1226,8 +1269,6 @@ - - class DefaultProvider(EggProvider): """Provides access to package resources in the filesystem""" @@ -1597,7 +1638,7 @@ -_distribution_finders = {} +_declare_state('dict', _distribution_finders = {}) def register_finder(importer_type, distribution_finder): """Register `distribution_finder` to find distributions in sys.path items @@ -1646,7 +1687,7 @@ """Yield distributions accessible on a sys.path directory""" path_item = _normalize_cached(path_item) - if os.path.isdir(path_item): + if os.path.isdir(path_item) and os.access(path_item, os.R_OK): if path_item.lower().endswith('.egg'): # unpacked egg yield Distribution.from_filename( @@ -1679,8 +1720,8 @@ break register_finder(ImpWrapper,find_on_path) -_namespace_handlers = {} -_namespace_packages = {} +_declare_state('dict', _namespace_handlers = {}) +_declare_state('dict', _namespace_packages = {}) def register_namespace_handler(importer_type, namespace_handler): """Register `namespace_handler` to declare namespace packages @@ -1709,7 +1750,7 @@ return None module = sys.modules.get(packageName) if module is None: - module = sys.modules[packageName] = new.module(packageName) + module = sys.modules[packageName] = imp.new_module(packageName) module.__path__ = []; _set_parent_ns(packageName) elif not hasattr(module,'__path__'): raise TypeError("Not a package:", packageName) @@ -1862,7 +1903,7 @@ The algorithm assumes that strings like "-" and any alpha string that alphabetically follows "final" represents a "patch level". So, "2.4-1" is assumed to be a branch or patch of "2.4", and therefore "2.4.1" is - considered newer than "2.4-1", whic in turn is newer than "2.4". + considered newer than "2.4-1", which in turn is newer than "2.4". Strings like "a", "b", "c", "alpha", "beta", "candidate" and so on (that come before "final" alphabetically) are assumed to be pre-release versions, @@ -1871,7 +1912,8 @@ Finally, to handle miscellaneous cases, the strings "pre", "preview", and "rc" are treated as if they were "c", i.e. as though they were release candidates, and therefore are not as new as a version string that does not - contain them. + contain them, and "dev" is replaced with an '@' so that it sorts lower than + than any other pre-release tag. """ parts = [] for part in _parse_version_parts(s.lower()): @@ -2219,12 +2261,9 @@ if not loc: return - if path is sys.path: - self.check_version_conflict() - nloc = _normalize_cached(loc) bdir = os.path.dirname(nloc) - npath= map(_normalize_cached, path) + npath= [(p and _normalize_cached(p) or p) for p in path] bp = None for p, item in enumerate(npath): @@ -2232,10 +2271,14 @@ break elif item==bdir and self.precedence==EGG_DIST: # if it's an .egg, give it precedence over its directory + if path is sys.path: + self.check_version_conflict() path.insert(p, loc) npath.insert(p, nloc) break else: + if path is sys.path: + self.check_version_conflict() path.append(loc) return @@ -2252,7 +2295,6 @@ return - def check_version_conflict(self): if self.key=='setuptools': return # ignore the inevitable setuptools self-conflicts :( @@ -2266,7 +2308,7 @@ continue fn = getattr(sys.modules[modname], '__file__', None) - if fn and normalize_path(fn).startswith(loc): + if fn and (normalize_path(fn).startswith(loc) or fn.startswith(loc)): continue issue_warning( "Module %s was already imported from %s, but %s is being added" @@ -2443,7 +2485,7 @@ def __contains__(self,item): if isinstance(item,Distribution): - if item.key <> self.key: return False + if item.key != self.key: return False if self.index: item = item.parsed_version # only get if we need it elif isinstance(item,basestring): item = parse_version(item) @@ -2540,7 +2582,7 @@ os.open = old_open # and then put it back -# Set up global resource manager +# Set up global resource manager (deliberately not state-saved) _manager = ResourceManager() def _initialize(g): for name in dir(_manager): @@ -2549,7 +2591,7 @@ _initialize(globals()) # Prepare the master working set and make the ``require()`` API available -working_set = WorkingSet() +_declare_state('object', working_set = WorkingSet()) try: # Does the main program list any requirements? from __main__ import __requires__ diff -r cfb0776875c0 -r ac60740712de scripts/dist-scramble.py --- a/scripts/dist-scramble.py Mon Mar 15 14:05:25 2010 -0400 +++ b/scripts/dist-scramble.py Mon Mar 15 14:35:35 2010 -0400 @@ -32,6 +32,19 @@ failed = [] for egg in eggs: try: + for dependency in egg.dependencies: + print "Checking %s dependency: %s" % ( egg.name, dependency ) + # this could be in a better data structure... + dep = filter( lambda x: x.platform == egg.platform, c[dependency] )[0] + dep.resolve() + except EggNotFetchable, e: + degg = e.eggs[0] + print "%s build dependency %s %s %s couldn't be" % ( egg.name, degg.name, degg.version, degg.platform ) + print "downloaded automatically. There isn't really a graceful" + print "way to handle this when dist-scrambling." + failed.append( egg.platform ) + continue + try: egg.scramble() except ScrambleFailure: failed.append( egg.platform ) diff -r cfb0776875c0 -r ac60740712de scripts/scramble.py --- a/scripts/scramble.py Mon Mar 15 14:05:25 2010 -0400 +++ b/scripts/scramble.py Mon Mar 15 14:35:35 2010 -0400 @@ -14,7 +14,7 @@ lib = os.path.abspath( os.path.join( os.path.dirname( __file__ ), "..", "lib" ) ) sys.path.append( lib ) -from galaxy.eggs.scramble import ScrambleCrate, ScrambleFailure +from galaxy.eggs.scramble import ScrambleCrate, ScrambleFailure, EggNotFetchable c = ScrambleCrate() @@ -31,6 +31,16 @@ except: print "error: %s not in eggs.ini" % name sys.exit( 1 ) + for dependency in egg.dependencies: + print "Checking %s dependency: %s" % ( egg.name, dependency ) + try: + c[dependency].require() + except EggNotFetchable, e: + degg = e.eggs[0] + print "%s build dependency %s %s couldn't be downloaded" % ( egg.name, degg.name, degg.version ) + print "automatically. You can try building it by hand with:" + print " python scripts/scramble.py %s" % degg.name + sys.exit( 1 ) egg.scramble() sys.exit( 0 ) except ScrambleFailure, e: diff -r cfb0776875c0 -r ac60740712de scripts/scramble/lib/scramble_lib.py --- a/scripts/scramble/lib/scramble_lib.py Mon Mar 15 14:05:25 2010 -0400 +++ b/scripts/scramble/lib/scramble_lib.py Mon Mar 15 14:35:35 2010 -0400 @@ -21,6 +21,15 @@ except: return None +def get_deps(): + try: + depf = open( '.galaxy_deps', 'r' ) + except: + return [] + c = eggs.Crate() + for dep in depf: + c[dep.strip()].require() + def clean( extra_dirs=[] ): for dir in [ 'build', 'dist' ] + extra_dirs: try: @@ -127,29 +136,10 @@ else: return 'gcc' -# Monkeypatch pkg_resources for better ABI recognition -def _get_platform(): - plat = distutils.util._get_platform() - if sys.version_info[:2] == ( 2, 5 ) and \ - ( ( os.uname()[-1] in ( 'i386', 'ppc' ) and sys.platform == 'darwin' and os.path.abspath( sys.prefix ).startswith( '/System' ) ) or \ - ( sys.platform == 'darwin' and get_config_vars().get('UNIVERSALSDK', '').strip() ) ): - plat = 'macosx-10.3-fat' - if sys.platform == "sunos5" and not (plat.endswith('_32') or plat.endswith('_64')): - if sys.maxint > 2**31: - plat += '_64' - else: - plat += '_32' - if not (plat.endswith('-ucs2') or plat.endswith('-ucs4')): - if sys.maxunicode > 2**16: - plat += '-ucs4' - else: - plat += '-ucs2' - return plat -try: - assert distutil.util._get_platform -except: - distutils.util._get_platform = distutils.util.get_platform - distutils.util.get_platform = _get_platform +# get galaxy eggs lib +galaxy_lib = os.path.abspath( os.path.join( os.path.dirname( __file__ ), '..', '..', '..', 'lib' ) ) +sys.path.insert( 0, galaxy_lib ) +from galaxy import eggs # get setuptools from ez_setup import use_setuptools diff -r cfb0776875c0 -r ac60740712de scripts/scramble/scripts/generic.py --- a/scripts/scramble/scripts/generic.py Mon Mar 15 14:05:25 2010 -0400 +++ b/scripts/scramble/scripts/generic.py Mon Mar 15 14:35:35 2010 -0400 @@ -9,6 +9,7 @@ from scramble_lib import * tag = get_tag() # get the tag +get_deps() # require any dependent eggs clean() # clean up any existing stuff (could happen if you run scramble.py by hand) # reset args for distutils diff -r cfb0776875c0 -r ac60740712de scripts/test_dist_egg.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/test_dist_egg.py Mon Mar 15 14:35:35 2010 -0400 @@ -0,0 +1,53 @@ +#!/usr/bin/env python +""" +A crude script for minimal "testing" of dist eggs (require and import). It may +not work on all zipped eggs. It may be easiest to just customize this script +for whatever egg you want to test. + +usage: test_dist_egg.py <egg_name> +""" +import os, sys, logging, subprocess + +try: + assert sys.argv[1] +except: + print __doc__ + sys.exit( 1 ) + +lib = os.path.abspath( os.path.join( os.path.dirname( __file__ ), '..', 'lib' ) ) +sys.path.insert( 0, lib ) + +if sys.argv[1].endswith( '.egg' ): + + egg = sys.argv[1] + egg_name = os.path.basename( egg ).split( '-' )[0] + sys.path.insert( 0, egg ) + + import pkg_resources + pkg_resources.require( egg_name ) + provider = pkg_resources.get_provider( egg_name ) + importables = provider.get_metadata('top_level.txt').splitlines() + + for importable in importables: + mod = __import__( importable ) + assert os.path.dirname( mod.__path__[0] ) == os.path.dirname( provider.module_path ) + print "OK" + + sys.exit( 0 ) + +else: + + build_dir = os.path.join( os.path.dirname( os.path.abspath( __file__ ) ), 'scramble', 'build' ) + if os.path.exists( build_dir ): + raise Exception( 'Build dir must be removed before testing: %s' % build_dir ) + + name = sys.argv[1] + + from galaxy.eggs.dist import DistScrambleCrate + + c = DistScrambleCrate() + + for egg in c[name]: + print 'Checking %s %s for %s on %s' % ( name, egg.version, egg.platform, egg.build_host ) + p = subprocess.Popen( 'ssh %s %s %s %s %s' % ( egg.build_host, egg.python, os.path.abspath( __file__ ), egg.distribution.location, egg.platform ), shell=True ) + p.wait()