4 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/47c6c6bb8eec/
Changeset: 47c6c6bb8eec
User: iracooke
Date: 2014-07-07 03:16:09
Summary: Add sqlite datatype and corresponding dataprovider
Affected #: 3 files

diff -r 6d50cd22380253129121e3e7359a6286ee9b1663 -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample
+++ b/datatypes_conf.xml.sample
@@ -177,6 +177,7 @@
    <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/>
    <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" description="Any data in tab delimited format (tabular)." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Tabular_.28tab_delimited.29"/>
    <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/>
+    <datatype extension="sqlite" type="galaxy.datatypes.binary:SQlite" mimetype="application/octet-stream" display_in_upload="true"/>
    <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true" description="Any text file." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Plain_text"/>
    <datatype extension="linecount" type="galaxy.datatypes.data:LineCount" display_in_upload="false"/>
    <datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/>
@@ -262,6 +263,7 @@
    -->
    <sniffer type="galaxy.datatypes.tabular:Vcf"/>
    <sniffer type="galaxy.datatypes.binary:TwoBit"/>
+    <sniffer type="galaxy.datatypes.binary:SQlite"/>
    <sniffer type="galaxy.datatypes.binary:Bam"/>
    <sniffer type="galaxy.datatypes.binary:Sff"/>
    <sniffer type="galaxy.datatypes.xml:Phyloxml"/>
diff -r 6d50cd22380253129121e3e7359a6286ee9b1663 -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.py
+++ b/lib/galaxy/datatypes/binary.py
@@ -12,6 +12,7 @@
import subprocess
import tempfile
import zipfile
+import sqlite3

from urllib import urlencode, quote_plus
from galaxy import eggs
@@ -545,3 +546,45 @@
        return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size()))

Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit)
+
+
+@dataproviders.decorators.has_dataproviders
+class SQlite ( Binary ):
+    file_ext = "sqlite"
+
+    # Connects and runs a query that should work on any real database
+    # If the file is not sqlite, an exception will be thrown and the sniffer will return false
+    def sniff( self, filename ):
+        try:
+            conn = sqlite3.connect(filename)
+            schema_version=conn.cursor().execute("pragma schema_version").fetchone()
+            conn.close()
+            if schema_version is not None:
+                return True
+            return False
+        except:
+            return False
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek = "SQLite Database"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "SQLite Database (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+
+    @dataproviders.decorators.dataprovider_factory( 'sqlite', dataproviders.dataset.SQliteDataProvider.settings )
+    def sqlite_dataprovider( self, dataset, **settings ):
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.dataset.SQliteDataProvider( dataset_source, **settings )
+
+
+Binary.register_sniffable_binary_format("sqlite","sqlite",SQlite)
+
diff -r 6d50cd22380253129121e3e7359a6286ee9b1663 -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 lib/galaxy/datatypes/dataproviders/dataset.py
--- a/lib/galaxy/datatypes/dataproviders/dataset.py
+++ b/lib/galaxy/datatypes/dataproviders/dataset.py
@@ -11,6 +11,7 @@
import line
import column
import external
+import sqlite3

from galaxy import eggs
eggs.require( 'bx-python' )
@@ -700,3 +701,29 @@
        #TODO: as samtools - need more info on output format
        raise NotImplementedError()
        super( BGzipTabixDataProvider, self ).__init__( dataset, **kwargs )
+
+
+
+class SQliteDataProvider ( base.DataProvider ):
+    """
+    Data provider that uses a sqlite database file as its source.
+
+    Allows any query to be run and returns the resulting rows as sqlite3 row objects
+    """
+    settings = {
+        'query' : 'str'
+    }
+
+    def __init__( self, source, query=None, **kwargs ):
+        self.query=query
+        self.connection = sqlite3.connect(source.dataset.file_name);
+        self.connection.row_factory = sqlite3.Row
+        super( SQliteDataProvider, self ).__init__( source, **kwargs )
+
+    def __iter__( self ):
+        if self.query is not None:
+            for row in self.connection.cursor().execute(self.query):
+                yield row
+        else:
+            yield
+

https://bitbucket.org/galaxy/galaxy-central/commits/034c0159c0cf/
Changeset: 034c0159c0cf
User: iracooke
Date: 2014-07-24 06:30:17
Summary: Merged galaxy/galaxy-central into default
Affected #: 278 files

diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b .hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -11,6 +11,9 @@
scripts/scramble/lib
scripts/scramble/archives

+# Python virtualenv
+.venv
+
# Database stuff
database/beaker_sessions
database/community_files
diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/app.py
--- a/lib/galaxy/app.py
+++ b/lib/galaxy/app.py
@@ -12,7 +12,6 @@
from galaxy.visualization.data_providers.registry import DataProviderRegistry
from galaxy.visualization.registry import VisualizationsRegistry
from galaxy.tools.imp_exp import load_history_imp_exp_tools
-from galaxy.tools.genome_index import load_genome_index_tools
from galaxy.sample_tracking import external_service_types
from galaxy.openid.providers import OpenIDProviders
from galaxy.tools.data_manager.manager import DataManagers
@@ -93,8 +92,6 @@
        self.datatypes_registry.load_external_metadata_tool( self.toolbox )
        # Load history import/export tools.
        load_history_imp_exp_tools( self.toolbox )
-        # Load genome indexer tool.
-        load_genome_index_tools( self.toolbox )
        # visualizations registry: associates resources with visualizations, controls how to render
        self.visualizations_registry = None
        if self.config.visualization_plugins_directory:
diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/config.py
--- a/lib/galaxy/config.py
+++ b/lib/galaxy/config.py
@@ -15,13 +15,12 @@
from galaxy.web.formatting import expand_pretty_datetime_format
from galaxy.util import string_as_bool
from galaxy.util import listify
-from galaxy.util import parse_xml
from galaxy.util.dbkeys import GenomeBuilds
from galaxy import eggs
-import pkg_resources

log = logging.getLogger( __name__ )

+
def resolve_path( path, root ):
    """If 'path' is relative make absolute by prepending 'root'"""
    if not( os.path.isabs( path ) ):
@@ -40,9 +39,9 @@
        self.config_dict = kwargs
        self.root = kwargs.get( 'root_dir', '.'
) # Collect the umask and primary gid from the environment - self.umask = os.umask( 077 ) # get the current umask - os.umask( self.umask ) # can't get w/o set, so set it back - self.gid = os.getgid() # if running under newgrp(1) we'll need to fix the group of data created on the cluster + self.umask = os.umask( 077 ) # get the current umask + os.umask( self.umask ) # can't get w/o set, so set it back + self.gid = os.getgid() # if running under newgrp(1) we'll need to fix the group of data created on the cluster # Database related configuration self.database = resolve_path( kwargs.get( "database_file", "database/universe.sqlite" ), self.root ) @@ -75,7 +74,7 @@ self.enable_unique_workflow_defaults = string_as_bool( kwargs.get( 'enable_unique_workflow_defaults', False ) ) self.tool_path = resolve_path( kwargs.get( "tool_path", "tools" ), self.root ) self.tool_data_path = resolve_path( kwargs.get( "tool_data_path", "tool-data" ), os.getcwd() ) - self.len_file_path = resolve_path( kwargs.get( "len_file_path", os.path.join( self.tool_data_path, 'shared','ucsc','chrom') ), self.root ) + self.len_file_path = resolve_path( kwargs.get( "len_file_path", os.path.join( self.tool_data_path, 'shared', 'ucsc', 'chrom') ), self.root ) self.test_conf = resolve_path( kwargs.get( "test_conf", "" ), self.root ) # The value of migrated_tools_config is the file reserved for containing only those tools that have been eliminated from the distribution # and moved to the tool shed. @@ -169,7 +168,7 @@ self.admin_users = kwargs.get( "admin_users", "" ) self.admin_users_list = [u.strip() for u in self.admin_users.split(',') if u] self.reset_password_length = int( kwargs.get('reset_password_length', '15') ) - self.mailing_join_addr = kwargs.get('mailing_join_addr',"galaxy-announce-join@bx.psu.edu") + self.mailing_join_addr = kwargs.get('mailing_join_addr', 'galaxy-announce-join@bx.psu.edu') self.error_email_to = kwargs.get( 'error_email_to', None ) self.activation_email = kwargs.get( 'activation_email', None ) self.user_activation_on = string_as_bool( kwargs.get( 'user_activation_on', False ) ) @@ -271,9 +270,9 @@ self.object_store_cache_path = resolve_path( kwargs.get( "object_store_cache_path", "database/object_store_cache" ), self.root ) # Handle AWS-specific config options for backward compatibility if kwargs.get( 'aws_access_key', None) is not None: - self.os_access_key= kwargs.get( 'aws_access_key', None ) - self.os_secret_key= kwargs.get( 'aws_secret_key', None ) - self.os_bucket_name= kwargs.get( 's3_bucket', None ) + self.os_access_key = kwargs.get( 'aws_access_key', None ) + self.os_secret_key = kwargs.get( 'aws_secret_key', None ) + self.os_bucket_name = kwargs.get( 's3_bucket', None ) self.os_use_reduced_redundancy = kwargs.get( 'use_reduced_redundancy', False ) else: self.os_access_key = kwargs.get( 'os_access_key', None ) @@ -376,6 +375,8 @@ self.fluent_port = int( kwargs.get( 'fluent_port', 24224 ) ) # visualization plugin framework self.visualization_plugins_directory = kwargs.get( 'visualization_plugins_directory', None ) + # Default chunk size for chunkable datatypes -- 64k + self.display_chunk_size = int( kwargs.get( 'display_chunk_size', 65536) ) @property def sentry_dsn_public( self ): @@ -452,19 +453,13 @@ except Exception, e: raise ConfigurationError( "Unable to create missing directory: %s\n%s" % ( path, e ) ) # Create the directories that it makes sense to create - for path in self.file_path, \ - self.new_file_path, \ - self.job_working_directory, \ - self.cluster_files_directory, \ - 
self.template_cache, \ - self.ftp_upload_dir, \ - self.library_import_dir, \ - self.user_library_import_dir, \ - self.nginx_upload_store, \ - './static/genetrack/plots', \ - self.whoosh_index_dir, \ - self.object_store_cache_path, \ - os.path.join( self.tool_data_path, 'shared', 'jars' ): + for path in (self.file_path, self.new_file_path, + self.job_working_directory, self.cluster_files_directory, + self.template_cache, self.ftp_upload_dir, + self.library_import_dir, self.user_library_import_dir, + self.nginx_upload_store, './static/genetrack/plots', + self.whoosh_index_dir, self.object_store_cache_path, + os.path.join( self.tool_data_path, 'shared', 'jars' )): self._ensure_directory( path ) # Check that required files exist tool_configs = self.tool_configs @@ -480,7 +475,7 @@ if key in self.deprecated_options: log.warning( "Config option '%s' is deprecated and will be removed in a future release. Please consult the latest version of the sample configuration file." % key ) - def is_admin_user( self,user ): + def is_admin_user( self, user ): """ Determine if the provided user is listed in `admin_users`. @@ -495,12 +490,13 @@ """ return resolve_path( path, self.root ) + def get_database_engine_options( kwargs, model_prefix='' ): """ Allow options for the SQLAlchemy database engine to be passed by using the prefix "database_engine_option". """ - conversions = { + conversions = { 'convert_unicode': string_as_bool, 'pool_timeout': int, 'echo': string_as_bool, @@ -522,6 +518,7 @@ rval[ key ] = value return rval + def configure_logging( config ): """ Allow some basic logging configuration to be read from ini file. @@ -556,7 +553,7 @@ root.addHandler( handler ) # If sentry is configured, also log to it if config.sentry_dsn: - pkg_resources.require( "raven" ) + eggs.require( "raven" ) from raven.handlers.logging import SentryHandler sentry_handler = SentryHandler( config.sentry_dsn ) sentry_handler.setLevel( logging.WARN ) diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -126,10 +126,21 @@ rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy ) return rval - def from_JSON_dict( self, filename, path_rewriter=None ): + def from_JSON_dict( self, filename=None, path_rewriter=None, json_dict=None ): dataset = self.parent - log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) ) - JSONified_dict = json.load( open( filename ) ) + if filename is not None: + log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) ) + JSONified_dict = json.load( open( filename ) ) + elif json_dict is not None: + log.debug( 'loading metadata from dict for: %s %s' % ( dataset.__class__.__name__, dataset.id ) ) + if isinstance( json_dict, basestring ): + JSONified_dict = json.loads( json_dict ) + elif isinstance( json_dict, dict ): + JSONified_dict = json_dict + else: + raise ValueError( "json_dict must be either a dictionary or a string, got %s." 
% ( type( json_dict ) ) ) + else: + raise ValueError( "You must provide either a filename or a json_dict" ) for name, spec in self.spec.items(): if name in JSONified_dict: from_ext_kwds = {} @@ -143,13 +154,15 @@ #metadata associated with our dataset, we'll delete it from our dataset's metadata dict del dataset._metadata[ name ] - def to_JSON_dict( self, filename ): + def to_JSON_dict( self, filename=None ): #galaxy.model.customtypes.json_encoder.encode() meta_dict = {} dataset_meta_dict = self.parent._metadata for name, spec in self.spec.items(): if name in dataset_meta_dict: meta_dict[ name ] = spec.param.to_external_value( dataset_meta_dict[ name ] ) + if filename is None: + return json.dumps( meta_dict ) json.dump( meta_dict, open( filename, 'wb+' ) ) def __getstate__( self ): diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py +++ b/lib/galaxy/datatypes/tabular.py @@ -25,7 +25,6 @@ # All tabular data is chunkable. CHUNKABLE = True - CHUNK_SIZE = 10000 """Add metadata elements""" MetadataElement( name="comment_lines", default=0, desc="Number of comment lines", readonly=False, optional=True, no_value=0 ) @@ -262,13 +261,13 @@ def get_chunk(self, trans, dataset, chunk): ck_index = int(chunk) f = open(dataset.file_name) - f.seek(ck_index * self.CHUNK_SIZE) + f.seek(ck_index * trans.app.config.display_chunk_size) # If we aren't at the start of the file, seek to next newline. Do this better eventually. if f.tell() != 0: cursor = f.read(1) while cursor and cursor != '\n': cursor = f.read(1) - ck_data = f.read(self.CHUNK_SIZE) + ck_data = f.read(trans.app.config.display_chunk_size) cursor = f.read(1) while cursor and ck_data[-1] != '\n': ck_data += cursor diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/exceptions/__init__.py --- a/lib/galaxy/exceptions/__init__.py +++ b/lib/galaxy/exceptions/__init__.py @@ -86,6 +86,11 @@ err_code = error_codes.USER_REQUEST_INVALID_PARAMETER +class AuthenticationFailed( MessageException ): + status_code = 401 + err_code = error_codes.USER_AUTHENTICATION_FAILED + + class AuthenticationRequired( MessageException ): status_code = 403 #TODO: as 401 and send WWW-Authenticate: ??? diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/exceptions/error_codes.json --- a/lib/galaxy/exceptions/error_codes.json +++ b/lib/galaxy/exceptions/error_codes.json @@ -60,6 +60,11 @@ "message": "Supplied incorrect or incompatible tool meta parameters." }, { + "name": "USER_AUTHENTICATION_FAILED", + "code": 401001, + "message": "Authentication failed, invalid credentials supplied." 
+ }, + { "name": "USER_NO_API_KEY", "code": 403001, "message": "API authentication required for this request" diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1192,9 +1192,6 @@ param_dict = self.tool.params_from_strings( param_dict, self.app ) # Check for and move associated_files self.tool.collect_associated_files(out_data, self.working_directory) - gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() - if gitd: - self.tool.collect_associated_files({'': gitd}, self.working_directory) # Create generated output children and primary datasets and add to param_dict collected_datasets = { 'children': self.tool.collect_child_datasets(out_data, self.working_directory), @@ -1248,7 +1245,6 @@ self.external_output_metadata.cleanup_external_metadata( self.sa_session ) galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session ) galaxy.tools.imp_exp.JobImportHistoryArchiveWrapper( self.app, self.job_id ).cleanup_after_job() - galaxy.tools.genome_index.GenomeIndexToolWrapper( self.job_id ).postprocessing( self.sa_session, self.app ) if delete_files: self.app.object_store.delete(self.get_job(), base_dir='job_work', entire_dir=True, dir_only=True, extra_dir=str(self.job_id)) except: @@ -1351,10 +1347,8 @@ dataset_path_rewriter = self.dataset_path_rewriter job = self.get_job() - # Job output datasets are combination of history, library, jeha and gitd datasets. + # Job output datasets are combination of history, library, and jeha datasets. special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() - if not special: - special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() false_path = None results = [] diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/actions/post.py --- a/lib/galaxy/jobs/actions/post.py +++ b/lib/galaxy/jobs/actions/post.py @@ -215,8 +215,17 @@ p_str += "<label for='pja__"+pja.output_name+"__RenameDatasetAction__newname'>New output name:</label>\ <input type='text' name='pja__"+pja.output_name+"__RenameDatasetAction__newname' value=''/>"; } + inputlist = []; + $.each(node.input_terminals, function(i, v){ + inputlist.push(v.name); + }); + if (inputlist !== []){ + p_str += "Available inputs are: <strong>" + inputlist.join(', ') + "</strong>"; + }else{ + p_str += "No inputs are available for templating into this action."; + } """ - return get_form_template(cls.name, cls.verbose_name, form, "This action will rename the result dataset.") + return get_form_template(cls.name, cls.verbose_name, form, "This action will rename the result dataset. See <a href='https://wiki.galaxyproject.org/Learn/AdvancedWorkflow/Variables'>the wiki</a> for usage information.") @classmethod def get_short_str(cls, pja): diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/deferred/genome_index.py --- a/lib/galaxy/jobs/deferred/genome_index.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Module for managing genome transfer jobs. 
-""" -from __future__ import with_statement - -import logging, shutil, gzip, bz2, zipfile, tempfile, tarfile, sys, os - -from galaxy import eggs -from sqlalchemy import and_ -from data_transfer import * - -log = logging.getLogger( __name__ ) - -__all__ = [ 'GenomeIndexPlugin' ] - -class GenomeIndexPlugin( DataTransfer ): - - def __init__( self, app ): - super( GenomeIndexPlugin, self ).__init__( app ) - self.app = app - self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__'] - self.sa_session = app.model.context.current - - def create_job( self, trans, path, indexes, dbkey, intname ): - params = dict( user=trans.user.id, path=path, indexes=indexes, dbkey=dbkey, intname=intname ) - deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'GenomeIndexPlugin', params = params ) - self.sa_session.add( deferred ) - self.sa_session.flush() - log.debug( 'Job created, id %d' % deferred.id ) - return deferred.id - - def check_job( self, job ): - log.debug( 'Job check' ) - return 'ready' - - def run_job( self, job ): - incoming = dict( path=os.path.abspath( job.params[ 'path' ] ), indexer=job.params[ 'indexes' ][0], user=job.params[ 'user' ] ) - indexjob = self.tool.execute( self, set_output_hid=False, history=None, incoming=incoming, transfer=None, deferred=job ) - job.params[ 'indexjob' ] = indexjob[0].id - job.state = self.app.model.DeferredJob.states.RUNNING - self.sa_session.add( job ) - self.sa_session.flush() - return self.app.model.DeferredJob.states.RUNNING diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/deferred/genome_transfer.py --- a/lib/galaxy/jobs/deferred/genome_transfer.py +++ /dev/null @@ -1,250 +0,0 @@ -""" -Module for managing genome transfer jobs. 
-""" -from __future__ import with_statement - -import logging, shutil, gzip, bz2, zipfile, tempfile, tarfile, sys - -from galaxy import eggs -from sqlalchemy import and_ - -from galaxy.util.odict import odict -from galaxy.workflow.modules import module_factory -from galaxy.jobs.actions.post import ActionBox - -from galaxy.tools.parameters import visit_input_values -from galaxy.tools.parameters.basic import DataToolParameter -from galaxy.tools.data import ToolDataTableManager - -from galaxy.datatypes.checkers import * -from galaxy.datatypes.sequence import Fasta -from data_transfer import * - -log = logging.getLogger( __name__ ) - -__all__ = [ 'GenomeTransferPlugin' ] - -class GenomeTransferPlugin( DataTransfer ): - - locations = {} - - def __init__( self, app ): - super( GenomeTransferPlugin, self ).__init__( app ) - self.app = app - self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__'] - self.sa_session = app.model.context.current - tdtman = ToolDataTableManager( app.config.tool_data_path ) - xmltree = tdtman.load_from_config_file( app.config.tool_data_table_config_path, app.config.tool_data_path ) - for node in xmltree: - table = node.get('name') - location = node.findall('file')[0].get('path') - self.locations[table] = location - - def create_job( self, trans, url, dbkey, intname, indexes ): - job = trans.app.transfer_manager.new( protocol='http', url=url ) - params = dict( user=trans.user.id, transfer_job_id=job.id, protocol='http', type='init_transfer', url=url, dbkey=dbkey, indexes=indexes, intname=intname, liftover=None ) - deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'GenomeTransferPlugin', params = params ) - self.sa_session.add( deferred ) - self.sa_session.flush() - return deferred.id - - def check_job( self, job ): - if job.params['type'] == 'init_transfer': - if not hasattr(job, 'transfer_job'): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - if job.transfer_job.state == 'done': - transfer = job.transfer_job - transfer.state = 'downloaded' - job.params['type'] = 'extract_transfer' - self.sa_session.add( job ) - self.sa_session.add( transfer ) - self.sa_session.flush() - return self.job_states.READY - elif job.transfer_job.state == 'running': - return self.job_states.WAIT - elif job.transfer_job.state == 'new': - assert job.params[ 'protocol' ] in [ 'http', 'ftp', 'https' ], 'Unknown protocol %s' % job.params[ 'protocol' ] - self.app.transfer_manager.run( job.transfer_job ) - self.sa_session.add( job.transfer_job ) - self.sa_session.flush() - return self.job_states.WAIT - else: - log.error( "An error occurred while downloading from %s" % job.params[ 'url' ] ) - return self.job_states.INVALID - elif job.params[ 'type' ] == 'extract_transfer': - return self.job_states.READY - - def get_job_status( self, jobid ): - job = self.sa_session.query( self.app.model.DeferredJob ).get( int( jobid ) ) - if 'transfer_job_id' in job.params: - if not hasattr( job, 'transfer_job' ): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - return job - - def run_job( self, job ): - params = job.params - dbkey = params[ 'dbkey' ] - if not hasattr( job, 'transfer_job' ): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - 
self.sa_session.refresh( job.transfer_job ) - transfer = job.transfer_job - if params[ 'type' ] == 'extract_transfer': - CHUNK_SIZE = 2**20 - destpath = os.path.join( self.app.config.get( 'genome_data_path', 'tool-data/genome' ), job.params[ 'dbkey' ], 'seq' ) - destfile = '%s.fa' % job.params[ 'dbkey' ] - destfilepath = os.path.join( destpath, destfile ) - tmpprefix = '%s_%s_download_unzip_' % ( job.params['dbkey'], job.params[ 'transfer_job_id' ] ) - tmppath = os.path.dirname( os.path.abspath( transfer.path ) ) - if not os.path.exists( destpath ): - os.makedirs( destpath ) - protocol = job.params[ 'protocol' ] - data_type = self._check_compress( transfer.path ) - if data_type is None: - sniffer = Fasta() - if sniffer.sniff( transfer.path ): - data_type = 'fasta' - fd, uncompressed = tempfile.mkstemp( prefix=tmpprefix, dir=tmppath, text=False ) - if data_type in [ 'tar.gzip', 'tar.bzip' ]: - fp = open( transfer.path, 'r' ) - tar = tarfile.open( mode = 'r:*', bufsize = CHUNK_SIZE, fileobj = fp ) - files = tar.getmembers() - for filename in files: - z = tar.extractfile(filename) - while 1: - try: - chunk = z.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.write( fd, '\n' ) - os.close( fd ) - tar.close() - fp.close() - elif data_type == 'gzip': - compressed = gzip.open( transfer.path, mode = 'rb' ) - while 1: - try: - chunk = compressed.read( CHUNK_SIZE ) - except IOError: - compressed.close() - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - compressed.close() - elif data_type == 'bzip': - compressed = bz2.BZ2File( transfer.path, mode = 'r' ) - while 1: - try: - chunk = compressed.read( CHUNK_SIZE ) - except IOError: - compressed.close() - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - compressed.close() - elif data_type == 'zip': - uncompressed_name = None - unzipped = False - z = zipfile.ZipFile( transfer.path ) - z.debug = 3 - for name in z.namelist(): - if name.endswith('/'): - continue - zipped_file = z.open( name ) - while 1: - try: - chunk = zipped_file.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - log.error( 'Problem decompressing zipped data' ) - return self.app.model.DeferredJob.states.INVALID - if not chunk: - break - os.write( fd, chunk ) - zipped_file.close() - os.close( fd ) - z.close() - elif data_type == 'fasta': - uncompressed = transfer.path - else: - job.state = self.app.model.DeferredJob.states.INVALID - log.error( "Unrecognized compression format for file %s." 
% transfer.path ) - self.sa_session.add( job ) - self.sa_session.flush() - return - shutil.move( uncompressed, destfilepath ) - if os.path.exists( transfer.path ): - os.remove( transfer.path ) - os.chmod( destfilepath, 0644 ) - fastaline = '\t'.join( [ dbkey, dbkey, params[ 'intname' ], os.path.abspath( destfilepath ) ] ) - self._add_line( 'all_fasta', fastaline ) - if params[ 'indexes' ] is not None: - job.state = self.app.model.DeferredJob.states.WAITING - job.params[ 'indexjobs' ] = [] - else: - job.state = self.app.model.DeferredJob.states.OK - job.params[ 'type' ] = 'finish_transfer' - transfer.path = os.path.abspath(destfilepath) - transfer.state = 'done' - self.sa_session.add( job ) - self.sa_session.add( transfer ) - if transfer.state == 'done': - if params[ 'indexes' ] is not None: - for indexer in params[ 'indexes' ]: - incoming = dict(indexer=indexer, dbkey=params[ 'dbkey' ], intname=params[ 'intname' ], path=transfer.path, user=params['user'] ) - deferred = self.tool.execute( self, set_output_hid=False, history=None, incoming=incoming, transfer=transfer, deferred=job ) - job.params[ 'indexjobs' ].append( deferred[0].id ) - else: - job.state = self.app.model.DeferredJob.states.OK - self.sa_session.add( job ) - self.sa_session.flush() - return self.app.model.DeferredJob.states.OK - - def _check_compress( self, filepath ): - retval = '' - if tarfile.is_tarfile( filepath ): - retval = 'tar.' - if check_zip( filepath ): - return 'zip' - is_bzipped, is_valid = check_bz2( filepath ) - if is_bzipped and is_valid: - return retval + 'bzip' - is_gzipped, is_valid = check_gzip( filepath ) - if is_gzipped and is_valid: - return retval + 'gzip' - return None - - def _add_line( self, locfile, newline ): - filepath = self.locations[ locfile ] - origlines = [] - output = [] - comments = [] - with open( filepath, 'r' ) as destfile: - for line in destfile: - if line.startswith( '#' ): - comments.append( line.strip() ) - else: - origlines.append( line.strip() ) - if newline not in origlines: - origlines.append( newline ) - output.extend( comments ) - origlines.sort() - output.extend( origlines ) - with open( filepath, 'w+' ) as destfile: - destfile.write( '\n'.join( output ) ) - diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/deferred/liftover_transfer.py --- a/lib/galaxy/jobs/deferred/liftover_transfer.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -Module for managing genome transfer jobs. 
-""" -from __future__ import with_statement - -import logging, shutil, gzip, tempfile, sys - -from galaxy import eggs -from sqlalchemy import and_ - -from galaxy.util.odict import odict -from galaxy.workflow.modules import module_factory -from galaxy.jobs.actions.post import ActionBox - -from galaxy.tools.parameters import visit_input_values -from galaxy.tools.parameters.basic import DataToolParameter - -from galaxy.datatypes.checkers import * - -from data_transfer import * - -log = logging.getLogger( __name__ ) - -__all__ = [ 'LiftOverTransferPlugin' ] - -class LiftOverTransferPlugin( DataTransfer ): - - locations = {} - - def __init__( self, app ): - super( LiftOverTransferPlugin, self ).__init__( app ) - self.app = app - self.sa_session = app.model.context.current - - def create_job( self, trans, url, dbkey, from_genome, to_genome, destfile, parentjob ): - job = trans.app.transfer_manager.new( protocol='http', url=url ) - params = dict( user=trans.user.id, transfer_job_id=job.id, protocol='http', - type='init_transfer', dbkey=dbkey, from_genome=from_genome, - to_genome=to_genome, destfile=destfile, parentjob=parentjob ) - deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'LiftOverTransferPlugin', params = params ) - self.sa_session.add( deferred ) - self.sa_session.flush() - return deferred.id - - def check_job( self, job ): - if job.params['type'] == 'init_transfer': - if not hasattr(job, 'transfer_job'): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - if job.transfer_job.state == 'done': - transfer = job.transfer_job - transfer.state = 'downloaded' - job.params['type'] = 'extract_transfer' - self.sa_session.add( job ) - self.sa_session.add( transfer ) - self.sa_session.flush() - return self.job_states.READY - elif job.transfer_job.state == 'running': - return self.job_states.WAIT - elif job.transfer_job.state == 'new': - assert job.params[ 'protocol' ] in [ 'http', 'ftp', 'https' ], 'Unknown protocol %s' % job.params[ 'protocol' ] - ready = True - parent = self.sa_session.query( self.app.model.DeferredJob ).get( int( job.params[ 'parentjob' ] ) ) - if not hasattr( parent, 'transfer_job' ): - parent.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( parent.params[ 'transfer_job_id' ] ) ) - if parent.transfer_job.state not in [ 'ok', 'error', 'done' ]: - ready = False - for lo_job in parent.params[ 'liftover' ]: - liftoverjob = self.sa_session.query( self.app.model.TransferJob ).get( int( lo_job ) ) - if liftoverjob: - if liftoverjob.state not in [ 'ok', 'error', 'new', 'done' ]: - ready = False - if ready: - self.app.transfer_manager.run( job.transfer_job ) - self.sa_session.add( job.transfer_job ) - self.sa_session.flush() - return self.job_states.WAIT - else: - log.error( "An error occurred while downloading from %s" % job.transfer_job.params[ 'url' ] ) - return self.job_states.INVALID - elif job.params[ 'type' ] == 'extract_transfer': - return self.job_states.READY - - def get_job_status( self, jobid ): - job = self.sa_session.query( self.app.model.DeferredJob ).get( int( jobid ) ) - return job - - def run_job( self, job ): - params = job.params - dbkey = params[ 'dbkey' ] - source = params[ 'from_genome' ] - target = params[ 'to_genome' ] - if not hasattr( job, 'transfer_job' ): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 
'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - transfer = job.transfer_job - if params[ 'type' ] == 'extract_transfer': - CHUNK_SIZE = 2**20 - destpath = os.path.join( self.app.config.get( 'genome_data_path', 'tool-data/genome' ), source, 'liftOver' ) - if not os.path.exists( destpath ): - os.makedirs( destpath ) - destfile = job.params[ 'destfile' ] - destfilepath = os.path.join( destpath, destfile ) - tmpprefix = '%s_%s_download_unzip_' % ( job.params['dbkey'], job.params[ 'transfer_job_id' ] ) - tmppath = os.path.dirname( os.path.abspath( transfer.path ) ) - if not os.path.exists( destpath ): - os.makedirs( destpath ) - fd, uncompressed = tempfile.mkstemp( prefix=tmpprefix, dir=tmppath, text=False ) - chain = gzip.open( transfer.path, 'rb' ) - while 1: - try: - chunk = chain.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - chain.close() - # Replace the gzipped file with the decompressed file if it's safe to do so - shutil.move( uncompressed, destfilepath ) - os.remove( transfer.path ) - os.chmod( destfilepath, 0644 ) - locline = '\t'.join( [ source, target, os.path.abspath( destfilepath ) ] ) - self._add_line( locline ) - job.state = self.app.model.DeferredJob.states.OK - job.params[ 'type' ] = 'finish_transfer' - transfer.path = os.path.abspath(destfilepath) - transfer.state = 'done' - parentjob = self.sa_session.query( self.app.model.DeferredJob ).get( int( job.params[ 'parentjob' ] ) ) - finished = True - for i in parentjob.params[ 'liftover' ]: - sibling = self.sa_session.query( self.app.model.DeferredJob ).get( int( i ) ) - if sibling.state not in [ 'done', 'ok', 'error' ]: - finished = False - if finished: - parentjob.state = self.app.model.DeferredJob.states.OK - self.sa_session.add( parentjob ) - self.sa_session.add( job ) - self.sa_session.add( transfer ) - self.sa_session.flush() - return self.app.model.DeferredJob.states.OK - - def _add_line( self, newline ): - filepath = 'tool-data/liftOver.loc' - origlines = [] - with open( filepath, 'r' ) as destfile: - for line in destfile: - origlines.append( line.strip() ) - if newline not in origlines: - origlines.append( newline ) - with open( filepath, 'w+' ) as destfile: - destfile.write( '\n'.join( origlines ) ) - diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/mapper.py --- a/lib/galaxy/jobs/mapper.py +++ b/lib/galaxy/jobs/mapper.py @@ -83,7 +83,7 @@ if "job" in function_arg_names or "user" in function_arg_names or "user_email" in function_arg_names or "resource_params" in function_arg_names: job = self.job_wrapper.get_job() history = job.history - user = history and history.user + user = job.user user_email = user and str(user.email) if "job" in function_arg_names: diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/metrics/formatting.py --- a/lib/galaxy/jobs/metrics/formatting.py +++ b/lib/galaxy/jobs/metrics/formatting.py @@ -15,4 +15,4 @@ elif value < 3600: return "%s minutes" % ( value / 60 ) else: - return "%s days and %s minutes" % ( value / 3600, ( value % 3600 ) / 60 ) + return "%s hours and %s minutes" % ( value / 3600, ( value % 3600 ) / 60 ) diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/__init__.py --- a/lib/galaxy/managers/__init__.py +++ 
b/lib/galaxy/managers/__init__.py @@ -1,4 +1,33 @@ -""" 'Business logic' independent of web transactions/user context (trans) -should be pushed into models - but logic that requires the context trans -should be placed under this module. """ +Classes that manage resources (models, tools, etc.) by using the current +Transaction. + +Encapsulates the intersection of trans (or trans.sa_session), models, +and Controllers. + +Responsibilities: + model operations that involve the trans/sa_session (CRUD) + security: + ownership, accessibility + common aspect-oriented operations via new mixins: + sharable, annotatable, tagable, ratable + +Not responsible for: + encoding/decoding ids + any http gobblygook + formatting of returned data (always python structures) + formatting of raised errors + +The goal is to have Controllers only handle: + query-string/payload parsing and encoding/decoding ids + http + return formatting + +and: + control, improve namespacing in Controllers + DRY for Controller ops (define here - use in both UI/API Controllers) + +In other words, 'Business logic' independent of web transactions/user context +(trans) should be pushed into models - but logic that requires the context +trans should be placed under this module. +""" diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/base.py --- /dev/null +++ b/lib/galaxy/managers/base.py @@ -0,0 +1,7 @@ + + +class ModelManager( object ): + pass + +class ModelSerializer( object ): + pass diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/hdas.py --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -1,16 +1,35 @@ +""" +Manager and Serializer for HDAs. + +HistoryDatasetAssociations (HDAs) are datasets contained or created in a +history. +""" + from galaxy import exceptions -from ..managers import histories +from galaxy.managers import base as manager_base +from galaxy.managers import histories as history_manager -class HDAManager( object ): +import galaxy.web +import galaxy.datatypes.metadata +from galaxy import objectstore + +class HDAManager( manager_base.ModelManager ): + """ + Interface/service object for interacting with HDAs. + """ def __init__( self ): - self.histories_mgr = histories.HistoryManager() + """ + Set up and initialize other managers needed by hdas. + """ + self.histories_mgr = history_manager.HistoryManager() def get( self, trans, unencoded_id, check_ownership=True, check_accessible=True ): """ + Get an HDA by its unencoded db id, checking ownership (via its history) + or accessibility (via dataset shares/permissions). """ - # this is a replacement for UsesHistoryDatasetAssociationMixin because mixins are a bad soln/structure hda = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( unencoded_id ) if hda is None: raise exceptions.ObjectNotFound() @@ -19,7 +38,8 @@ def secure( self, trans, hda, check_ownership=True, check_accessible=True ): """ - checks if (a) user owns item or (b) item is accessible to user. + check ownership (via its history) or accessibility (via dataset + shares/permissions). """ # all items are accessible to an admin if trans.user and trans.user_is_admin(): @@ -31,12 +51,18 @@ return hda def can_access_dataset( self, trans, hda ): + """ + Use security agent to see if current user has access to dataset. 
+ """ current_user_roles = trans.get_current_user_roles() return trans.app.security_agent.can_access_dataset( current_user_roles, hda.dataset ) #TODO: is_owner, is_accessible def check_ownership( self, trans, hda ): + """ + Use history to see if current user owns HDA. + """ if not trans.user: #if hda.history == trans.history: # return hda @@ -51,6 +77,9 @@ "HistoryDatasetAssociation is not owned by the current user", type='error' ) def check_accessible( self, trans, hda ): + """ + Raise error if HDA is not accessible. + """ if trans.user and trans.user_is_admin(): return hda # check for access of the containing history... @@ -62,6 +91,151 @@ "HistoryDatasetAssociation is not accessible to the current user", type='error' ) def err_if_uploading( self, trans, hda ): + """ + Raise error if HDA is still uploading. + """ if hda.state == trans.model.Dataset.states.UPLOAD: raise exceptions.Conflict( "Please wait until this dataset finishes uploading" ) return hda + + def get_hda_dict( self, trans, hda ): + """ + Return full details of this HDA in dictionary form. + """ + #precondition: the user's access to this hda has already been checked + #TODO:?? postcondition: all ids are encoded (is this really what we want at this level?) + expose_dataset_path = trans.user_is_admin() or trans.app.config.expose_dataset_path + hda_dict = hda.to_dict( view='element', expose_dataset_path=expose_dataset_path ) + hda_dict[ 'api_type' ] = "file" + + # Add additional attributes that depend on trans must be added here rather than at the model level. + can_access_hda = trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), hda.dataset ) + can_access_hda = ( trans.user_is_admin() or can_access_hda ) + if not can_access_hda: + return self.get_inaccessible_hda_dict( trans, hda ) + hda_dict[ 'accessible' ] = True + + #TODO: I'm unclear as to which access pattern is right + hda_dict[ 'annotation' ] = hda.get_item_annotation_str( trans.sa_session, hda.history.user, hda ) + #annotation = getattr( hda, 'annotation', hda.get_item_annotation_str( trans.sa_session, trans.user, hda ) ) + + # ---- return here if deleted AND purged OR can't access + purged = ( hda.purged or hda.dataset.purged ) + if ( hda.deleted and purged ): + #TODO: to_dict should really go AFTER this - only summary data + return trans.security.encode_dict_ids( hda_dict ) + + if expose_dataset_path: + try: + hda_dict[ 'file_name' ] = hda.file_name + except objectstore.ObjectNotFound: + log.exception( 'objectstore.ObjectNotFound, HDA %s.', hda.id ) + + hda_dict[ 'download_url' ] = galaxy.web.url_for( 'history_contents_display', + history_id = trans.security.encode_id( hda.history.id ), + history_content_id = trans.security.encode_id( hda.id ) ) + + # indeces, assoc. metadata files, etc. 
+ meta_files = [] + for meta_type in hda.metadata.spec.keys(): + if isinstance( hda.metadata.spec[ meta_type ].param, galaxy.datatypes.metadata.FileParameter ): + meta_files.append( dict( file_type=meta_type ) ) + if meta_files: + hda_dict[ 'meta_files' ] = meta_files + + # currently, the viz reg is optional - handle on/off + if trans.app.visualizations_registry: + hda_dict[ 'visualizations' ] = trans.app.visualizations_registry.get_visualizations( trans, hda ) + else: + hda_dict[ 'visualizations' ] = hda.get_visualizations() + #TODO: it may also be wiser to remove from here and add as API call that loads the visualizations + # when the visualizations button is clicked (instead of preloading/pre-checking) + + # ---- return here if deleted + if hda.deleted and not purged: + return trans.security.encode_dict_ids( hda_dict ) + + return trans.security.encode_dict_ids( hda_dict ) + + def get_inaccessible_hda_dict( self, trans, hda ): + """ + Return truncated serialization of HDA when inaccessible to user. + """ + return trans.security.encode_dict_ids({ + 'id' : hda.id, + 'history_id': hda.history.id, + 'hid' : hda.hid, + 'name' : hda.name, + 'state' : hda.state, + 'deleted' : hda.deleted, + 'visible' : hda.visible, + 'accessible': False + }) + + def get_hda_dict_with_error( self, trans, hda=None, history_id=None, id=None, error_msg='Error' ): + """ + Return truncated serialization of HDA when error raised getting + details. + """ + return trans.security.encode_dict_ids({ + 'id' : hda.id if hda else id, + 'history_id': hda.history.id if hda else history_id, + 'hid' : hda.hid if hda else '(unknown)', + 'name' : hda.name if hda else '(unknown)', + 'error' : error_msg, + 'state' : trans.model.Dataset.states.NEW + }) + + def get_display_apps( self, trans, hda ): + """ + Return dictionary containing new-style display app urls. + """ + display_apps = [] + for display_app in hda.get_display_applications( trans ).itervalues(): + + app_links = [] + for link_app in display_app.links.itervalues(): + app_links.append({ + 'target': link_app.url.get( 'target_frame', '_blank' ), + 'href' : link_app.get_display_url( hda, trans ), + 'text' : gettext( link_app.name ) + }) + if app_links: + display_apps.append( dict( label=display_app.name, links=app_links ) ) + + return display_apps + + def get_old_display_applications( self, trans, hda ): + """ + Return dictionary containing old-style display app urls. + """ + display_apps = [] + if not trans.app.config.enable_old_display_applications: + return display_apps + + for display_app in hda.datatype.get_display_types(): + target_frame, display_links = hda.datatype.get_display_links( hda, + display_app, trans.app, trans.request.base ) + + if len( display_links ) > 0: + display_label = hda.datatype.get_display_label( display_app ) + + app_links = [] + for display_name, display_link in display_links: + app_links.append({ + 'target': target_frame, + 'href' : display_link, + 'text' : gettext( display_name ) + }) + if app_links: + display_apps.append( dict( label=display_label, links=app_links ) ) + + return display_apps + + +# ============================================================================= +class HistorySerializer( manager_base.ModelSerializer ): + """ + Interface/service object for serializing HDAs into dictionaries. 
+ """ + pass diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/histories.py --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -1,9 +1,32 @@ +""" +Manager and Serializer for histories. + +Histories are containers for datasets or dataset collections +created (or copied) by users over the course of an analysis. +""" + from galaxy import exceptions from galaxy.model import orm +from galaxy.managers import base as manager_base +import galaxy.managers.hdas -class HistoryManager( object ): +import galaxy.web +import galaxy.dataset_collections.util + +import logging +log = logging.getLogger( __name__ ) + + +# ============================================================================= +class HistoryManager( manager_base.ModelManager ): + """ + Interface/service object for interacting with HDAs. + """ + #TODO: all the following would be more useful if passed the user instead of defaulting to trans.user + def __init__( self, *args, **kwargs ): + super( HistoryManager, self ).__init__( *args, **kwargs ) def get( self, trans, unencoded_id, check_ownership=True, check_accessible=True, deleted=None ): """ @@ -44,7 +67,7 @@ def secure( self, trans, history, check_ownership=True, check_accessible=True ): """ - checks if (a) user owns item or (b) item is accessible to user. + Checks if (a) user owns item or (b) item is accessible to user. """ # all items are accessible to an admin if trans.user and trans.user_is_admin(): @@ -56,15 +79,28 @@ return history def is_current( self, trans, history ): + """ + True if the given history is the user's current history. + + Returns False if the session has no current history. + """ + if trans.history is None: + return False return trans.history == history def is_owner( self, trans, history ): + """ + True if the current user is the owner of the given history. + """ # anon users are only allowed to view their current history if not trans.user: return self.is_current( trans, history ) return trans.user == history.user def check_ownership( self, trans, history ): + """ + Raises error if the current user is not the owner of the history. + """ if trans.user and trans.user_is_admin(): return history if not trans.user and not self.is_current( trans, history ): @@ -74,6 +110,9 @@ raise exceptions.ItemOwnershipException( "History is not owned by the current user", type='error' ) def is_accessible( self, trans, history ): + """ + True if the user can access (read) the current history. + """ # admin always have access if trans.user and trans.user_is_admin(): return True @@ -88,6 +127,103 @@ return False def check_accessible( self, trans, history ): + """ + Raises error if the current user can't access the history. + """ if self.is_accessible( trans, history ): return history raise exceptions.ItemAccessibilityException( "History is not accessible to the current user", type='error' ) + + #TODO: bleh... + def _get_history_data( self, trans, history ): + """ + Returns a dictionary containing ``history`` and ``contents``, serialized + history and an array of serialized history contents respectively. 
+ """ + hda_mgr = galaxy.managers.hdas.HDAManager() + collection_dictifier = galaxy.dataset_collections.util.dictify_dataset_collection_instance + + history_dictionary = {} + contents_dictionaries = [] + try: + #for content in history.contents_iter( **contents_kwds ): + for content in history.contents_iter( types=[ 'dataset', 'dataset_collection' ] ): + hda_dict = {} + + if isinstance( content, trans.app.model.HistoryDatasetAssociation ): + try: + hda_dict = hda_mgr.get_hda_dict( trans, content ) + except Exception, exc: + # don't fail entire list if hda err's, record and move on + log.exception( 'Error bootstrapping hda: %s', exc ) + hda_dict = hda_mgr.get_hda_dict_with_error( trans, content, str( exc ) ) + + elif isinstance( content, trans.app.model.HistoryDatasetCollectionAssociation ): + try: + service = trans.app.dataset_collections_service + dataset_collection_instance = service.get_dataset_collection_instance( + trans=trans, + instance_type='history', + id=trans.security.encode_id( content.id ), + ) + hda_dict = collection_dictifier( dataset_collection_instance, + security=trans.security, parent=dataset_collection_instance.history, view="element" ) + + except Exception, exc: + log.exception( "Error in history API at listing dataset collection: %s", exc ) + #TODO: return some dict with the error + + contents_dictionaries.append( hda_dict ) + + # re-use the hdas above to get the history data... + history_dictionary = self.get_history_dict( trans, history, contents_dictionaries=contents_dictionaries ) + + except Exception, exc: + user_id = str( trans.user.id ) if trans.user else '(anonymous)' + log.exception( 'Error bootstrapping history for user %s: %s', user_id, str( exc ) ) + message = ( 'An error occurred getting the history data from the server. ' + + 'Please contact a Galaxy administrator if the problem persists.' ) + history_dictionary[ 'error' ] = message + + return { + 'history' : history_dictionary, + 'contents' : contents_dictionaries + } + + def get_history_dict( self, trans, history, contents_dictionaries=None ): + """ + Returns history data in the form of a dictionary. 
+ """ + #TODO: to serializer + history_dict = history.to_dict( view='element', value_mapper={ 'id':trans.security.encode_id }) + history_dict[ 'user_id' ] = None + if history.user_id: + history_dict[ 'user_id' ] = trans.security.encode_id( history.user_id ) + + history_dict[ 'nice_size' ] = history.get_disk_size( nice_size=True ) + history_dict[ 'annotation' ] = history.get_item_annotation_str( trans.sa_session, history.user, history ) + if not history_dict[ 'annotation' ]: + history_dict[ 'annotation' ] = '' + + #TODO: item_slug url + if history_dict[ 'importable' ] and history_dict[ 'slug' ]: + username_and_slug = ( '/' ).join(( 'u', history.user.username, 'h', history_dict[ 'slug' ] )) + history_dict[ 'username_and_slug' ] = username_and_slug + +#TODO: re-add + #hda_summaries = hda_dictionaries if hda_dictionaries else self.get_hda_summary_dicts( trans, history ) + ##TODO remove the following in v2 + #( state_counts, state_ids ) = self._get_hda_state_summaries( trans, hda_summaries ) + #history_dict[ 'state_details' ] = state_counts + #history_dict[ 'state_ids' ] = state_ids + #history_dict[ 'state' ] = self._get_history_state_from_hdas( trans, history, state_counts ) + + return history_dict + + +# ============================================================================= +class HistorySerializer( manager_base.ModelSerializer ): + """ + Interface/service object for serializing histories into dictionaries. + """ + pass diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/model/migrate/versions/0104_update_genome_downloader_job_parameters.py --- a/lib/galaxy/model/migrate/versions/0104_update_genome_downloader_job_parameters.py +++ b/lib/galaxy/model/migrate/versions/0104_update_genome_downloader_job_parameters.py @@ -23,6 +23,8 @@ handler.setFormatter( formatter ) log.addHandler( handler ) +metadata = MetaData() +context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) ) class DeferredJob( object ): states = Bunch( NEW = 'new', @@ -37,12 +39,8 @@ self.params = params def upgrade(migrate_engine): - metadata = MetaData() metadata.bind = migrate_engine - Session = sessionmaker( bind=migrate_engine) - context = Session() - DeferredJob.table = Table( "deferred_job", metadata, Column( "id", Integer, primary_key=True ), Column( "create_time", DateTime, default=now ), @@ -70,12 +68,8 @@ context.flush() def downgrade(migrate_engine): - metadata = MetaData() metadata.bind = migrate_engine - Session = sessionmaker( bind=migrate_engine) - context = Session() - jobs = context.query( DeferredJob ).filter_by( plugin='GenomeTransferPlugin' ).all() for job in jobs: diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3213,7 +3213,9 @@ # Populate tool_type to ToolClass mappings tool_types = {} -for tool_class in [ Tool, DataDestinationTool, SetMetadataTool, DataSourceTool, AsyncDataSourceTool, DataManagerTool ]: +for tool_class in [ Tool, SetMetadataTool, OutputParameterJSONTool, + DataManagerTool, DataSourceTool, AsyncDataSourceTool, + DataDestinationTool ]: tool_types[ tool_class.tool_type ] = tool_class diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/actions/index_genome.py --- a/lib/galaxy/tools/actions/index_genome.py +++ /dev/null @@ -1,67 +0,0 @@ -import tempfile -from __init__ import ToolAction -from galaxy.util.odict import 
odict -from galaxy.tools.genome_index import * - -import logging -log = logging.getLogger( __name__ ) - -class GenomeIndexToolAction( ToolAction ): - """Tool action used for exporting a history to an archive. """ - - def execute( self, tool, trans, *args, **kwargs ): - # - # Get genome to index. - # - incoming = kwargs['incoming'] - # - # Create the job and output dataset objects - # - job = trans.app.model.Job() - job.tool_id = tool.id - job.user_id = incoming['user'] - start_job_state = job.state # should be job.states.NEW - job.state = job.states.WAITING # we need to set job state to something other than NEW, - # or else when tracking jobs in db it will be picked up - # before we have added input / output parameters - trans.sa_session.add( job ) - - # Create dataset that will serve as archive. - temp_dataset = trans.app.model.Dataset( state=trans.app.model.Dataset.states.NEW ) - trans.sa_session.add( temp_dataset ) - - trans.sa_session.flush() # ensure job.id and archive_dataset.id are available - trans.app.object_store.create( temp_dataset ) # set the object store id, create dataset (because galaxy likes having datasets) - - # - # Setup job and job wrapper. - # - - # Add association for keeping track of index jobs, transfer jobs, and so on. - user = trans.sa_session.query( trans.app.model.User ).get( int( incoming['user'] ) ) - assoc = trans.app.model.GenomeIndexToolData( job=job, dataset=temp_dataset, fasta_path=incoming['path'], \ - indexer=incoming['indexer'], user=user, \ - deferred_job=kwargs['deferred'], transfer_job=kwargs['transfer'] ) - trans.sa_session.add( assoc ) - - job_wrapper = GenomeIndexToolWrapper( job ) - cmd_line = job_wrapper.setup_job( assoc ) - - # - # Add parameters to job_parameter table. - # - incoming[ '__GENOME_INDEX_COMMAND__' ] = cmd_line - for name, value in tool.params_to_strings( incoming, trans.app ).iteritems(): - job.add_parameter( name, value ) - - job.state = start_job_state # job inputs have been configured, restore initial job state - job.set_handler(tool.get_job_handler(None)) - trans.sa_session.flush() - - - # Queue the job for execution - trans.app.job_queue.put( job.id, tool.id ) - log.info( "Added genome index job to the job queue, id: %s" % str( job.id ) ) - - return job, odict() - diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/deps/containers.py --- a/lib/galaxy/tools/deps/containers.py +++ b/lib/galaxy/tools/deps/containers.py @@ -191,19 +191,26 @@ # TODO: Remove redundant volumes... 
volumes = docker_util.DockerVolume.volumes_from_str(volumes_raw) volumes_from = self.destination_info.get("docker_volumes_from", docker_util.DEFAULT_VOLUMES_FROM) - return docker_util.build_docker_run_command( + + docker_host_props = dict( + docker_cmd=prop("cmd", docker_util.DEFAULT_DOCKER_COMMAND), + sudo=asbool(prop("sudo", docker_util.DEFAULT_SUDO)), + sudo_cmd=prop("sudo_cmd", docker_util.DEFAULT_SUDO_COMMAND), + host=prop("host", docker_util.DEFAULT_HOST), + ) + + cache_command = docker_util.build_docker_cache_command(self.container_id, **docker_host_props) + run_command = docker_util.build_docker_run_command( command, self.container_id, volumes=volumes, volumes_from=volumes_from, env_directives=env_directives, working_directory=working_directory, - docker_cmd=prop("cmd", docker_util.DEFAULT_DOCKER_COMMAND), - sudo=asbool(prop("sudo", docker_util.DEFAULT_SUDO)), - sudo_cmd=prop("sudo_cmd", docker_util.DEFAULT_SUDO_COMMAND), - host=prop("host", docker_util.DEFAULT_HOST), - net=prop("net", "none") # By default, docker instance has networking disabled + net=prop("net", "none"), # By default, docker instance has networking disabled + **docker_host_props ) + return "%s\n%s" % (cache_command, run_command) def __expand_str(self, value): if not value: diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/deps/docker_util.py --- a/lib/galaxy/tools/deps/docker_util.py +++ b/lib/galaxy/tools/deps/docker_util.py @@ -50,28 +50,41 @@ return ":".join([self.from_path, self.to_path, self.how]) +def build_docker_cache_command( + image, + docker_cmd=DEFAULT_DOCKER_COMMAND, + sudo=DEFAULT_SUDO, + sudo_cmd=DEFAULT_SUDO_COMMAND, + host=DEFAULT_HOST, +): + inspect_command_parts = __docker_prefix(docker_cmd, sudo, sudo_cmd, host) + inspect_command_parts.extend(["inspect", image]) + inspect_image_command = " ".join(inspect_command_parts) + + pull_command_parts = __docker_prefix(docker_cmd, sudo, sudo_cmd, host) + pull_command_parts.extend(["pull", image]) + pull_image_command = " ".join(pull_command_parts) + cache_command = "%s > /dev/null 2>&1\n[ $? -ne 0 ] && %s > /dev/null 2>&1\n" % (inspect_image_command, pull_image_command) + return cache_command + + def build_docker_run_command( container_command, image, tag=None, - docker_cmd=DEFAULT_DOCKER_COMMAND, volumes=[], volumes_from=DEFAULT_VOLUMES_FROM, memory=DEFAULT_MEMORY, env_directives=[], working_directory=DEFAULT_WORKING_DIRECTORY, + name=None, + net=DEFAULT_NET, + docker_cmd=DEFAULT_DOCKER_COMMAND, sudo=DEFAULT_SUDO, sudo_cmd=DEFAULT_SUDO_COMMAND, - name=None, host=DEFAULT_HOST, - net=DEFAULT_NET, ): - command_parts = [] - if sudo: - command_parts.append(sudo_cmd) - command_parts.append(docker_cmd) - if host: - command_parts.append(["-H", host]) + command_parts = __docker_prefix(docker_cmd, sudo, sudo_cmd, host) command_parts.append("run") for env_directive in env_directives: command_parts.extend(["-e", env_directive]) @@ -93,3 +106,15 @@ command_parts.append(full_image) command_parts.append(container_command) return " ".join(command_parts) + + +def __docker_prefix(docker_cmd, sudo, sudo_cmd, host): + """ Prefix to issue a docker command. 
+ """ + command_parts = [] + if sudo: + command_parts.append(sudo_cmd) + command_parts.append(docker_cmd) + if host: + command_parts.append(["-H", host]) + return command_parts diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/genome_index/__init__.py --- a/lib/galaxy/tools/genome_index/__init__.py +++ /dev/null @@ -1,243 +0,0 @@ -from __future__ import with_statement - -import json -import logging -import os -import shutil -import tarfile -import tempfile - -from galaxy import model, util -from galaxy.web.framework.helpers import to_unicode -from galaxy.model.item_attrs import UsesAnnotations -from galaxy.util.json import * -from galaxy.web.base.controller import UsesHistoryMixin -from galaxy.tools.data import ToolDataTableManager - - -log = logging.getLogger(__name__) - -def load_genome_index_tools( toolbox ): - """ Adds tools for indexing genomes via the main job runner. """ - # Create XML for loading the tool. - tool_xml_text = """ - <tool id="__GENOME_INDEX__" name="Index Genome" version="0.1" tool_type="genome_index"> - <type class="GenomeIndexTool" module="galaxy.tools"/> - <action module="galaxy.tools.actions.index_genome" class="GenomeIndexToolAction"/> - <command>$__GENOME_INDEX_COMMAND__ $output_file $output_file.files_path "$__app__.config.rsync_url" "$__app__.config.tool_data_path"</command> - <inputs> - <param name="__GENOME_INDEX_COMMAND__" type="hidden"/> - </inputs> - <outputs> - <data format="txt" name="output_file"/> - </outputs> - <stdio> - <exit_code range="1:" err_level="fatal" /> - </stdio> - </tool> - """ - - # Load index tool. - tmp_name = tempfile.NamedTemporaryFile() - tmp_name.write( tool_xml_text ) - tmp_name.flush() - genome_index_tool = toolbox.load_tool( tmp_name.name ) - toolbox.tools_by_id[ genome_index_tool.id ] = genome_index_tool - log.debug( "Loaded genome index tool: %s", genome_index_tool.id ) - -class GenomeIndexToolWrapper( object ): - """ Provides support for performing jobs that index a genome. """ - def __init__( self, job_id ): - self.locations = dict() - self.job_id = job_id - - def setup_job( self, genobj ): - """ Perform setup for job to index a genome and return an archive. Method generates - attribute files, sets the corresponding attributes in the associated database - object, and returns a command line for running the job. The command line - includes the command, inputs, and options; it does not include the output - file because it must be set at runtime. """ - - # - # Create and return command line for running tool. - # - scriptpath = os.path.join( os.path.abspath( os.getcwd() ), "lib/galaxy/tools/genome_index/index_genome.py" ) - return "python %s %s %s" % ( scriptpath, genobj.indexer, genobj.fasta_path ) - - def postprocessing( self, sa_session, app ): - """ Finish the job, move the finished indexes to their final resting place, - and update the .loc files where applicable. 
""" - gitd = sa_session.query( model.GenomeIndexToolData ).filter_by( job_id=self.job_id ).first() - indexdirs = dict( bfast='bfast_index', bowtie='bowtie_index', bowtie2='bowtie2_index', - bwa='bwa_index', perm='perm_%s_index', picard='srma_index', sam='sam_index' ) - - - if gitd: - fp = open( gitd.dataset.get_file_name(), 'r' ) - deferred = sa_session.query( model.DeferredJob ).filter_by( id=gitd.deferred_job_id ).first() - try: - logloc = json.load( fp ) - except ValueError: - deferred.state = app.model.DeferredJob.states.ERROR - sa_session.add( deferred ) - sa_session.flush() - log.debug( 'Indexing job failed, setting deferred job state to error.' ) - return False - finally: - fp.close() - destination = None - tdtman = ToolDataTableManager( app.config.tool_data_path ) - xmltree = tdtman.load_from_config_file( app.config.tool_data_table_config_path, app.config.tool_data_path ) - for node in xmltree: - table = node.get('name') - location = node.findall('file')[0].get('path') - self.locations[table] = os.path.abspath( location ) - locbase = os.path.abspath( os.path.split( self.locations['all_fasta'] )[0] ) - params = deferred.params - dbkey = params[ 'dbkey' ] - basepath = os.path.join( os.path.abspath( app.config.genome_data_path ), dbkey ) - intname = params[ 'intname' ] - indexer = gitd.indexer - workingdir = os.path.abspath( gitd.dataset.extra_files_path ) - location = [] - indexdata = gitd.dataset.extra_files_path - if indexer == '2bit': - indexdata = os.path.join( workingdir, '%s.2bit' % dbkey ) - destination = os.path.join( basepath, 'seq', '%s.2bit' % dbkey ) - location.append( dict( line='\t'.join( [ 'seq', dbkey, destination ] ), file= os.path.join( locbase, 'alignseq.loc' ) ) ) - elif indexer == 'bowtie': - self._ex_tar( workingdir, 'cs.tar' ) - destination = os.path.join( basepath, 'bowtie_index' ) - for var in [ 'nt', 'cs' ]: - for line in logloc[ var ]: - idx = line - if var == 'nt': - locfile = self.locations[ 'bowtie_indexes' ] - locdir = os.path.join( destination, idx ) - else: - locfile = self.locations[ 'bowtie_indexes_color' ] - locdir = os.path.join( destination, var, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'bowtie2': - destination = os.path.join( basepath, 'bowtie2_index' ) - for line in logloc[ 'nt' ]: - idx = line - locfile = self.locations[ 'bowtie2_indexes' ] - locdir = os.path.join( destination, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'bwa': - self._ex_tar( workingdir, 'cs.tar' ) - destination = os.path.join( basepath, 'bwa_index' ) - for var in [ 'nt', 'cs' ]: - for line in logloc[ var ]: - idx = line - if var == 'nt': - locfile = self.locations[ 'bwa_indexes' ] - locdir = os.path.join( destination, idx ) - else: - locfile = self.locations[ 'bwa_indexes_color' ] - locdir = os.path.join( destination, var, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'perm': - self._ex_tar( workingdir, 'cs.tar' ) - destination = os.path.join( basepath, 'perm_index' ) - for var in [ 'nt', 'cs' ]: - for line in logloc[ var ]: - idx = line.pop() - if var == 'nt': - locfile = self.locations[ 'perm_base_indexes' ] - locdir = os.path.join( destination, idx ) - else: - locfile = self.locations[ 'perm_color_indexes' ] - locdir = os.path.join( destination, var, idx ) - line.append( locdir ) - location.append( dict( line='\t'.join( line ), file=locfile ) ) - 
elif indexer == 'picard': - destination = os.path.join( basepath, 'srma_index' ) - for var in [ 'nt' ]: - for line in logloc[ var ]: - idx = line - locfile = self.locations[ 'picard_indexes' ] - locdir = os.path.join( destination, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'sam': - destination = os.path.join( basepath, 'sam_index' ) - for var in [ 'nt' ]: - for line in logloc[ var ]: - locfile = self.locations[ 'sam_fa_indexes' ] - locdir = os.path.join( destination, line ) - location.append( dict( line='\t'.join( [ 'index', dbkey, locdir ] ), file=locfile ) ) - - if destination is not None and os.path.exists( os.path.split( destination )[0] ) and not os.path.exists( destination ): - log.debug( 'Moving %s to %s' % ( indexdata, destination ) ) - shutil.move( indexdata, destination ) - if indexer not in [ '2bit' ]: - genome = '%s.fa' % dbkey - target = os.path.join( destination, genome ) - fasta = os.path.abspath( os.path.join( basepath, 'seq', genome ) ) - self._check_link( fasta, target ) - if os.path.exists( os.path.join( destination, 'cs' ) ): - target = os.path.join( destination, 'cs', genome ) - fasta = os.path.abspath( os.path.join( basepath, 'seq', genome ) ) - self._check_link( fasta, target ) - for line in location: - self._add_line( line[ 'file' ], line[ 'line' ] ) - deferred.state = app.model.DeferredJob.states.OK - sa_session.add( deferred ) - sa_session.flush() - - - def _check_link( self, targetfile, symlink ): - target = os.path.relpath( targetfile, os.path.dirname( symlink ) ) - filename = os.path.basename( targetfile ) - if not os.path.exists( targetfile ): # this should never happen. - raise Exception, "%s not found. Unable to proceed without a FASTA file. Aborting." % targetfile - if os.path.exists( symlink ) and os.path.islink( symlink ): - if os.path.realpath( symlink ) == os.path.abspath( targetfile ): # symlink exists, points to the correct FASTA file. - return - else: # no it doesn't. Make a new one, and this time do it right. - os.remove( symlink ) - os.symlink( target, symlink ) - return - elif not os.path.exists( symlink ): # no symlink to the FASTA file. Create one. - os.symlink( target, symlink ) - return - elif os.path.exists( symlink ) and not os.path.islink( symlink ): - if self._hash_file( targetfile ) == self._hash_file( symlink ): # files are identical. No need to panic. - return - else: - if os.path.getsize( symlink ) == 0: # somehow an empty file got copied instead of the symlink. Delete with extreme prejudice. - os.remove( symlink ) - os.symlink( target, symlink ) - return - else: - raise Exception, "Regular file %s exists, is not empty, contents do not match %s." 
% ( symlink, targetfile ) - - def _hash_file( self, filename ): - import hashlib - md5 = hashlib.md5() - with open( filename, 'rb' ) as f: - for chunk in iter( lambda: f.read( 8192 ), '' ): - md5.update( chunk ) - return md5.digest() - - - def _ex_tar( self, directory, filename ): - fh = tarfile.open( os.path.join( directory, filename ) ) - fh.extractall( path=directory ) - fh.close() - os.remove( os.path.join( directory, filename ) ) - - def _add_line( self, locfile, newline ): - filepath = locfile - origlines = [] - output = [] - comments = [] - with open( filepath, 'r' ) as destfile: - for line in destfile: - origlines.append( line.strip() ) - if newline not in origlines: - origlines.append( newline ) - with open( filepath, 'w+' ) as destfile: - origlines.append( '' ) - destfile.write( '\n'.join( origlines ) ) This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/4ca1677c8d5b/ Changeset: 4ca1677c8d5b User: iracooke Date: 2014-07-24 09:16:24 Summary: Added whitelisting to sqlite data provider Affected #: 1 file diff -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b -r 4ca1677c8d5bc6486b0b1dff50372a23dfaf307d lib/galaxy/datatypes/dataproviders/dataset.py --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -12,6 +12,7 @@ import column import external import sqlite3 +import re from galaxy import eggs eggs.require( 'bx-python' ) @@ -720,8 +721,18 @@ self.connection.row_factory = sqlite3.Row super( SQliteDataProvider, self ).__init__( source, **kwargs ) + def query_matches_whitelist(self,query): + if re.match("select ",query,re.IGNORECASE): + if re.search("^([^\"]|\"[^\"]*\")*?;",query) or re.search("^([^\']|\'[^\']*\')*?;",query): + return False + else: + return True + return False + + + def __iter__( self ): - if self.query is not None: + if (self.query is not None) and self.query_matches_whitelist(self.query): for row in self.connection.cursor().execute(self.query): yield row else: https://bitbucket.org/galaxy/galaxy-central/commits/56a7b27577de/ Changeset: 56a7b27577de User: dannon Date: 2014-07-25 00:29:57 Summary: Merged in iracooke/galaxy-central (pull request #434) Add sqlite datatype and corresponding dataprovider Affected #: 3 files diff -r f03f9c5a5efc7fecb4ca131f66c441d55cc5d353 -r 56a7b27577dec0dac17ef94d4c131e7f1f61cb15 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -177,6 +177,7 @@ <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/><datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" description="Any data in tab delimited format (tabular)." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Tabular_.28tab_delimited.29"/><datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/> + <datatype extension="sqlite" type="galaxy.datatypes.binary:SQlite" mimetype="application/octet-stream" display_in_upload="true"/><datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true" description="Any text file." 
description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Plain_text"/><datatype extension="linecount" type="galaxy.datatypes.data:LineCount" display_in_upload="false"/><datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/> @@ -262,6 +263,7 @@ --><sniffer type="galaxy.datatypes.tabular:Vcf"/><sniffer type="galaxy.datatypes.binary:TwoBit"/> + <sniffer type="galaxy.datatypes.binary:SQlite"/><sniffer type="galaxy.datatypes.binary:Bam"/><sniffer type="galaxy.datatypes.binary:Sff"/><sniffer type="galaxy.datatypes.xml:Phyloxml"/> diff -r f03f9c5a5efc7fecb4ca131f66c441d55cc5d353 -r 56a7b27577dec0dac17ef94d4c131e7f1f61cb15 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -12,6 +12,7 @@ import subprocess import tempfile import zipfile +import sqlite3 from urllib import urlencode, quote_plus from galaxy import eggs @@ -545,3 +546,45 @@ return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size())) Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit) + + +@dataproviders.decorators.has_dataproviders +class SQlite ( Binary ): + file_ext = "sqlite" + + # Connects and runs a query that should work on any real database + # If the file is not sqlite, an exception will be thrown and the sniffer will return false + def sniff( self, filename ): + try: + conn = sqlite3.connect(filename) + schema_version=conn.cursor().execute("pragma schema_version").fetchone() + conn.close() + if schema_version is not None: + return True + return False + except: + return False + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + dataset.peek = "SQLite Database" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "SQLite Database (%s)" % ( data.nice_size( dataset.get_size() ) ) + + + @dataproviders.decorators.dataprovider_factory( 'sqlite', dataproviders.dataset.SQliteDataProvider.settings ) + def sqlite_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.dataset.SQliteDataProvider( dataset_source, **settings ) + + +Binary.register_sniffable_binary_format("sqlite","sqlite",SQlite) + diff -r f03f9c5a5efc7fecb4ca131f66c441d55cc5d353 -r 56a7b27577dec0dac17ef94d4c131e7f1f61cb15 lib/galaxy/datatypes/dataproviders/dataset.py --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -11,6 +11,8 @@ import line import column import external +import sqlite3 +import re from galaxy import eggs eggs.require( 'bx-python' ) @@ -700,3 +702,39 @@ #TODO: as samtools - need more info on output format raise NotImplementedError() super( BGzipTabixDataProvider, self ).__init__( dataset, **kwargs ) + + + +class SQliteDataProvider ( base.DataProvider ): + """ + Data provider that uses a sqlite database file as its source. 
+ + Allows any query to be run and returns the resulting rows as sqlite3 row objects + """ + settings = { + 'query' : 'str' + } + + def __init__( self, source, query=None, **kwargs ): + self.query=query + self.connection = sqlite3.connect(source.dataset.file_name); + self.connection.row_factory = sqlite3.Row + super( SQliteDataProvider, self ).__init__( source, **kwargs ) + + def query_matches_whitelist(self,query): + if re.match("select ",query,re.IGNORECASE): + if re.search("^([^\"]|\"[^\"]*\")*?;",query) or re.search("^([^\']|\'[^\']*\')*?;",query): + return False + else: + return True + return False + + + + def __iter__( self ): + if (self.query is not None) and self.query_matches_whitelist(self.query): + for row in self.connection.cursor().execute(self.query): + yield row + else: + yield + Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
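For reference, the whitelist added in changeset 4ca1677c8d5b admits only single SELECT statements: the query must start with "select " (case-insensitive) and must not contain a semicolon outside of a quoted string, so stacked statements such as "select 1; drop table t" are rejected and the provider yields nothing useful. Below is a minimal standalone sketch, not Galaxy code, that mirrors the same check and the provider's iteration logic so candidate queries can be tried against a local sqlite file; the temporary database and the sample queries are illustrative assumptions.

# Standalone sketch mirroring the whitelist and iteration logic added above.
import os
import re
import sqlite3
import tempfile


def query_matches_whitelist(query):
    # Accept only a single SELECT statement; reject anything containing an
    # unquoted semicolon (i.e. a potential statement separator).
    if re.match("select ", query, re.IGNORECASE):
        if re.search("^([^\"]|\"[^\"]*\")*?;", query) or re.search("^([^\']|\'[^\']*\')*?;", query):
            return False
        return True
    return False


def run_query(sqlite_path, query):
    # Mirrors SQliteDataProvider.__iter__: sqlite3.Row objects for a
    # whitelisted query, a single None otherwise (as in the Galaxy provider).
    connection = sqlite3.connect(sqlite_path)
    connection.row_factory = sqlite3.Row
    if query is not None and query_matches_whitelist(query):
        for row in connection.cursor().execute(query):
            yield row
    else:
        yield None


if __name__ == '__main__':
    # Build a throwaway database purely for demonstration.
    path = os.path.join(tempfile.mkdtemp(), "example.sqlite")
    conn = sqlite3.connect(path)
    conn.execute("create table t (id integer, name text)")
    conn.execute("insert into t values (1, 'a')")
    conn.commit()
    conn.close()

    for q in ["SELECT id, name FROM t",         # allowed
              "select * from t; drop table t",  # rejected: unquoted ';'
              "DELETE FROM t"]:                 # rejected: not a SELECT
        rows = [r for r in run_query(path, q) if r is not None]
        print("%r -> whitelisted=%s, rows=%s" % (q, query_matches_whitelist(q), len(rows)))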
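Similarly, the containers.py / docker_util.py hunk in the merge above prepends an image-caching step to the job script: build_docker_cache_command emits a shell fragment that runs "docker inspect" on the image and, only if that fails, pulls it, and the fragment is joined to the run command with a newline. The following standalone sketch, not Galaxy code, reassembles the same strings with assumed values (sudo enabled, no -H host, a made-up image name) to show what a docker-enabled destination ends up with; unlike the original prefix helper it uses extend for the -H flag so the join always produces a flat string.

# Standalone sketch of the docker prefix / cache-command assembly shown above.
# The values passed in __main__ are assumptions for illustration only.

def docker_prefix(docker_cmd="docker", sudo=False, sudo_cmd="sudo", host=None):
    parts = []
    if sudo:
        parts.append(sudo_cmd)
    parts.append(docker_cmd)
    if host:
        parts.extend(["-H", host])  # kept flat so " ".join() below stays a plain string
    return parts


def build_docker_cache_command(image, **host_props):
    inspect_cmd = " ".join(docker_prefix(**host_props) + ["inspect", image])
    pull_cmd = " ".join(docker_prefix(**host_props) + ["pull", image])
    # inspect exits 0 when the image is already in the local cache; otherwise pull it
    return "%s > /dev/null 2>&1\n[ $? -ne 0 ] && %s > /dev/null 2>&1\n" % (inspect_cmd, pull_cmd)


if __name__ == '__main__':
    print(build_docker_cache_command("example/tool-image:latest", sudo=True))
    # sudo docker inspect example/tool-image:latest > /dev/null 2>&1
    # [ $? -ne 0 ] && sudo docker pull example/tool-image:latest > /dev/null 2>&1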