1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/67b5abaaf8bc/ changeset: 67b5abaaf8bc user: inithello date: 2012-05-10 18:55:29 summary: Add admin tool to download and index a reference genome. affected #: 19 files diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -12,6 +12,7 @@ import galaxy.quota from galaxy.tags.tag_handler import GalaxyTagHandler from galaxy.tools.imp_exp import load_history_imp_exp_tools +from galaxy.tools.genome_index import load_genome_index_tools from galaxy.sample_tracking import external_service_types from galaxy.openid.providers import OpenIDProviders @@ -90,6 +91,8 @@ self.datatypes_registry.load_external_metadata_tool( self.toolbox ) # Load history import/export tools. load_history_imp_exp_tools( self.toolbox ) + # Load genome indexer tool. + load_genome_index_tools( self.toolbox ) # Load security policy. self.security_agent = self.model.security_agent self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions ) diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -42,6 +42,7 @@ tempfile.tempdir = self.new_file_path self.openid_consumer_cache_path = resolve_path( kwargs.get( "openid_consumer_cache_path", "database/openid_consumer_cache" ), self.root ) self.cookie_path = kwargs.get( "cookie_path", "/" ) + self.genome_data_path = kwargs.get( "genome_data_path", "tool-data/genome" ) # Galaxy OpenID settings self.enable_openid = string_as_bool( kwargs.get( 'enable_openid', False ) ) self.openid_config = kwargs.get( 'openid_config_file', 'openid_conf.xml' ) diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -144,9 +144,12 @@ self.file_name = dataset.file_name self.metadata = dict() self.children = [] - jeha = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() - if jeha: - out_data[ "output_file" ] = FakeDatasetAssociation( dataset=jeha.dataset ) + special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() + if not special: + special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() + if special: + out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset ) + # These can be passed on the command line if wanted as $userId $userEmail if job.history and job.history.user: # check for anonymous user! 
userId = '%d' % job.history.user.id @@ -442,6 +445,9 @@ param_dict = self.tool.params_from_strings( param_dict, self.app ) # Check for and move associated_files self.tool.collect_associated_files(out_data, self.working_directory) + gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() + if gitd: + self.tool.collect_associated_files({'' : gitd}, self.working_directory) # Create generated output children and primary datasets and add to param_dict collected_datasets = {'children':self.tool.collect_child_datasets(out_data, self.working_directory),'primary':self.tool.collect_primary_datasets(out_data, self.working_directory)} param_dict.update({'__collected_datasets__':collected_datasets}) @@ -480,6 +486,7 @@ self.external_output_metadata.cleanup_external_metadata( self.sa_session ) galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session ) galaxy.tools.imp_exp.JobImportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session ) + galaxy.tools.genome_index.GenomeIndexToolWrapper( self.job_id ).postprocessing( self.sa_session, self.app ) self.app.object_store.delete(self.get_job(), base_dir='job_work', entire_dir=True, dir_only=True, extra_dir=str(self.job_id)) except: log.exception( "Unable to cleanup job %d" % self.job_id ) @@ -535,9 +542,11 @@ else: return self.false_path job = self.get_job() - # Job output datasets are combination of output datasets, library datasets, and jeha datasets. - jeha = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() - jeha_false_path = None + # Job output datasets are combination of history, library, jeha and gitd datasets. + special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() + if not special: + special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() + false_path = None if self.app.config.outputs_to_working_directory: self.output_paths = [] self.output_hdas_and_paths = {} @@ -546,14 +555,14 @@ dsp = DatasetPath( hda.dataset.id, hda.dataset.file_name, false_path ) self.output_paths.append( dsp ) self.output_hdas_and_paths[name] = hda, dsp - if jeha: - jeha_false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % jeha.dataset.id ) ) + if special: + false_path = os.path.abspath( os.path.join( self.working_directory, "galaxy_dataset_%d.dat" % special.dataset.id ) ) else: results = [ ( da.name, da.dataset, DatasetPath( da.dataset.dataset.id, da.dataset.file_name ) ) for da in job.output_datasets + job.output_library_datasets ] self.output_paths = [t[2] for t in results] self.output_hdas_and_paths = dict([(t[0], t[1:]) for t in results]) - if jeha: - dsp = DatasetPath( jeha.dataset.id, jeha.dataset.file_name, jeha_false_path ) + if special: + dsp = DatasetPath( special.dataset.id, special.dataset.file_name, false_path ) self.output_paths.append( dsp ) return self.output_paths diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/jobs/deferred/genome_transfer.py --- /dev/null +++ b/lib/galaxy/jobs/deferred/genome_transfer.py @@ -0,0 +1,250 @@ +""" +Module for managing genome transfer jobs. 
+""" +import logging, shutil, gzip, bz2, zipfile, tempfile, tarfile, sys + +from galaxy import eggs +from sqlalchemy import and_ + +from galaxy.util.odict import odict +from galaxy.workflow.modules import module_factory +from galaxy.jobs.actions.post import ActionBox + +from galaxy.tools.parameters import visit_input_values +from galaxy.tools.parameters.basic import DataToolParameter +from galaxy.tools.data import ToolDataTableManager + +from galaxy.datatypes.checkers import * +from galaxy.datatypes.sequence import Fasta +from data_transfer import * + +log = logging.getLogger( __name__ ) + +__all__ = [ 'GenomeTransferPlugin' ] + +class GenomeTransferPlugin( DataTransfer ): + + locations = {} + + def __init__( self, app ): + super( GenomeTransferPlugin, self ).__init__( app ) + self.app = app + self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__'] + self.sa_session = app.model.context.current + tdtman = ToolDataTableManager() + xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path) + for node in xmltree: + table = node.get('name') + location = node.findall('file')[0].get('path') + self.locations[table] = location + + def create_job( self, trans, url, dbkey, intname, indexes ): + job = trans.app.transfer_manager.new( protocol='http', url=url ) + params = dict( user=trans.user.id, transfer_job_id=job.id, protocol='http', type='init_transfer', url=url, dbkey=dbkey, indexes=indexes, intname=intname, liftover=None ) + deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'GenomeTransferPlugin', params = params ) + self.sa_session.add( deferred ) + self.sa_session.flush() + return deferred.id + + def check_job( self, job ): + if job.params['type'] == 'init_transfer': + if not hasattr(job, 'transfer_job'): + job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) + else: + self.sa_session.refresh( job.transfer_job ) + if job.transfer_job.state == 'done': + transfer = job.transfer_job + transfer.state = 'downloaded' + job.params['type'] = 'extract_transfer' + self.sa_session.add( job ) + self.sa_session.add( transfer ) + self.sa_session.flush() + return self.job_states.READY + elif job.transfer_job.state == 'running': + return self.job_states.WAIT + elif job.transfer_job.state == 'new': + assert job.params[ 'protocol' ] in [ 'http', 'ftp', 'https' ], 'Unknown protocol %s' % job.params[ 'protocol' ] + self.app.transfer_manager.run( job.transfer_job ) + self.sa_session.add( job.transfer_job ) + self.sa_session.flush() + return self.job_states.WAIT + else: + log.error( "An error occurred while downloading from %s" % job.params[ 'url' ] ) + return self.job_states.INVALID + elif job.params[ 'type' ] == 'extract_transfer': + return self.job_states.READY + + def get_job_status( self, jobid ): + job = self.sa_session.query( self.app.model.DeferredJob ).get( int( jobid ) ) + if not hasattr( job, 'transfer_job' ): + job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) + else: + self.sa_session.refresh( job.transfer_job ) + return job + + def run_job( self, job ): + params = job.params + dbkey = params[ 'dbkey' ] + if not hasattr( job, 'transfer_job' ): + job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) + else: + self.sa_session.refresh( job.transfer_job ) + transfer = job.transfer_job + if params[ 'type' ] == 'extract_transfer': + CHUNK_SIZE = 2**20 + 
destpath = os.path.join( self.app.config.get( 'genome_data_path', 'tool-data/genome' ), job.params[ 'dbkey' ], 'seq' ) + destfile = '%s.fa' % job.params[ 'dbkey' ] + destfilepath = os.path.join( destpath, destfile ) + tmpprefix = '%s_%s_download_unzip_' % ( job.params['dbkey'], job.params[ 'transfer_job_id' ] ) + tmppath = os.path.dirname( os.path.abspath( transfer.path ) ) + if not os.path.exists( destpath ): + os.makedirs( destpath ) + protocol = job.params[ 'protocol' ] + data_type = self._check_compress( transfer.path ) + if data_type is None: + sniffer = Fasta() + if sniffer.sniff( transfer.path ): + data_type = 'fasta' + fd, uncompressed = tempfile.mkstemp( prefix=tmpprefix, dir=tmppath, text=False ) + if data_type in [ 'tar.gzip', 'tar.bzip' ]: + fp = open( transfer.path, 'r' ) + tar = tarfile.open( mode = 'r:*', bufsize = CHUNK_SIZE, fileobj = fp ) + files = tar.getmembers() + for filename in files: + z = tar.extractfile(filename) + try: + chunk = z.read( CHUNK_SIZE ) + except IOError: + os.close( fd ) + log.error( 'Problem decompressing compressed data' ) + exit() + if not chunk: + break + os.write( fd, chunk ) + os.write( fd, '\n' ) + os.close( fd ) + tar.close() + fp.close() + elif data_type == 'gzip': + compressed = gzip.open( transfer.path, mode = 'rb' ) + while 1: + try: + chunk = compressed.read( CHUNK_SIZE ) + except IOError: + compressed.close() + log.error( 'Problem decompressing compressed data' ) + exit() + if not chunk: + break + os.write( fd, chunk ) + os.write( fd, '\n' ) + os.close( fd ) + compressed.close() + elif data_type == 'bzip': + compressed = bz2.BZ2File( transfer.path, mode = 'r' ) + while 1: + try: + chunk = compressed.read( CHUNK_SIZE ) + except IOError: + compressed.close() + log.error( 'Problem decompressing compressed data' ) + exit() + if not chunk: + break + os.write( fd, chunk ) + os.write( fd, '\n' ) + os.close( fd ) + compressed.close() + elif data_type == 'zip': + uncompressed_name = None + unzipped = False + z = zipfile.ZipFile( transfer.path ) + z.debug = 3 + for name in z.namelist(): + if name.endswith('/'): + continue + if sys.version_info[:2] >= ( 2, 6 ): + zipped_file = z.open( name ) + while 1: + try: + chunk = zipped_file.read( CHUNK_SIZE ) + except IOError: + os.close( fd ) + log.error( 'Problem decompressing zipped data' ) + return self.app.model.DeferredJob.states.INVALID + if not chunk: + break + os.write( fd, chunk ) + os.write( fd, '\n' ) + zipped_file.close() + else: + try: + outfile = open( fd, 'wb' ) + outfile.write( z.read( name ) ) + outfile.close() + except IOError: + os.close( fd ) + log.error( 'Problem decompressing zipped data' ) + return + os.close( fd ) + z.close() + elif data_type == 'fasta': + uncompressed = transfer.path + else: + job.state = self.app.model.DeferredJob.states.INVALID + log.error( "Unrecognized compression format for file %s." 
% transfer.path ) + self.sa_session.add( job ) + self.sa_session.flush() + return + shutil.move( uncompressed, destfilepath ) + if os.path.exists( transfer.path ): + os.remove( transfer.path ) + os.chmod( destfilepath, 0644 ) + fastaline = '\t'.join( [ dbkey, dbkey, params[ 'intname' ], os.path.abspath( destfilepath ) ] ) + self._add_line( 'all_fasta', fastaline ) + job.state = self.app.model.DeferredJob.states.OK + job.params[ 'type' ] = 'finish_transfer' + transfer.path = os.path.abspath(destfilepath) + transfer.state = 'done' + self.sa_session.add( job ) + self.sa_session.add( transfer ) + self.sa_session.flush() + if transfer.state == 'done' and params[ 'indexes' ] is not None: + for indexer in params[ 'indexes' ]: + incoming = dict(indexer=indexer, dbkey=params[ 'dbkey' ], intname=params[ 'intname' ], path=transfer.path, user=params['user'] ) + deferred = self.tool.execute( self, set_output_hid=False, history=None, incoming=incoming, transfer=transfer, deferred=job ) + return self.app.model.DeferredJob.states.OK + + def _check_compress( self, filepath ): + retval = '' + if tarfile.is_tarfile( filepath ): + retval = 'tar.' + if check_zip( filepath ): + return 'zip' + is_bzipped, is_valid = check_bz2( filepath ) + if is_bzipped and is_valid: + return retval + 'bzip' + is_gzipped, is_valid = check_gzip( filepath ) + if is_gzipped and is_valid: + return retval + 'gzip' + return None + + def _add_line( self, locfile, newline ): + filepath = self.locations[ locfile ] + origlines = [] + output = [] + comments = [] + with open( filepath, 'r' ) as destfile: + for line in destfile: + if line.startswith( '#' ): + comments.append( line.strip() ) + else: + origlines.append( line.strip() ) + if newline not in origlines: + origlines.append( newline ) + output.extend( comments ) + origlines.sort() + output.extend( origlines ) + with open( filepath, 'w+' ) as destfile: + destfile.write( '\n'.join( output ) ) + diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/jobs/deferred/liftover_transfer.py --- /dev/null +++ b/lib/galaxy/jobs/deferred/liftover_transfer.py @@ -0,0 +1,134 @@ +""" +Module for managing genome transfer jobs. 
+""" +import logging, shutil, gzip, tempfile, sys + +from galaxy import eggs +from sqlalchemy import and_ + +from galaxy.util.odict import odict +from galaxy.workflow.modules import module_factory +from galaxy.jobs.actions.post import ActionBox + +from galaxy.tools.parameters import visit_input_values +from galaxy.tools.parameters.basic import DataToolParameter +from galaxy.tools.data import ToolDataTableManager + +from galaxy.datatypes.checkers import * + +from data_transfer import * + +log = logging.getLogger( __name__ ) + +__all__ = [ 'LiftOverTransferPlugin' ] + +class LiftOverTransferPlugin( DataTransfer ): + + locations = {} + + def __init__( self, app ): + super( LiftOverTransferPlugin, self ).__init__( app ) + self.app = app + self.sa_session = app.model.context.current + + def create_job( self, trans, url, dbkey, from_genome, to_genome, destfile ): + job = trans.app.transfer_manager.new( protocol='http', url=url ) + params = dict( user=trans.user.id, transfer_job_id=job.id, protocol='http', + type='init_transfer', dbkey=dbkey, from_genome=from_genome, + to_genome=to_genome, destfile=destfile ) + deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'LiftOverTransferPlugin', params = params ) + self.sa_session.add( deferred ) + self.sa_session.flush() + return deferred.id + + def check_job( self, job ): + if job.params['type'] == 'init_transfer': + if not hasattr(job, 'transfer_job'): + job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) + else: + self.sa_session.refresh( job.transfer_job ) + if job.transfer_job.state == 'done': + transfer = job.transfer_job + transfer.state = 'downloaded' + job.params['type'] = 'extract_transfer' + self.sa_session.add( job ) + self.sa_session.add( transfer ) + self.sa_session.flush() + return self.job_states.READY + elif job.transfer_job.state == 'running': + return self.job_states.WAIT + elif job.transfer_job.state == 'new': + assert job.params[ 'protocol' ] in [ 'http', 'ftp', 'https' ], 'Unknown protocol %s' % job.params[ 'protocol' ] + self.app.transfer_manager.run( job.transfer_job ) + self.sa_session.add( job.transfer_job ) + self.sa_session.flush() + return self.job_states.WAIT + else: + log.error( "An error occurred while downloading from %s" % job.params[ 'url' ] ) + return self.job_states.INVALID + elif job.params[ 'type' ] == 'extract_transfer': + return self.job_states.READY + + def get_job_status( self, jobid ): + job = self.sa_session.query( self.app.model.DeferredJob ).get( int( jobid ) ) + return job + + def run_job( self, job ): + params = job.params + dbkey = params[ 'dbkey' ] + source = params[ 'from_genome' ] + target = params[ 'to_genome' ] + if not hasattr( job, 'transfer_job' ): + job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) + else: + self.sa_session.refresh( job.transfer_job ) + transfer = job.transfer_job + if params[ 'type' ] == 'extract_transfer': + CHUNK_SIZE = 2**20 + destpath = os.path.join( self.app.config.get( 'genome_data_path', 'tool-data/genome' ), job.params[ 'dbkey' ], 'liftOver' ) + destfile = job.params[ 'destfile' ] + destfilepath = os.path.join( destpath, destfile ) + tmpprefix = '%s_%s_download_unzip_' % ( job.params['dbkey'], job.params[ 'transfer_job_id' ] ) + tmppath = os.path.dirname( os.path.abspath( transfer.path ) ) + if not os.path.exists( destpath ): + os.makedirs( destpath ) + fd, uncompressed = tempfile.mkstemp( 
prefix=tmpprefix, dir=tmppath, text=False ) + chain = gzip.open( transfer.path, 'rb' ) + while 1: + try: + chunk = chain.read( CHUNK_SIZE ) + except IOError: + os.close( fd ) + log.error( 'Problem decompressing compressed data' ) + exit() + if not chunk: + break + os.write( fd, chunk ) + os.close( fd ) + chain.close() + # Replace the gzipped file with the decompressed file if it's safe to do so + shutil.move( uncompressed, destfilepath ) + os.remove( transfer.path ) + os.chmod( destfilepath, 0644 ) + locline = '\t'.join( [ source, target, os.path.abspath( destfilepath ) ] ) + self._add_line( locline ) + job.state = self.app.model.DeferredJob.states.OK + job.params[ 'type' ] = 'finish_transfer' + transfer.path = os.path.abspath(destfilepath) + transfer.state = 'done' + self.sa_session.add( job ) + self.sa_session.add( transfer ) + self.sa_session.flush() + return self.app.model.DeferredJob.states.OK + + def _add_line( self, newline ): + filepath = 'tool-data/liftOver.loc' + origlines = [] + with open( filepath, 'r' ) as destfile: + for line in destfile: + origlines.append( line.strip() ) + if newline not in origlines: + origlines.append( newline ) + with open( filepath, 'w+' ) as destfile: + destfile.write( '\n'.join( origlines ) ) + diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -301,6 +301,20 @@ self.history = history self.archive_dir=archive_dir +class GenomeIndexToolData( object ): + def __init__( self, job=None, params=None, dataset=None, deferred_job=None, \ + transfer_job=None, fasta_path=None, created_time=None, modified_time=None, \ + dbkey=None, user=None, indexer=None ): + self.job = job + self.dataset = dataset + self.fasta_path = fasta_path + self.user = user + self.indexer = indexer + self.created_time = created_time + self.modified_time = modified_time + self.deferred = deferred_job + self.transfer = transfer_job + class DeferredJob( object ): states = Bunch( NEW = 'new', WAITING = 'waiting', diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py +++ b/lib/galaxy/model/mapping.py @@ -492,6 +492,19 @@ Column( "archive_dir", TEXT ) ) +GenomeIndexToolData.table = Table( "genome_index_tool_data", metadata, + Column( "id", Integer, primary_key=True ), + Column( "job_id", Integer, ForeignKey( "job.id" ), index=True ), + Column( "deferred_job_id", Integer, ForeignKey( "deferred_job.id" ), index=True ), + Column( "transfer_job_id", Integer, ForeignKey( "transfer_job.id" ), index=True ), + Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ), + Column( "fasta_path", String( 255 ) ), + Column( "created_time", DateTime, default=now ), + Column( "modified_time", DateTime, default=now, onupdate=now ), + Column( "indexer", String( 64 ) ), + Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ), + ) + Task.table = Table( "task", metadata, Column( "id", Integer, primary_key=True ), Column( "create_time", DateTime, default=now ), @@ -1512,6 +1525,13 @@ assign_mapper( context, JobImportHistoryArchive, JobImportHistoryArchive.table, properties=dict( job = relation( Job ), history = relation( History ) ) ) +assign_mapper( context, GenomeIndexToolData, GenomeIndexToolData.table, + properties=dict( job = relation( Job ), + dataset = relation( Dataset ), + user = relation( User ), + deferred = relation( DeferredJob, 
backref='deferred_job' ), + transfer = relation( TransferJob, backref='transfer_job' ) ) ) + assign_mapper( context, PostJobAction, PostJobAction.table, properties=dict(workflow_step = relation( WorkflowStep, backref='post_job_actions', primaryjoin=(WorkflowStep.table.c.id == PostJobAction.table.c.workflow_step_id)))) diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/model/migrate/versions/0098_genome_index_tool_data_table.py --- /dev/null +++ b/lib/galaxy/model/migrate/versions/0098_genome_index_tool_data_table.py @@ -0,0 +1,53 @@ +""" +Migration script to create the genome_index_tool_data table. +""" + +from sqlalchemy import * +from migrate import * + +import datetime +now = datetime.datetime.utcnow + +# Need our custom types, but don't import anything else from model +from galaxy.model.custom_types import * + +import sys, logging +log = logging.getLogger( __name__ ) +log.setLevel(logging.DEBUG) +handler = logging.StreamHandler( sys.stdout ) +format = "%(name)s %(levelname)s %(asctime)s %(message)s" +formatter = logging.Formatter( format ) +handler.setFormatter( formatter ) +log.addHandler( handler ) + +metadata = MetaData( migrate_engine ) + +# New table in changeset TODO:TODO +GenomeIndexToolData_table = Table( "genome_index_tool_data", metadata, + Column( "id", Integer, primary_key=True ), + Column( "job_id", Integer, ForeignKey( "job.id" ), index=True ), + Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ), + Column( "deferred_job_id", Integer, ForeignKey( "deferred_job.id" ), index=True ), + Column( "transfer_job_id", Integer, ForeignKey( "transfer_job.id" ), index=True ), + Column( "fasta_path", String( 255 ) ), + Column( "created_time", DateTime, default=now ), + Column( "modified_time", DateTime, default=now, onupdate=now ), + Column( "indexer", String( 64 ) ), + Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ), + ) + +def upgrade(): + print __doc__ + + metadata.reflect() + try: + GenomeIndexToolData_table.create() + except Exception, e: + log.debug( "Creating genome_index_tool_data table failed: %s" % str( e ) ) + +def downgrade(): + metadata.reflect() + try: + GenomeIndexToolData_table.drop() + except Exception, e: + log.debug( "Dropping genome_index_tool_data table failed: %s" % str( e ) ) diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2556,6 +2556,9 @@ class ImportHistoryTool( Tool ): tool_type = 'import_history' +class GenomeIndexTool( Tool ): + tool_type = 'index_genome' + # Populate tool_type to ToolClass mappings tool_types = {} for tool_class in [ Tool, DataDestinationTool, SetMetadataTool, DataSourceTool, AsyncDataSourceTool ]: diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/tools/actions/index_genome.py --- /dev/null +++ b/lib/galaxy/tools/actions/index_genome.py @@ -0,0 +1,65 @@ +import tempfile +from __init__ import ToolAction +from galaxy.util.odict import odict +from galaxy.tools.genome_index import * + +import logging +log = logging.getLogger( __name__ ) + +class GenomeIndexToolAction( ToolAction ): + """Tool action used for exporting a history to an archive. """ + + def execute( self, tool, trans, *args, **kwargs ): + # + # Get genome to index. 
+ # + incoming = kwargs['incoming'] + # + # Create the job and output dataset objects + # + job = trans.app.model.Job() + job.tool_id = tool.id + job.user_id = incoming['user'] + start_job_state = job.state # should be job.states.NEW + job.state = job.states.WAITING # we need to set job state to something other than NEW, or else when tracking jobs in db it will be picked up before we have added input / output parameters + trans.sa_session.add( job ) + + # Create the dataset that will serve as the tool's output. + temp_dataset = trans.app.model.Dataset( state=trans.app.model.Dataset.states.NEW ) + trans.sa_session.add( temp_dataset ) + + trans.sa_session.flush() # ensure job.id and temp_dataset.id are available + trans.app.object_store.create( temp_dataset ) # set the object store id, create dataset (because galaxy likes having datasets) + + # + # Setup job and job wrapper. + # + + # Add association for keeping track of the job, dataset, deferred job and transfer job relationship. + user = trans.sa_session.query( trans.app.model.User ).get( int( incoming['user'] ) ) + assoc = trans.app.model.GenomeIndexToolData( job=job, dataset=temp_dataset, fasta_path=incoming['path'], \ + indexer=incoming['indexer'], user=user, \ + deferred_job=kwargs['deferred'], transfer_job=kwargs['transfer'] ) + trans.sa_session.add( assoc ) + + job_wrapper = GenomeIndexToolWrapper( job ) + cmd_line = job_wrapper.setup_job( assoc ) + # + # Add parameters to job_parameter table. + # + + # Set additional parameters. + incoming[ '__GENOME_INDEX_COMMAND__' ] = cmd_line + for name, value in tool.params_to_strings( incoming, trans.app ).iteritems(): + job.add_parameter( name, value ) + + job.state = start_job_state # job inputs have been configured, restore initial job state + trans.sa_session.flush() + + + # Queue the job for execution + trans.app.job_queue.put( job.id, tool ) + log.info( "Added genome index job to the job queue, id: %s" % str( job.id ) ) + + return job, odict() + diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/tools/genome_index/__init__.py --- /dev/null +++ b/lib/galaxy/tools/genome_index/__init__.py @@ -0,0 +1,184 @@ +import os, shutil, logging, tempfile, json, tarfile +from galaxy import model, util +from galaxy.web.framework.helpers import to_unicode +from galaxy.model.item_attrs import UsesAnnotations +from galaxy.util.json import * +from galaxy.web.base.controller import UsesHistory +from galaxy.tools.data import ToolDataTableManager + +log = logging.getLogger(__name__) + +def load_genome_index_tools( toolbox ): + """ Adds tools for indexing genomes via the main job runner. """ + # Use same process as that used in load_external_metadata_tool; see that + # method for why tool description files are created on the fly. + tool_xml_text = """ + <tool id="__GENOME_INDEX__" name="Index Genome" version="0.1" tool_type="genome_index"> + <type class="GenomeIndexTool" module="galaxy.tools"/> + <action module="galaxy.tools.actions.index_genome" class="GenomeIndexToolAction"/> + <command>$__GENOME_INDEX_COMMAND__ $output_file $output_file.files_path</command> + <inputs> + <param name="__GENOME_INDEX_COMMAND__" type="hidden"/> + </inputs> + <outputs> + <data format="txt" name="output_file"/> + </outputs> + </tool> + """ + + # Load the genome index tool.
+ tmp_name = tempfile.NamedTemporaryFile() + tmp_name.write( tool_xml_text ) + tmp_name.flush() + genome_index_tool = toolbox.load_tool( tmp_name.name ) + toolbox.tools_by_id[ genome_index_tool.id ] = genome_index_tool + log.debug( "Loaded genome index tool: %s", genome_index_tool.id ) + +class GenomeIndexToolWrapper( object ): + """ Provides support for performing jobs that index a genome. """ + def __init__( self, job_id ): + self.locations = dict() + self.job_id = job_id + + def setup_job( self, genobj ): + """ Perform setup for job to index a genome and return an archive. Method generates + attribute files, sets the corresponding attributes in the associated database + object, and returns a command line for running the job. The command line + includes the command, inputs, and options; it does not include the output + file because it must be set at runtime. """ + + # + # Create and return command line for running tool. + # + scriptpath = os.path.join( os.path.abspath( os.getcwd() ), "lib/galaxy/tools/genome_index/index_genome.py" ) + return "python %s %s %s" % ( scriptpath, genobj.indexer, genobj.fasta_path ) + + def postprocessing( self, sa_session, app ): + """ Finish the job, move the finished indexes to their final resting place, + and update the .loc files where applicable. """ + gitd = sa_session.query( model.GenomeIndexToolData ).filter_by( job_id=self.job_id ).first() + indexdirs = dict( bfast='bfast_index', bowtie='bowtie_index', bowtie2='bowtie2_index', + bwa='bwa_index', perm='perm_%s_index', picard='srma_index', sam='sam_index' ) + + + if gitd: + destination = None + tdtman = ToolDataTableManager() + xmltree = tdtman.load_from_config_file(app.config.tool_data_table_config_path) + for node in xmltree: + table = node.get('name') + location = node.findall('file')[0].get('path') + self.locations[table] = os.path.abspath( location ) + locbase = os.path.abspath( os.path.split( self.locations['all_fasta'] )[0] ) + deferred = sa_session.query( model.DeferredJob ).filter_by( id=gitd.deferred_job_id ).first() + params = deferred.params + dbkey = params[ 'dbkey' ] + basepath = os.path.join( os.path.abspath( app.config.genome_data_path ), dbkey ) + intname = params[ 'intname' ] + indexer = gitd.indexer + workingdir = os.path.abspath( gitd.dataset.extra_files_path ) + fp = open( gitd.dataset.get_file_name(), 'r' ) + logloc = json.load( fp ) + fp.close() + location = [] + indexdata = gitd.dataset.extra_files_path + if indexer == '2bit': + indexdata = os.path.join( workingdir, '%s.2bit' % dbkey ) + destination = os.path.join( basepath, 'seq', '%s.2bit' % dbkey ) + location.append( dict( line='\t'.join( [ 'seq', dbkey, os.path.join( destination, '%s.2bit' % dbkey ) ] ), file= os.path.join( locbase, 'alignseq.loc' ) ) ) + elif indexer == 'bowtie': + self._ex_tar( workingdir, 'cs.tar' ) + destination = os.path.join( basepath, 'bowtie_index' ) + for var in [ 'nt', 'cs' ]: + for line in logloc[ var ]: + idx = line + if var == 'nt': + locfile = self.locations[ 'bowtie_indexes' ] + locdir = os.path.join( destination, idx ) + else: + locfile = self.locations[ 'bowtie_indexes_color' ] + locdir = os.path.join( destination, var, idx ) + location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) + elif indexer == 'bowtie2': + destination = os.path.join( basepath, 'bowtie2_index' ) + for line in logloc[ 'nt' ]: + idx = line + locfile = self.locations[ 'bowtie2_indexes' ] + locdir = os.path.join( destination, idx ) + location.append( dict( line='\t'.join( [ dbkey, 
dbkey, intname, locdir ] ), file=locfile ) ) + elif indexer == 'bwa': + self._ex_tar( workingdir, 'cs.tar' ) + destination = os.path.join( basepath, 'bwa_index' ) + for var in [ 'nt', 'cs' ]: + for line in logloc[ var ]: + idx = line + if var == 'nt': + locfile = self.locations[ 'bwa_indexes' ] + locdir = os.path.join( destination, idx ) + else: + locfile = self.locations[ 'bwa_indexes_color' ] + locdir = os.path.join( destination, var, idx ) + location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) + elif indexer == 'perm': + self._ex_tar( workingdir, 'cs.tar' ) + destination = os.path.join( basepath, 'perm_index' ) + for var in [ 'nt', 'cs' ]: + for line in logloc[ var ]: + idx = line.pop() + if var == 'nt': + locfile = self.locations[ 'perm_base_indexes' ] + locdir = os.path.join( destination, idx ) + else: + locfile = self.locations[ 'perm_color_indexes' ] + locdir = os.path.join( destination, var, idx ) + line.append( locdir ) + location.append( dict( line='\t'.join( line ), file=locfile ) ) + elif indexer == 'picard': + destination = os.path.join( basepath, 'srma_index' ) + for var in [ 'nt' ]: + for line in logloc[ var ]: + idx = line + locfile = self.locations[ 'picard_indexes' ] + locdir = os.path.join( destination, idx ) + location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) + elif indexer == 'sam': + destination = os.path.join( basepath, 'sam_index' ) + for var in [ 'nt' ]: + for line in logloc[ var ]: + locfile = self.locations[ 'sam_fa_indexes' ] + locdir = os.path.join( destination, line ) + location.append( dict( line='\t'.join( [ 'index', dbkey, locdir ] ), file=locfile ) ) + + if destination is not None and os.path.exists( os.path.split( destination )[0] ) and not os.path.exists( destination ): + log.debug( 'Moving %s to %s' % ( indexdata, destination ) ) + shutil.move( indexdata, destination ) + if indexer not in [ '2bit' ]: + genome = '%s.fa' + target = os.path.join( destination, genome ) + farel = os.path.relpath( os.path.join( basepath, 'seq', genome ), destination ) + os.symlink( farel, target ) + if os.path.exists( os.path.join( destination, 'cs' ) ): + target = os.path.join( destination, 'cs', genome ) + farel = os.path.relpath( os.path.join( basepath, 'seq', genome ), os.path.join( destination, 'cs' ) ) + os.symlink( os.path.join( farel, target ) ) + for line in location: + self._add_line( line[ 'file' ], line[ 'line' ] ) + + def _ex_tar( self, directory, filename ): + fh = tarfile.open( os.path.join( directory, filename ) ) + fh.extractall( path=directory ) + fh.close() + os.remove( os.path.join( directory, filename ) ) + + def _add_line( self, locfile, newline ): + filepath = locfile + origlines = [] + output = [] + comments = [] + with open( filepath, 'r' ) as destfile: + for line in destfile: + origlines.append( line.strip() ) + if newline not in origlines: + origlines.append( newline ) + with open( filepath, 'w+' ) as destfile: + destfile.write( '\n'.join( origlines ) ) diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/tools/genome_index/index_genome.py --- /dev/null +++ b/lib/galaxy/tools/genome_index/index_genome.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python +""" +Export a history to an archive file using attribute files. 
+ +usage: %prog history_attrs dataset_attrs job_attrs out_file + -G, --gzip: gzip archive file +""" +import optparse, sys, os, tempfile, time, subprocess, shlex, json, tarfile, shutil + +class ManagedIndexer(): + def __init__( self, output_file, infile, workingdir ): + self.workingdir = os.path.abspath( workingdir ) + self.outfile = open( os.path.abspath( output_file ), 'w' ) + self.basedir = os.path.split( self.workingdir )[0] + self.fasta = os.path.abspath( infile ) + self.locations = dict( nt=[], cs=[] ) + self.log = [] + self.indexers = { + 'bwa': '_bwa', + 'bowtie': '_bowtie', + 'bowtie2': '_bowtie2', + '2bit': '_twobit', + 'perm': '_perm', + 'bfast': '_bfast', + 'picard': '_picard', + 'sam': '_sam' + } + if not os.path.exists( self.workingdir ): + os.makedirs( self.workingdir ) + self.logfile = open( os.path.join( self.workingdir, 'ManagedIndexer.log' ), 'w+' ) + + def run_indexer( self, indexer ): + self.fapath = self.fasta + self.fafile = os.path.basename( self.fapath ) + with WithChDir( self.basedir ): + if indexer not in self.indexers: + raise KeyError, 'The requested indexing function does not exist' + else: + with WithChDir( self.workingdir ): + self._log( 'Running indexer %s.' % indexer ) + result = getattr( self, self.indexers[ indexer ] )() + if result is None: + self._log( 'Error running indexer %s.' % indexer ) + self._flush_files() + raise Exception + else: + self._log( 'Indexer %s completed successfully.' % indexer ) + self._flush_files() + + def _flush_files( self ): + json.dump( self.locations, self.outfile ) + self.outfile.close() + self.logfile.close() + + def _log( self, stuff ): + timestamp = time.strftime('%Y-%m-%d %H:%M:%S %z') + self.logfile.write( "[%s] %s\n" % (timestamp, stuff) ) + + def _bwa( self ): + with WithChDir( self.workingdir ): + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'bwa index -a bwtsw %s' % self.fafile ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result != 0: + newcommand = shlex.split( 'bwa index -c %s' % self.fafile ) + result = call( newcommand, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations[ 'nt' ].append( self.fafile ) + os.remove( self.fafile ) + os.makedirs( 'cs' ) + with WithChDir( 'cs' ): + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'bwa index -a bwtsw -c %s' % self.fafile ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result != 0: + newcommand = shlex.split( 'bwa index -c %s' % self.fafile ) + result = call( newcommand, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations[ 'cs' ].append( self.fafile ) + os.remove( self.fafile ) + else: + return False + else: + self.locations[ 'cs' ].append( self.fafile ) + os.remove( self.fafile ) + temptar = tarfile.open( 'cs.tar', 'w' ) + temptar.add( 'cs' ) + temptar.close() + shutil.rmtree( 'cs' ) + return True + else: + return False + + def _bowtie( self ): + ref_base = os.path.splitext(self.fafile)[0] + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'bowtie-build -f %s %s' % ( self.fafile, ref_base ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations[ 'nt' ].append( ref_base ) + os.remove( self.fafile ) + indexdir = os.path.join( os.getcwd(), 'cs' ) + 
os.makedirs( indexdir ) + with WithChDir( indexdir ): + ref_base = os.path.splitext(self.fafile)[0] + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'bowtie-build -C -f %s %s' % ( self.fafile, ref_base ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations[ 'cs' ].append( ref_base ) + else: + return False + os.remove( os.path.join( indexdir, self.fafile ) ) + temptar = tarfile.open( 'cs.tar', 'w' ) + temptar.add( 'cs' ) + temptar.close() + shutil.rmtree( 'cs' ) + return True + else: + return False + + def _bowtie2( self ): + ref_base = os.path.splitext(self.fafile)[0] + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'bowtie2-build %s %s' % ( self.fafile, ref_base ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations[ 'nt' ].append( ref_base ) + os.remove( self.fafile ) + return True + else: + return False + + def _twobit( self ): + """Index reference files using 2bit for random access. + """ + ref_base = os.path.splitext(self.fafile)[0] + out_file = "%s.2bit" % ref_base + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'faToTwoBit %s %s' % ( self.fafile, out_file ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations['nt'].append( out_file ) + os.remove( self.fafile ) + return True + else: + return False + + def _perm( self ): + local_ref = self.fafile + if not os.path.exists( local_ref ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + genome = os.path.splitext( local_ref )[0] + read_length = 50 + for seed in [ 'F3', 'F4' ]: + key = '%s_%s_%s' % (genome, seed, read_length) + desc = '%s: seed=%s, read length=%s' % (genome, seed, read_length) + index = "%s_base_%s_%s.index" % (genome, seed, read_length) + command = shlex.split("PerM %s %s --readFormat fastq --seed %s -m -s %s" % (local_ref, read_length, seed, index)) + result = subprocess.call( command ) + if result == 0: + self.locations[ 'nt' ].append( [ key, desc, index ] ) + else: + return False + os.remove( local_ref ) + os.makedirs( 'cs' ) + with WithChDir( 'cs' ): + if not os.path.exists( local_ref ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + for seed in [ 'F3', 'F4' ]: + key = '%s_%s_%s' % (genome, seed, read_length) + desc = '%s: seed=%s, read length=%s' % (genome, seed, read_length) + index = "%s_color_%s_%s.index" % (genome, seed, read_length) + command = shlex.split("PerM %s %s --readFormat csfastq --seed %s -m -s %s" % (local_ref, read_length, seed, index)) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result == 0: + self.locations[ 'cs' ].append( [ key, desc, index ] ) + else: + return False + os.remove( local_ref ) + temptar = tarfile.open( 'cs.tar', 'w' ) + temptar.add( 'cs' ) + temptar.close() + shutil.rmtree( 'cs' ) + return True + + def _bfast( self ): + """Indexes bfast in color and nucleotide space for longer reads. + + This preps for 40+bp sized reads, which is bfast's strength. 
+ """ + dir_name_nt = 'nt' + dir_name_cs = 'cs' + window_size = 14 + bfast_nt_masks = [ + "1111111111111111111111", + "1111101110111010100101011011111", + "1011110101101001011000011010001111111", + "10111001101001100100111101010001011111", + "11111011011101111011111111", + "111111100101001000101111101110111", + "11110101110010100010101101010111111", + "111101101011011001100000101101001011101", + "1111011010001000110101100101100110100111", + "1111010010110110101110010110111011", + ] + bfast_color_masks = [ + "1111111111111111111111", + "111110100111110011111111111", + "10111111011001100011111000111111", + "1111111100101111000001100011111011", + "111111110001111110011111111", + "11111011010011000011000110011111111", + "1111111111110011101111111", + "111011000011111111001111011111", + "1110110001011010011100101111101111", + "111111001000110001011100110001100011111", + ] + local_ref = self.fafile + os.makedirs( dir_name_nt ) + os.makedirs( dir_name_cs ) + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + with WithChDir( dir_name_nt ): + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + # nucleotide space + command = shlex.split( "bfast fasta2brg -f %s -A 0" % local_ref ) + result = subprocess.call( command, stderr=self.logfile ) + for i, mask in enumerate( bfast_nt_masks ): + command = shlex.split("bfast index -d 1 -n 4 -f %s -A 0 -m %s -w %s -i %s" % + ( local_ref, mask, window_size, i + 1 ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + os.remove( self.fafile ) + if result != 0: + return False + else: + os.remove( self.fafile ) + with WithChDir( dir_name_cs ): + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + # colorspace + command = shlex.split( "bfast fasta2brg -f %s -A 1" % local_ref ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + for i, mask in enumerate( bfast_color_masks ): + command = shlex.split( "bfast index -d 1 -n 4 -f %s -A 1 -m %s -w %s -i %s" % + ( local_ref, mask, window_size, i + 1 ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result != 0: + return False + else: + os.remove( self.fafile ) + self.locations = None + return True + + def _picard( self ): + local_ref = self.fafile + srma = '/Users/dave/srma.jar' + genome = os.path.splitext( self.fafile )[0] + if not os.path.exists( self.fafile ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split( 'samtools faidx %s' % self.fafile ) + subprocess.call( command, stderr=self.logfile ) + os.rename( '%s.fai' % self.fafile, '%s.fai' % genome ) + command = shlex.split( "java -cp %s net.sf.picard.sam.CreateSequenceDictionary R=%s O=%s/%s.dict URI=%s" \ + % ( srma, local_ref, os.curdir, genome, local_ref ) ) + result = subprocess.call( command, stderr=self.logfile, stdout=self.logfile ) + if result != 0: + return False + else: + self.locations[ 'nt' ].append( self.fafile ) + #os.remove( '%s.fai' % genome ) + os.remove( self.fafile ) + return True + + def _sam( self ): + local_ref = self.fafile + local_file = os.path.splitext( self.fafile )[ 0 ] + if not os.path.exists( local_ref ): + os.symlink( os.path.relpath( self.fapath ), self.fafile ) + command = shlex.split("samtools faidx %s" % local_ref) + result = subprocess.call( command, stderr=self.logfile ) + if result != 0: + return False + else: + self.locations[ 'nt' ].append( local_ref ) + 
os.remove( local_ref ) + return True + +class WithChDir(): + def __init__( self, target ): + self.working = target + self.previous = os.getcwd() + def __enter__( self ): + os.chdir( self.working ) + def __exit__( self, *args ): + os.chdir( self.previous ) + + +if __name__ == "__main__": + # Parse command line. + parser = optparse.OptionParser() + (options, args) = parser.parse_args() + indexer, infile, outfile, working_dir = args + + # Create archive. + idxobj = ManagedIndexer( outfile, infile, working_dir ) + idxobj.run_indexer( indexer ) + \ No newline at end of file diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 lib/galaxy/web/controllers/data_admin.py --- /dev/null +++ b/lib/galaxy/web/controllers/data_admin.py @@ -0,0 +1,175 @@ +import sys, ftplib, json +from galaxy import model, util +from galaxy.jobs import transfer_manager +from galaxy.web.base.controller import * +from galaxy.web.framework.helpers import time_ago, iff, grids +from galaxy.model.orm import * +from library_common import get_comptypes, lucene_search, whoosh_search + +# Older py compatibility +try: + set() +except: + from sets import Set as set + +import logging +log = logging.getLogger( __name__ ) + +class DataAdmin( BaseUIController ): + jobstyles = dict( + done='panel-done-message', + waiting='state-color-waiting', + running='state-color-running', + downloaded='state-color-running', + new='state-color-new', + ok='panel-done-message', + error='panel-error-message' + ) + + @web.expose + @web.require_admin + def manage_data( self, trans, **kwd ): + dbkeys = trans.db_builds + return trans.fill_template( '/admin/data_admin/data_form.mako', dbkeys=dbkeys ) + + @web.expose + @web.require_admin + def download_build( self, trans, **kwd ): + """Download a genome from a remote source and add it to the library.""" + params = util.Params( kwd ) + source = params.get('source', '') + longname = params.get('longname', None) + if not isinstance( params.get( 'indexers', None ), list ): + indexers = [ params.get( 'indexers', None ) ] + else: + indexers = params.get( 'indexers', None ) + if indexers is not None: + if indexers == [None]: + indexers = None + url = None + liftover = None + newlift = [] + dbkey = params.get( 'dbkey', None ) + dbkeys = dict() + protocol = 'http' + + if source == 'NCBI': + dbkey = params.get('dbkey', '')[0] + url = 'http://togows.dbcls.jp/entry/ncbi-nucleotide/%s.fasta' % dbkey + elif source == 'Broad': + dbkey = params.get('dbkey', '')[0] + url = 'ftp://ftp.broadinstitute.org/pub/seq/references/%s.fasta' % dbkey + elif source == 'UCSC': + longname = None + for build in trans.db_builds: + if dbkey[1] == build[0]: + dbkey = build[0] + longname = build[1] + break + assert dbkey is not '?', 'That build was not found' + ftp = ftplib.FTP('hgdownload.cse.ucsc.edu') + ftp.login('anonymous', 'user@example.com') + checker = [] + liftover = [] + newlift = [] + try: + ftp.retrlines('NLST /goldenPath/%s/liftOver/*.chain.gz' % dbkey, liftover.append) + for chain in liftover: + fname = chain.split( '/' )[-1] + target = fname.replace( '.over.chain.gz', '' ).split( 'To' )[1] + target = target[0].lower() + target[1:] + newlift.append( [ chain, dbkey, target ] ) + current = dbkey[0].upper() + dbkey[1:] + targetfile = '%sTo%s.over.chain.gz' % ( target, current ) + newlift.append( [ '/goldenPath/%s/liftOver/%s' % ( target, targetfile ), target, dbkey ] ) + except: + newlift = None + pass + ftp.retrlines('NLST /goldenPath/%s/bigZips/' % dbkey, checker.append) + for filename in [ 
dbkey, 'chromFa' ]: + for extension in [ '.tar.gz', '.tar.bz2', '.zip', '.fa.gz', '.fa.bz2' ]: + testfile = '/goldenPath/%s/bigZips/%s%s' % ( dbkey, filename, extension ) + if testfile in checker: + url = 'ftp://hgdownload.cse.ucsc.edu%s' % testfile + break; + else: + continue + if url is None: + message = u'The genome %s was not found on the UCSC server.' % dbkey + status = u'error' + return trans.fill_template( '/admin/data_admin/data_form.mako', + message=message, + status=status ) + elif source == 'Ensembl': + section = params.get('ensembl_section', '') + release1 = params.get('release_number', '') + organism = params.get('organism', '') + name = params.get('name', '') + longname = organism + dbkey = name + release2 = params.get('release2', '') + release2 = ".%s" % release2 if release2 else "" + if section == 'standard': + url = 'ftp://ftp.ensembl.org/pub/release-%s/fasta/%s/dna/%s.%s%s.dna.toplevel.fa.gz' % \ + (release1, organism.lower(), organism, name, release2) + else: + url = 'ftp://ftp.ensemblgenomes.org/pub/%s/release-%s/fasta/%s/dna/%s.%s%s.dna.toplevel.fa.gz' % \ + (section, release1, organism.lower(), organism, name, release2) + elif source == 'local': + url = 'http://127.0.0.1/%s.tar.gz' % dbkey + else: + raise ValueError + params = dict( protocol='http', name=dbkey, datatype='fasta', url=url, user=trans.user.id ) + jobid = trans.app.job_manager.deferred_job_queue.plugins['GenomeTransferPlugin'].create_job( trans, url, dbkey, longname, indexers ) + chainjob = [] + if newlift is not None: + for chain in newlift: + liftover_url = u'ftp://hgdownload.cse.ucsc.edu%s' % chain[0] + from_genome = chain[1] + to_genome = chain[2] + destfile = liftover_url.split('/')[-1].replace('.gz', '') + chainjob.append( trans.app.job_manager.deferred_job_queue.plugins['LiftOverTransferPlugin'].create_job( trans, liftover_url, dbkey, from_genome, to_genome, destfile ) ) + job = trans.app.job_manager.deferred_job_queue.plugins['GenomeTransferPlugin'].get_job_status( jobid ) + job.params['liftover'] = chainjob + trans.app.model.context.current.add( job ) + trans.app.model.context.current.flush() + return trans.response.send_redirect( web.url_for( controller='data_admin', + action='monitor_status', + job=jobid ) ) + + @web.expose + @web.require_admin + def monitor_status( self, trans, **kwd ): + params = util.Params( kwd ) + jobid = params.get( 'job', '' ) + chains = params.get( 'chains', [] ) + jobs = self._get_jobs( jobid, trans ) + return trans.fill_template( '/admin/data_admin/download_status.mako', mainjob=jobid, jobs=jobs ) + + @web.expose + @web.require_admin + def ajax_statusupdate( self, trans, **kwd ): + sa_session = trans.app.model.context.current + jobs = [] + params = util.Params( kwd ) + jobid = params.get( 'jobid', '' ) + jobs = self._get_jobs( jobid, trans ) + return trans.fill_template( '/admin/data_admin/ajax_statusupdate.mako', mainjob=jobid, jobs=jobs ) + + def _get_jobs( self, jobid, trans ): + jobs = [] + job = trans.app.job_manager.deferred_job_queue.plugins['GenomeTransferPlugin'].get_job_status( jobid ) + sa_session = trans.app.model.context.current + idxjobs = sa_session.query( model.GenomeIndexToolData ).filter_by( deferred_job_id=job.id, transfer_job_id=job.transfer_job.id ).all() + if job.params[ 'liftover' ] is not None: + for jobid in job.params[ 'liftover' ]: + lo_job = trans.app.job_manager.deferred_job_queue.plugins['LiftOverTransferPlugin'].get_job_status( jobid ) + jobs.append( dict( jobid=lo_job.id, state=lo_job.state, type='Download liftOver' ) ) + for idxjob in 
idxjobs: + jobentry = sa_session.query( model.Job ).filter_by( id=idxjob.job_id ).first() + jobs.append( dict( jobid=jobentry.id, state=jobentry.state, type='Index Genome' ) ) + jobs.append( dict ( jobid=job.id, state=job.state, type='Main Job' ) ) + jobs.append( dict ( jobid=job.transfer_job.id, state=job.transfer_job.state, type='Download Genome' ) ) + for je in jobs: + je[ 'style' ] = self.jobstyles[ je[ 'state' ] ] + return jobs diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 templates/admin/data_admin/ajax_statusupdate.mako --- /dev/null +++ b/templates/admin/data_admin/ajax_statusupdate.mako @@ -0,0 +1,5 @@ +%for jobentry in jobs: + <div class="${jobentry['style']} dialog-box" id="job${jobentry['jobid']}" data-state="${jobentry['state']}" data-jobid="${jobentry['jobid']}" data-type="${jobentry['type']}"> + <span class="inline">${jobentry['type']}</span> + </div> +%endfor diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 templates/admin/data_admin/data_form.mako --- /dev/null +++ b/templates/admin/data_admin/data_form.mako @@ -0,0 +1,190 @@ +<%inherit file="/base.mako"/> +<%namespace file="/message.mako" import="render_msg" /> +<%namespace file="/library/common/common.mako" import="common_javascripts" /> + +<%! + def inherit(context): + if context.get('use_panels'): + return '/webapps/galaxy/base_panels.mako' + else: + return '/base.mako' +%> +<%inherit file="${inherit(context)}"/> + +<%def name="init()"> +<% + self.has_left_panel=False + self.has_right_panel=False + self.message_box_visible=False + self.active_view="user" + self.overlay_visible=False + self.has_accessible_datasets = False +%> +</%def> +<%def name="stylesheets()"> + ${parent.stylesheets()} + ${h.css( "autocomplete_tagging" )} +</%def> +<%def name="javascripts()"> + ${parent.javascripts()} + ${h.js("jquery.autocomplete", "autocomplete_tagging" )} +</%def> +## +## Override methods from base.mako and base_panels.mako +## +<%def name="center_panel()"> + <div style="overflow: auto; height: 100%;"> + <div class="page-container" style="padding: 10px;"> + ${render_content()} + </div> + </div> +</%def> +<style type="text/css"> + .params-block { display: none; } +</style> +<div class="toolForm"> + %if message: + <div class="${status}">${message}</div> + %endif + <div class="toolFormTitle">Get build from a remote server</div> + <div class="toolFormBody"> + <form name="download_build" action="${h.url_for( controller='data_admin', action='download_build' )}" enctype="multipart/form-data" method="post"> + <div class="form-row"> + <label for="source">Data Source</label> + <select id="datasource" name="source" label="Data Source"> + <option value="local">localhost</option> + <option value="UCSC">UCSC</option> + <option value="Broad">Broad Institute</option> + <option value="NCBI">NCBI</option> + <option value="Ensembl">EnsemblGenome</option> + </select> + <div style="clear: both;"> </div> + </div> + <div class="form-row"> + <label for="indexers">Indexers</label> + <select name="indexers" multiple style="width: 200px; height: 125px;"> + <option value="2bit">TwoBit</option> + <option value="bowtie">Bowtie</option> + <option value="bowtie2">Bowtie 2</option> + <option value="bwa">BWA</option> + <option value="perm">PerM</option> + <option value="picard">Picard</option> + <option value="sam">sam</option> + </select> + <div class="toolParamHelp" style="clear: both;"> + Select the indexers you want to run on the FASTA file after downloading. 
+ </div> + </div> + <h2>Parameters</h2> + <div id="params_generic" class="params-block" style="display: block;"> + <div class="form-row"> + <label for="longname">Internal Name</label> + <input name="longname" type="text" label="Internal Name" /> + <div style="clear: both;"> </div> + </div> + <div class="form-row"> + <label for="uniqid">Internal Unique Identifier</label> + <input name="uniqid" type="text" label="Internal Identifier" /> + <div style="clear: both;"> </div> + </div> + <div id="dlparams"> + <div class="form-row"> + <label for="dbkey">External Name</label> + <input name="dbkey" type="text" label="Genome Unique Name" /> + <div style="clear: both;"> </div> + </div> + </div> + </div> + <div id="params_ensembl" class="params-block"> + <div class="form-row"> + <label for="ensembl_section">Section</label> + <input name="ensembl_section" type="text" label="Section" /> + <div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + Ensembl section, either standard or one of plants, protists, metazoa, fungi, bacteria. + </div> + </div> + <div class="form-row"> + <label for="release_number">Release Number</label> + <input name="release_number" type="text" label="Release" /> + <div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + Release number, e.g. ftp://ftp.ensembl.org/pub/release-<strong style="color: red;">56</strong>/fasta/callithrix_jacchus/dna/Callithrix_jacchus.calJac3.56.dna.toplevel.fa.gz + </div> + </div> + <div class="form-row"> + <label for="organism">Organism</label> + <input name="organism" type="text" label="Organism" /> + <div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + Organism long name, e.g. ftp://ftp.ensembl.org/pub/release-56/fasta/callithrix_jacchus/dna/<strong style="color: red;">Callithrix_jacchus</strong>.calJac3.56.dna.toplevel.fa.gz + </div> + </div> + <div class="form-row"> + <label for="name">Name</label> + <input name="name" type="text" label="name" /> + <div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + Organism short name, e.g. ftp://ftp.ensembl.org/pub/release-56/fasta/callithrix_jacchus/dna/Callithrix_jacchus.<strong style="color: red;">calJac3</strong>.56.dna.toplevel.fa.gz + </div> + </div> + <div class="form-row"> + <label for="release2">Release ID</label> + <input name="release2" type="text" label="Release ID" /> + <div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + Release ID, e.g. 
ftp://ftp.ensembl.org/pub/release-56/fasta/callithrix_jacchus/dna/Callithrix_jacchus.calJac3.<strong style="color: red;">56</strong>.dna.toplevel.fa.gz + </div> + </div> + </div> + <div id="params_ucsc" class="params-block"> + <div class="form-row"> + <label>Genome:</label> + <div class="form-row-input"> + <select name="dbkey" last_selected_value="?"> + %for dbkey in dbkeys: + %if dbkey[0] == last_used_build: + <option value="${dbkey[0]}" selected>${dbkey[1]}</option> + %else: + <option value="${dbkey[0]}">${dbkey[1]}</option> + %endif + %endfor + </select> + </div> + <div class="toolParamHelp" style="clear: both;"> + If you can't find the build you want in this list, <insert link to instructions here> + </div> + </div> + </div> + <div class="form-row"> + <input type="submit" class="primary-button" name="runtool_btn" value="Download and index"/> + </div> + <script type="text/javascript"> + $(document).ready(function() { + checkDataSource(); + }); + $('#datasource').change(function() { + checkDataSource(); + }); + function checkDataSource() { + var ds = $('#datasource').val() + $('.params-block').each(function() { + $(this).hide(); + }); + switch (ds) { + case 'UCSC': + $('#params_ucsc').show(); + break; + case 'Ensembl': + $('#params_ensembl').show(); + break; + case 'NCBI': + case 'Broad': + default: + $('#params_generic').show(); + break; + } + }; + </script> + </form> +</div> diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 templates/admin/data_admin/download_status.mako --- /dev/null +++ b/templates/admin/data_admin/download_status.mako @@ -0,0 +1,65 @@ +<%namespace file="/library/common/library_item_info.mako" import="render_library_item_info" /> +<%namespace file="/library/common/common.mako" import="render_actions_on_multiple_items" /> +<%namespace file="/library/common/common.mako" import="render_compression_types_help" /> +<%namespace file="/library/common/common.mako" import="common_javascripts" /> + +<%! + def inherit(context): + if context.get('use_panels'): + return '/webapps/galaxy/base_panels.mako' + else: + return '/base.mako' +%> +<%inherit file="${inherit(context)}"/> + +<%def name="init()"> +<% + self.has_left_panel=False + self.has_right_panel=False + self.message_box_visible=False + self.active_view="user" + self.overlay_visible=False + self.has_accessible_datasets = False +%> +</%def> + +## +## Override methods from base.mako and base_panels.mako +## +<%def name="center_panel()"> + <div style="overflow: auto; height: 100%;"> + <div class="page-container" style="padding: 10px;"> + ${render_content()} + </div> + </div> +</%def> +<p>The genome build and any selected indexers have been added to the job queue. 
Below you will see the status of each job.</p> +<div id="jobStatus" data-job="${mainjob}"> +%for jobentry in jobs: + <div class="${jobentry['style']} dialog-box" id="job${jobentry['jobid']}" data-state="${jobentry['state']}" data-jobid="${jobentry['jobid']}" data-type="${jobentry['type']}"> + <span class="inline">${jobentry['type']}</span> + </div> +%endfor +</div> +<a href="${h.url_for( controller='data_admin', action='manage_data' )}">Return to the download form</a> +<script type="text/javascript"> + function getNewHtml(jobid) { + $.get('${h.url_for( controller='data_admin', action='ajax_statusupdate' )}', { jobid: jobid }, function(data) { + $('#jobStatus').html(data); + }); + // Keep polling while any job div still reports a non-terminal style. + $('#jobStatus').children().each(function() { + var state = $(this).attr('class'); + if (state != 'panel-done-message dialog-box' && state != 'panel-error-message dialog-box') { + setJobRefreshers(); + return; + } + }); + } + function setJobRefreshers() { + $('#jobStatus').delay(3000).queue(function(n) { getNewHtml($(this).attr('data-job')); n(); }).fadeIn(750); + } + $(document).ready(function() { + setJobRefreshers(); + }); +</script> \ No newline at end of file diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 templates/webapps/galaxy/admin/index.mako --- a/templates/webapps/galaxy/admin/index.mako +++ b/templates/webapps/galaxy/admin/index.mako @@ -57,6 +57,7 @@ <div class="toolSectionBg"><div class="toolTitle"><a href="${h.url_for( controller='admin', action='quotas', webapp=webapp )}" target="galaxy_main">Manage quotas</a></div><div class="toolTitle"><a href="${h.url_for( controller='library_admin', action='browse_libraries' )}" target="galaxy_main">Manage data libraries</a></div> + <div class="toolTitle"><a href="${h.url_for( controller='data_admin', action='manage_data' )}" target="galaxy_main">Manage local data</a></div></div></div><div class="toolSectionPad"></div> diff -r b5090cb8044469b7f5d3136cedbe80f009745f0b -r 67b5abaaf8bc702529a91a5d1d69318a2aaf6c75 universe_wsgi.ini.sample --- a/universe_wsgi.ini.sample +++ b/universe_wsgi.ini.sample @@ -117,6 +117,9 @@ # -- Files and directories +# Path where genome builds are stored. This defaults to tool-data/genome +#genome_data_path = tool-data/genome + # Dataset files are stored in this directory. #file_path = database/files Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
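To watch the new status page work outside a browser, here is a minimal sketch that mirrors the polling loop in download_status.mako: it repeatedly fetches the ajax_statusupdate fragment for the main job and stops once every job div carries one of the terminal styles. The base URL, the GET route to the data_admin controller, and the omission of admin login/cookie handling are assumptions made for illustration, not part of this changeset.

import re
import time
import urllib2

GALAXY_URL = 'http://localhost:8080'  # assumption: a locally running instance; admin authentication omitted
TERMINAL_STYLES = ( 'panel-done-message', 'panel-error-message' )

def poll_genome_jobs( main_job_id, interval=3 ):
    """Poll the ajax_statusupdate fragment until every listed job is done or errored."""
    # Assumed URL layout for the data_admin controller; jobid is passed as a GET parameter,
    # as the template's $.get call does.
    url = '%s/data_admin/ajax_statusupdate?jobid=%s' % ( GALAXY_URL, main_job_id )
    while True:
        fragment = urllib2.urlopen( url ).read()
        # ajax_statusupdate.mako renders each job as <div class="<style> dialog-box" ...>.
        styles = re.findall( r'class="(\S+) dialog-box"', fragment )
        if styles and all( style in TERMINAL_STYLES for style in styles ):
            return fragment
        time.sleep( interval )

The stop condition is the same class check the page's setJobRefreshers() loop uses: once the download job and every index job report a done or error style, no further refresh is scheduled.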