commit/galaxy-central: 4 new changesets
4 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/47c6c6bb8eec/
Changeset: 47c6c6bb8eec
User: iracooke
Date: 2014-07-07 03:16:09
Summary: Add sqlite datatype and corresponding dataprovider
Affected #: 3 files

diff -r 6d50cd22380253129121e3e7359a6286ee9b1663 -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample
+++ b/datatypes_conf.xml.sample
@@ -177,6 +177,7 @@
         <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/>
         <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" description="Any data in tab delimited format (tabular)." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Tabular_.28tab_delimited.29"/>
         <datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/>
+        <datatype extension="sqlite" type="galaxy.datatypes.binary:SQlite" mimetype="application/octet-stream" display_in_upload="true"/>
         <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true" description="Any text file." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Plain_text"/>
         <datatype extension="linecount" type="galaxy.datatypes.data:LineCount" display_in_upload="false"/>
         <datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/>
@@ -262,6 +263,7 @@
     -->
     <sniffer type="galaxy.datatypes.tabular:Vcf"/>
     <sniffer type="galaxy.datatypes.binary:TwoBit"/>
+    <sniffer type="galaxy.datatypes.binary:SQlite"/>
     <sniffer type="galaxy.datatypes.binary:Bam"/>
     <sniffer type="galaxy.datatypes.binary:Sff"/>
     <sniffer type="galaxy.datatypes.xml:Phyloxml"/>

diff -r 6d50cd22380253129121e3e7359a6286ee9b1663 -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.py
+++ b/lib/galaxy/datatypes/binary.py
@@ -12,6 +12,7 @@
 import subprocess
 import tempfile
 import zipfile
+import sqlite3
 from urllib import urlencode, quote_plus
 from galaxy import eggs
@@ -545,3 +546,45 @@
         return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size()))

 Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit)
+
+
+@dataproviders.decorators.has_dataproviders
+class SQlite ( Binary ):
+    file_ext = "sqlite"
+
+    # Connects and runs a query that should work on any real database
+    # If the file is not sqlite, an exception will be thrown and the sniffer will return false
+    def sniff( self, filename ):
+        try:
+            conn = sqlite3.connect(filename)
+            schema_version=conn.cursor().execute("pragma schema_version").fetchone()
+            conn.close()
+            if schema_version is not None:
+                return True
+            return False
+        except:
+            return False
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek = "SQLite Database"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "SQLite Database (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+
+    @dataproviders.decorators.dataprovider_factory( 'sqlite', dataproviders.dataset.SQliteDataProvider.settings )
+    def sqlite_dataprovider( self, dataset, **settings ):
+        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
+        return dataproviders.dataset.SQliteDataProvider( dataset_source, **settings )
+
+
+Binary.register_sniffable_binary_format("sqlite","sqlite",SQlite)
+

diff -r 6d50cd22380253129121e3e7359a6286ee9b1663 -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 lib/galaxy/datatypes/dataproviders/dataset.py
--- a/lib/galaxy/datatypes/dataproviders/dataset.py
+++ b/lib/galaxy/datatypes/dataproviders/dataset.py
@@ -11,6 +11,7 @@
 import line
 import column
 import external
+import sqlite3
 from galaxy import eggs
 eggs.require( 'bx-python' )
@@ -700,3 +701,29 @@
         #TODO: as samtools - need more info on output format
         raise NotImplementedError()
         super( BGzipTabixDataProvider, self ).__init__( dataset, **kwargs )
+
+
+
+class SQliteDataProvider ( base.DataProvider ):
+    """
+    Data provider that uses a sqlite database file as its source.
+
+    Allows any query to be run and returns the resulting rows as sqlite3 row objects
+    """
+    settings = {
+        'query' : 'str'
+    }
+
+    def __init__( self, source, query=None, **kwargs ):
+        self.query=query
+        self.connection = sqlite3.connect(source.dataset.file_name);
+        self.connection.row_factory = sqlite3.Row
+        super( SQliteDataProvider, self ).__init__( source, **kwargs )
+
+    def __iter__( self ):
+        if self.query is not None:
+            for row in self.connection.cursor().execute(self.query):
+                yield row
+        else:
+            yield
+

https://bitbucket.org/galaxy/galaxy-central/commits/034c0159c0cf/
Changeset: 034c0159c0cf
User: iracooke
Date: 2014-07-24 06:30:17
Summary: Merged galaxy/galaxy-central into default
Affected #: 278 files

diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b .hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -11,6 +11,9 @@
 scripts/scramble/lib
 scripts/scramble/archives

+# Python virtualenv
+.venv
+
 # Database stuff
 database/beaker_sessions
 database/community_files

diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/app.py
--- a/lib/galaxy/app.py
+++ b/lib/galaxy/app.py
@@ -12,7 +12,6 @@
 from galaxy.visualization.data_providers.registry import DataProviderRegistry
 from galaxy.visualization.registry import VisualizationsRegistry
 from galaxy.tools.imp_exp import load_history_imp_exp_tools
-from galaxy.tools.genome_index import load_genome_index_tools
 from galaxy.sample_tracking import external_service_types
 from galaxy.openid.providers import OpenIDProviders
 from galaxy.tools.data_manager.manager import DataManagers
@@ -93,8 +92,6 @@
         self.datatypes_registry.load_external_metadata_tool( self.toolbox )
         # Load history import/export tools.
         load_history_imp_exp_tools( self.toolbox )
-        # Load genome indexer tool.
-        load_genome_index_tools( self.toolbox )
         # visualizations registry: associates resources with visualizations, controls how to render
         self.visualizations_registry = None
         if self.config.visualization_plugins_directory:

diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/config.py
--- a/lib/galaxy/config.py
+++ b/lib/galaxy/config.py
@@ -15,13 +15,12 @@
 from galaxy.web.formatting import expand_pretty_datetime_format
 from galaxy.util import string_as_bool
 from galaxy.util import listify
-from galaxy.util import parse_xml
 from galaxy.util.dbkeys import GenomeBuilds
 from galaxy import eggs
-import pkg_resources

 log = logging.getLogger( __name__ )

+
 def resolve_path( path, root ):
     """If 'path' is relative make absolute by prepending 'root'"""
     if not( os.path.isabs( path ) ):
@@ -40,9 +39,9 @@
         self.config_dict = kwargs
         self.root = kwargs.get( 'root_dir', '.'
) # Collect the umask and primary gid from the environment - self.umask = os.umask( 077 ) # get the current umask - os.umask( self.umask ) # can't get w/o set, so set it back - self.gid = os.getgid() # if running under newgrp(1) we'll need to fix the group of data created on the cluster + self.umask = os.umask( 077 ) # get the current umask + os.umask( self.umask ) # can't get w/o set, so set it back + self.gid = os.getgid() # if running under newgrp(1) we'll need to fix the group of data created on the cluster # Database related configuration self.database = resolve_path( kwargs.get( "database_file", "database/universe.sqlite" ), self.root ) @@ -75,7 +74,7 @@ self.enable_unique_workflow_defaults = string_as_bool( kwargs.get( 'enable_unique_workflow_defaults', False ) ) self.tool_path = resolve_path( kwargs.get( "tool_path", "tools" ), self.root ) self.tool_data_path = resolve_path( kwargs.get( "tool_data_path", "tool-data" ), os.getcwd() ) - self.len_file_path = resolve_path( kwargs.get( "len_file_path", os.path.join( self.tool_data_path, 'shared','ucsc','chrom') ), self.root ) + self.len_file_path = resolve_path( kwargs.get( "len_file_path", os.path.join( self.tool_data_path, 'shared', 'ucsc', 'chrom') ), self.root ) self.test_conf = resolve_path( kwargs.get( "test_conf", "" ), self.root ) # The value of migrated_tools_config is the file reserved for containing only those tools that have been eliminated from the distribution # and moved to the tool shed. @@ -169,7 +168,7 @@ self.admin_users = kwargs.get( "admin_users", "" ) self.admin_users_list = [u.strip() for u in self.admin_users.split(',') if u] self.reset_password_length = int( kwargs.get('reset_password_length', '15') ) - self.mailing_join_addr = kwargs.get('mailing_join_addr',"galaxy-announce-join@bx.psu.edu") + self.mailing_join_addr = kwargs.get('mailing_join_addr', 'galaxy-announce-join@bx.psu.edu') self.error_email_to = kwargs.get( 'error_email_to', None ) self.activation_email = kwargs.get( 'activation_email', None ) self.user_activation_on = string_as_bool( kwargs.get( 'user_activation_on', False ) ) @@ -271,9 +270,9 @@ self.object_store_cache_path = resolve_path( kwargs.get( "object_store_cache_path", "database/object_store_cache" ), self.root ) # Handle AWS-specific config options for backward compatibility if kwargs.get( 'aws_access_key', None) is not None: - self.os_access_key= kwargs.get( 'aws_access_key', None ) - self.os_secret_key= kwargs.get( 'aws_secret_key', None ) - self.os_bucket_name= kwargs.get( 's3_bucket', None ) + self.os_access_key = kwargs.get( 'aws_access_key', None ) + self.os_secret_key = kwargs.get( 'aws_secret_key', None ) + self.os_bucket_name = kwargs.get( 's3_bucket', None ) self.os_use_reduced_redundancy = kwargs.get( 'use_reduced_redundancy', False ) else: self.os_access_key = kwargs.get( 'os_access_key', None ) @@ -376,6 +375,8 @@ self.fluent_port = int( kwargs.get( 'fluent_port', 24224 ) ) # visualization plugin framework self.visualization_plugins_directory = kwargs.get( 'visualization_plugins_directory', None ) + # Default chunk size for chunkable datatypes -- 64k + self.display_chunk_size = int( kwargs.get( 'display_chunk_size', 65536) ) @property def sentry_dsn_public( self ): @@ -452,19 +453,13 @@ except Exception, e: raise ConfigurationError( "Unable to create missing directory: %s\n%s" % ( path, e ) ) # Create the directories that it makes sense to create - for path in self.file_path, \ - self.new_file_path, \ - self.job_working_directory, \ - self.cluster_files_directory, \ - 
self.template_cache, \ - self.ftp_upload_dir, \ - self.library_import_dir, \ - self.user_library_import_dir, \ - self.nginx_upload_store, \ - './static/genetrack/plots', \ - self.whoosh_index_dir, \ - self.object_store_cache_path, \ - os.path.join( self.tool_data_path, 'shared', 'jars' ): + for path in (self.file_path, self.new_file_path, + self.job_working_directory, self.cluster_files_directory, + self.template_cache, self.ftp_upload_dir, + self.library_import_dir, self.user_library_import_dir, + self.nginx_upload_store, './static/genetrack/plots', + self.whoosh_index_dir, self.object_store_cache_path, + os.path.join( self.tool_data_path, 'shared', 'jars' )): self._ensure_directory( path ) # Check that required files exist tool_configs = self.tool_configs @@ -480,7 +475,7 @@ if key in self.deprecated_options: log.warning( "Config option '%s' is deprecated and will be removed in a future release. Please consult the latest version of the sample configuration file." % key ) - def is_admin_user( self,user ): + def is_admin_user( self, user ): """ Determine if the provided user is listed in `admin_users`. @@ -495,12 +490,13 @@ """ return resolve_path( path, self.root ) + def get_database_engine_options( kwargs, model_prefix='' ): """ Allow options for the SQLAlchemy database engine to be passed by using the prefix "database_engine_option". """ - conversions = { + conversions = { 'convert_unicode': string_as_bool, 'pool_timeout': int, 'echo': string_as_bool, @@ -522,6 +518,7 @@ rval[ key ] = value return rval + def configure_logging( config ): """ Allow some basic logging configuration to be read from ini file. @@ -556,7 +553,7 @@ root.addHandler( handler ) # If sentry is configured, also log to it if config.sentry_dsn: - pkg_resources.require( "raven" ) + eggs.require( "raven" ) from raven.handlers.logging import SentryHandler sentry_handler = SentryHandler( config.sentry_dsn ) sentry_handler.setLevel( logging.WARN ) diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -126,10 +126,21 @@ rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy ) return rval - def from_JSON_dict( self, filename, path_rewriter=None ): + def from_JSON_dict( self, filename=None, path_rewriter=None, json_dict=None ): dataset = self.parent - log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) ) - JSONified_dict = json.load( open( filename ) ) + if filename is not None: + log.debug( 'loading metadata from file for: %s %s' % ( dataset.__class__.__name__, dataset.id ) ) + JSONified_dict = json.load( open( filename ) ) + elif json_dict is not None: + log.debug( 'loading metadata from dict for: %s %s' % ( dataset.__class__.__name__, dataset.id ) ) + if isinstance( json_dict, basestring ): + JSONified_dict = json.loads( json_dict ) + elif isinstance( json_dict, dict ): + JSONified_dict = json_dict + else: + raise ValueError( "json_dict must be either a dictionary or a string, got %s." 
% ( type( json_dict ) ) ) + else: + raise ValueError( "You must provide either a filename or a json_dict" ) for name, spec in self.spec.items(): if name in JSONified_dict: from_ext_kwds = {} @@ -143,13 +154,15 @@ #metadata associated with our dataset, we'll delete it from our dataset's metadata dict del dataset._metadata[ name ] - def to_JSON_dict( self, filename ): + def to_JSON_dict( self, filename=None ): #galaxy.model.customtypes.json_encoder.encode() meta_dict = {} dataset_meta_dict = self.parent._metadata for name, spec in self.spec.items(): if name in dataset_meta_dict: meta_dict[ name ] = spec.param.to_external_value( dataset_meta_dict[ name ] ) + if filename is None: + return json.dumps( meta_dict ) json.dump( meta_dict, open( filename, 'wb+' ) ) def __getstate__( self ): diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py +++ b/lib/galaxy/datatypes/tabular.py @@ -25,7 +25,6 @@ # All tabular data is chunkable. CHUNKABLE = True - CHUNK_SIZE = 10000 """Add metadata elements""" MetadataElement( name="comment_lines", default=0, desc="Number of comment lines", readonly=False, optional=True, no_value=0 ) @@ -262,13 +261,13 @@ def get_chunk(self, trans, dataset, chunk): ck_index = int(chunk) f = open(dataset.file_name) - f.seek(ck_index * self.CHUNK_SIZE) + f.seek(ck_index * trans.app.config.display_chunk_size) # If we aren't at the start of the file, seek to next newline. Do this better eventually. if f.tell() != 0: cursor = f.read(1) while cursor and cursor != '\n': cursor = f.read(1) - ck_data = f.read(self.CHUNK_SIZE) + ck_data = f.read(trans.app.config.display_chunk_size) cursor = f.read(1) while cursor and ck_data[-1] != '\n': ck_data += cursor diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/exceptions/__init__.py --- a/lib/galaxy/exceptions/__init__.py +++ b/lib/galaxy/exceptions/__init__.py @@ -86,6 +86,11 @@ err_code = error_codes.USER_REQUEST_INVALID_PARAMETER +class AuthenticationFailed( MessageException ): + status_code = 401 + err_code = error_codes.USER_AUTHENTICATION_FAILED + + class AuthenticationRequired( MessageException ): status_code = 403 #TODO: as 401 and send WWW-Authenticate: ??? diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/exceptions/error_codes.json --- a/lib/galaxy/exceptions/error_codes.json +++ b/lib/galaxy/exceptions/error_codes.json @@ -60,6 +60,11 @@ "message": "Supplied incorrect or incompatible tool meta parameters." }, { + "name": "USER_AUTHENTICATION_FAILED", + "code": 401001, + "message": "Authentication failed, invalid credentials supplied." 
+ }, + { "name": "USER_NO_API_KEY", "code": 403001, "message": "API authentication required for this request" diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1192,9 +1192,6 @@ param_dict = self.tool.params_from_strings( param_dict, self.app ) # Check for and move associated_files self.tool.collect_associated_files(out_data, self.working_directory) - gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() - if gitd: - self.tool.collect_associated_files({'': gitd}, self.working_directory) # Create generated output children and primary datasets and add to param_dict collected_datasets = { 'children': self.tool.collect_child_datasets(out_data, self.working_directory), @@ -1248,7 +1245,6 @@ self.external_output_metadata.cleanup_external_metadata( self.sa_session ) galaxy.tools.imp_exp.JobExportHistoryArchiveWrapper( self.job_id ).cleanup_after_job( self.sa_session ) galaxy.tools.imp_exp.JobImportHistoryArchiveWrapper( self.app, self.job_id ).cleanup_after_job() - galaxy.tools.genome_index.GenomeIndexToolWrapper( self.job_id ).postprocessing( self.sa_session, self.app ) if delete_files: self.app.object_store.delete(self.get_job(), base_dir='job_work', entire_dir=True, dir_only=True, extra_dir=str(self.job_id)) except: @@ -1351,10 +1347,8 @@ dataset_path_rewriter = self.dataset_path_rewriter job = self.get_job() - # Job output datasets are combination of history, library, jeha and gitd datasets. + # Job output datasets are combination of history, library, and jeha datasets. special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first() - if not special: - special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first() false_path = None results = [] diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/actions/post.py --- a/lib/galaxy/jobs/actions/post.py +++ b/lib/galaxy/jobs/actions/post.py @@ -215,8 +215,17 @@ p_str += "<label for='pja__"+pja.output_name+"__RenameDatasetAction__newname'>New output name:</label>\ <input type='text' name='pja__"+pja.output_name+"__RenameDatasetAction__newname' value=''/>"; } + inputlist = []; + $.each(node.input_terminals, function(i, v){ + inputlist.push(v.name); + }); + if (inputlist !== []){ + p_str += "Available inputs are: <strong>" + inputlist.join(', ') + "</strong>"; + }else{ + p_str += "No inputs are available for templating into this action."; + } """ - return get_form_template(cls.name, cls.verbose_name, form, "This action will rename the result dataset.") + return get_form_template(cls.name, cls.verbose_name, form, "This action will rename the result dataset. See <a href='https://wiki.galaxyproject.org/Learn/AdvancedWorkflow/Variables'>the wiki</a> for usage information.") @classmethod def get_short_str(cls, pja): diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/deferred/genome_index.py --- a/lib/galaxy/jobs/deferred/genome_index.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Module for managing genome transfer jobs. 
-""" -from __future__ import with_statement - -import logging, shutil, gzip, bz2, zipfile, tempfile, tarfile, sys, os - -from galaxy import eggs -from sqlalchemy import and_ -from data_transfer import * - -log = logging.getLogger( __name__ ) - -__all__ = [ 'GenomeIndexPlugin' ] - -class GenomeIndexPlugin( DataTransfer ): - - def __init__( self, app ): - super( GenomeIndexPlugin, self ).__init__( app ) - self.app = app - self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__'] - self.sa_session = app.model.context.current - - def create_job( self, trans, path, indexes, dbkey, intname ): - params = dict( user=trans.user.id, path=path, indexes=indexes, dbkey=dbkey, intname=intname ) - deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'GenomeIndexPlugin', params = params ) - self.sa_session.add( deferred ) - self.sa_session.flush() - log.debug( 'Job created, id %d' % deferred.id ) - return deferred.id - - def check_job( self, job ): - log.debug( 'Job check' ) - return 'ready' - - def run_job( self, job ): - incoming = dict( path=os.path.abspath( job.params[ 'path' ] ), indexer=job.params[ 'indexes' ][0], user=job.params[ 'user' ] ) - indexjob = self.tool.execute( self, set_output_hid=False, history=None, incoming=incoming, transfer=None, deferred=job ) - job.params[ 'indexjob' ] = indexjob[0].id - job.state = self.app.model.DeferredJob.states.RUNNING - self.sa_session.add( job ) - self.sa_session.flush() - return self.app.model.DeferredJob.states.RUNNING diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/deferred/genome_transfer.py --- a/lib/galaxy/jobs/deferred/genome_transfer.py +++ /dev/null @@ -1,250 +0,0 @@ -""" -Module for managing genome transfer jobs. 
-""" -from __future__ import with_statement - -import logging, shutil, gzip, bz2, zipfile, tempfile, tarfile, sys - -from galaxy import eggs -from sqlalchemy import and_ - -from galaxy.util.odict import odict -from galaxy.workflow.modules import module_factory -from galaxy.jobs.actions.post import ActionBox - -from galaxy.tools.parameters import visit_input_values -from galaxy.tools.parameters.basic import DataToolParameter -from galaxy.tools.data import ToolDataTableManager - -from galaxy.datatypes.checkers import * -from galaxy.datatypes.sequence import Fasta -from data_transfer import * - -log = logging.getLogger( __name__ ) - -__all__ = [ 'GenomeTransferPlugin' ] - -class GenomeTransferPlugin( DataTransfer ): - - locations = {} - - def __init__( self, app ): - super( GenomeTransferPlugin, self ).__init__( app ) - self.app = app - self.tool = app.toolbox.tools_by_id['__GENOME_INDEX__'] - self.sa_session = app.model.context.current - tdtman = ToolDataTableManager( app.config.tool_data_path ) - xmltree = tdtman.load_from_config_file( app.config.tool_data_table_config_path, app.config.tool_data_path ) - for node in xmltree: - table = node.get('name') - location = node.findall('file')[0].get('path') - self.locations[table] = location - - def create_job( self, trans, url, dbkey, intname, indexes ): - job = trans.app.transfer_manager.new( protocol='http', url=url ) - params = dict( user=trans.user.id, transfer_job_id=job.id, protocol='http', type='init_transfer', url=url, dbkey=dbkey, indexes=indexes, intname=intname, liftover=None ) - deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'GenomeTransferPlugin', params = params ) - self.sa_session.add( deferred ) - self.sa_session.flush() - return deferred.id - - def check_job( self, job ): - if job.params['type'] == 'init_transfer': - if not hasattr(job, 'transfer_job'): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - if job.transfer_job.state == 'done': - transfer = job.transfer_job - transfer.state = 'downloaded' - job.params['type'] = 'extract_transfer' - self.sa_session.add( job ) - self.sa_session.add( transfer ) - self.sa_session.flush() - return self.job_states.READY - elif job.transfer_job.state == 'running': - return self.job_states.WAIT - elif job.transfer_job.state == 'new': - assert job.params[ 'protocol' ] in [ 'http', 'ftp', 'https' ], 'Unknown protocol %s' % job.params[ 'protocol' ] - self.app.transfer_manager.run( job.transfer_job ) - self.sa_session.add( job.transfer_job ) - self.sa_session.flush() - return self.job_states.WAIT - else: - log.error( "An error occurred while downloading from %s" % job.params[ 'url' ] ) - return self.job_states.INVALID - elif job.params[ 'type' ] == 'extract_transfer': - return self.job_states.READY - - def get_job_status( self, jobid ): - job = self.sa_session.query( self.app.model.DeferredJob ).get( int( jobid ) ) - if 'transfer_job_id' in job.params: - if not hasattr( job, 'transfer_job' ): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - return job - - def run_job( self, job ): - params = job.params - dbkey = params[ 'dbkey' ] - if not hasattr( job, 'transfer_job' ): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - 
self.sa_session.refresh( job.transfer_job ) - transfer = job.transfer_job - if params[ 'type' ] == 'extract_transfer': - CHUNK_SIZE = 2**20 - destpath = os.path.join( self.app.config.get( 'genome_data_path', 'tool-data/genome' ), job.params[ 'dbkey' ], 'seq' ) - destfile = '%s.fa' % job.params[ 'dbkey' ] - destfilepath = os.path.join( destpath, destfile ) - tmpprefix = '%s_%s_download_unzip_' % ( job.params['dbkey'], job.params[ 'transfer_job_id' ] ) - tmppath = os.path.dirname( os.path.abspath( transfer.path ) ) - if not os.path.exists( destpath ): - os.makedirs( destpath ) - protocol = job.params[ 'protocol' ] - data_type = self._check_compress( transfer.path ) - if data_type is None: - sniffer = Fasta() - if sniffer.sniff( transfer.path ): - data_type = 'fasta' - fd, uncompressed = tempfile.mkstemp( prefix=tmpprefix, dir=tmppath, text=False ) - if data_type in [ 'tar.gzip', 'tar.bzip' ]: - fp = open( transfer.path, 'r' ) - tar = tarfile.open( mode = 'r:*', bufsize = CHUNK_SIZE, fileobj = fp ) - files = tar.getmembers() - for filename in files: - z = tar.extractfile(filename) - while 1: - try: - chunk = z.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.write( fd, '\n' ) - os.close( fd ) - tar.close() - fp.close() - elif data_type == 'gzip': - compressed = gzip.open( transfer.path, mode = 'rb' ) - while 1: - try: - chunk = compressed.read( CHUNK_SIZE ) - except IOError: - compressed.close() - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - compressed.close() - elif data_type == 'bzip': - compressed = bz2.BZ2File( transfer.path, mode = 'r' ) - while 1: - try: - chunk = compressed.read( CHUNK_SIZE ) - except IOError: - compressed.close() - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - compressed.close() - elif data_type == 'zip': - uncompressed_name = None - unzipped = False - z = zipfile.ZipFile( transfer.path ) - z.debug = 3 - for name in z.namelist(): - if name.endswith('/'): - continue - zipped_file = z.open( name ) - while 1: - try: - chunk = zipped_file.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - log.error( 'Problem decompressing zipped data' ) - return self.app.model.DeferredJob.states.INVALID - if not chunk: - break - os.write( fd, chunk ) - zipped_file.close() - os.close( fd ) - z.close() - elif data_type == 'fasta': - uncompressed = transfer.path - else: - job.state = self.app.model.DeferredJob.states.INVALID - log.error( "Unrecognized compression format for file %s." 
% transfer.path ) - self.sa_session.add( job ) - self.sa_session.flush() - return - shutil.move( uncompressed, destfilepath ) - if os.path.exists( transfer.path ): - os.remove( transfer.path ) - os.chmod( destfilepath, 0644 ) - fastaline = '\t'.join( [ dbkey, dbkey, params[ 'intname' ], os.path.abspath( destfilepath ) ] ) - self._add_line( 'all_fasta', fastaline ) - if params[ 'indexes' ] is not None: - job.state = self.app.model.DeferredJob.states.WAITING - job.params[ 'indexjobs' ] = [] - else: - job.state = self.app.model.DeferredJob.states.OK - job.params[ 'type' ] = 'finish_transfer' - transfer.path = os.path.abspath(destfilepath) - transfer.state = 'done' - self.sa_session.add( job ) - self.sa_session.add( transfer ) - if transfer.state == 'done': - if params[ 'indexes' ] is not None: - for indexer in params[ 'indexes' ]: - incoming = dict(indexer=indexer, dbkey=params[ 'dbkey' ], intname=params[ 'intname' ], path=transfer.path, user=params['user'] ) - deferred = self.tool.execute( self, set_output_hid=False, history=None, incoming=incoming, transfer=transfer, deferred=job ) - job.params[ 'indexjobs' ].append( deferred[0].id ) - else: - job.state = self.app.model.DeferredJob.states.OK - self.sa_session.add( job ) - self.sa_session.flush() - return self.app.model.DeferredJob.states.OK - - def _check_compress( self, filepath ): - retval = '' - if tarfile.is_tarfile( filepath ): - retval = 'tar.' - if check_zip( filepath ): - return 'zip' - is_bzipped, is_valid = check_bz2( filepath ) - if is_bzipped and is_valid: - return retval + 'bzip' - is_gzipped, is_valid = check_gzip( filepath ) - if is_gzipped and is_valid: - return retval + 'gzip' - return None - - def _add_line( self, locfile, newline ): - filepath = self.locations[ locfile ] - origlines = [] - output = [] - comments = [] - with open( filepath, 'r' ) as destfile: - for line in destfile: - if line.startswith( '#' ): - comments.append( line.strip() ) - else: - origlines.append( line.strip() ) - if newline not in origlines: - origlines.append( newline ) - output.extend( comments ) - origlines.sort() - output.extend( origlines ) - with open( filepath, 'w+' ) as destfile: - destfile.write( '\n'.join( output ) ) - diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/deferred/liftover_transfer.py --- a/lib/galaxy/jobs/deferred/liftover_transfer.py +++ /dev/null @@ -1,158 +0,0 @@ -""" -Module for managing genome transfer jobs. 
-""" -from __future__ import with_statement - -import logging, shutil, gzip, tempfile, sys - -from galaxy import eggs -from sqlalchemy import and_ - -from galaxy.util.odict import odict -from galaxy.workflow.modules import module_factory -from galaxy.jobs.actions.post import ActionBox - -from galaxy.tools.parameters import visit_input_values -from galaxy.tools.parameters.basic import DataToolParameter - -from galaxy.datatypes.checkers import * - -from data_transfer import * - -log = logging.getLogger( __name__ ) - -__all__ = [ 'LiftOverTransferPlugin' ] - -class LiftOverTransferPlugin( DataTransfer ): - - locations = {} - - def __init__( self, app ): - super( LiftOverTransferPlugin, self ).__init__( app ) - self.app = app - self.sa_session = app.model.context.current - - def create_job( self, trans, url, dbkey, from_genome, to_genome, destfile, parentjob ): - job = trans.app.transfer_manager.new( protocol='http', url=url ) - params = dict( user=trans.user.id, transfer_job_id=job.id, protocol='http', - type='init_transfer', dbkey=dbkey, from_genome=from_genome, - to_genome=to_genome, destfile=destfile, parentjob=parentjob ) - deferred = trans.app.model.DeferredJob( state = self.app.model.DeferredJob.states.NEW, plugin = 'LiftOverTransferPlugin', params = params ) - self.sa_session.add( deferred ) - self.sa_session.flush() - return deferred.id - - def check_job( self, job ): - if job.params['type'] == 'init_transfer': - if not hasattr(job, 'transfer_job'): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - if job.transfer_job.state == 'done': - transfer = job.transfer_job - transfer.state = 'downloaded' - job.params['type'] = 'extract_transfer' - self.sa_session.add( job ) - self.sa_session.add( transfer ) - self.sa_session.flush() - return self.job_states.READY - elif job.transfer_job.state == 'running': - return self.job_states.WAIT - elif job.transfer_job.state == 'new': - assert job.params[ 'protocol' ] in [ 'http', 'ftp', 'https' ], 'Unknown protocol %s' % job.params[ 'protocol' ] - ready = True - parent = self.sa_session.query( self.app.model.DeferredJob ).get( int( job.params[ 'parentjob' ] ) ) - if not hasattr( parent, 'transfer_job' ): - parent.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( parent.params[ 'transfer_job_id' ] ) ) - if parent.transfer_job.state not in [ 'ok', 'error', 'done' ]: - ready = False - for lo_job in parent.params[ 'liftover' ]: - liftoverjob = self.sa_session.query( self.app.model.TransferJob ).get( int( lo_job ) ) - if liftoverjob: - if liftoverjob.state not in [ 'ok', 'error', 'new', 'done' ]: - ready = False - if ready: - self.app.transfer_manager.run( job.transfer_job ) - self.sa_session.add( job.transfer_job ) - self.sa_session.flush() - return self.job_states.WAIT - else: - log.error( "An error occurred while downloading from %s" % job.transfer_job.params[ 'url' ] ) - return self.job_states.INVALID - elif job.params[ 'type' ] == 'extract_transfer': - return self.job_states.READY - - def get_job_status( self, jobid ): - job = self.sa_session.query( self.app.model.DeferredJob ).get( int( jobid ) ) - return job - - def run_job( self, job ): - params = job.params - dbkey = params[ 'dbkey' ] - source = params[ 'from_genome' ] - target = params[ 'to_genome' ] - if not hasattr( job, 'transfer_job' ): - job.transfer_job = self.sa_session.query( self.app.model.TransferJob ).get( int( job.params[ 
'transfer_job_id' ] ) ) - else: - self.sa_session.refresh( job.transfer_job ) - transfer = job.transfer_job - if params[ 'type' ] == 'extract_transfer': - CHUNK_SIZE = 2**20 - destpath = os.path.join( self.app.config.get( 'genome_data_path', 'tool-data/genome' ), source, 'liftOver' ) - if not os.path.exists( destpath ): - os.makedirs( destpath ) - destfile = job.params[ 'destfile' ] - destfilepath = os.path.join( destpath, destfile ) - tmpprefix = '%s_%s_download_unzip_' % ( job.params['dbkey'], job.params[ 'transfer_job_id' ] ) - tmppath = os.path.dirname( os.path.abspath( transfer.path ) ) - if not os.path.exists( destpath ): - os.makedirs( destpath ) - fd, uncompressed = tempfile.mkstemp( prefix=tmpprefix, dir=tmppath, text=False ) - chain = gzip.open( transfer.path, 'rb' ) - while 1: - try: - chunk = chain.read( CHUNK_SIZE ) - except IOError: - os.close( fd ) - log.error( 'Problem decompressing compressed data' ) - exit() - if not chunk: - break - os.write( fd, chunk ) - os.close( fd ) - chain.close() - # Replace the gzipped file with the decompressed file if it's safe to do so - shutil.move( uncompressed, destfilepath ) - os.remove( transfer.path ) - os.chmod( destfilepath, 0644 ) - locline = '\t'.join( [ source, target, os.path.abspath( destfilepath ) ] ) - self._add_line( locline ) - job.state = self.app.model.DeferredJob.states.OK - job.params[ 'type' ] = 'finish_transfer' - transfer.path = os.path.abspath(destfilepath) - transfer.state = 'done' - parentjob = self.sa_session.query( self.app.model.DeferredJob ).get( int( job.params[ 'parentjob' ] ) ) - finished = True - for i in parentjob.params[ 'liftover' ]: - sibling = self.sa_session.query( self.app.model.DeferredJob ).get( int( i ) ) - if sibling.state not in [ 'done', 'ok', 'error' ]: - finished = False - if finished: - parentjob.state = self.app.model.DeferredJob.states.OK - self.sa_session.add( parentjob ) - self.sa_session.add( job ) - self.sa_session.add( transfer ) - self.sa_session.flush() - return self.app.model.DeferredJob.states.OK - - def _add_line( self, newline ): - filepath = 'tool-data/liftOver.loc' - origlines = [] - with open( filepath, 'r' ) as destfile: - for line in destfile: - origlines.append( line.strip() ) - if newline not in origlines: - origlines.append( newline ) - with open( filepath, 'w+' ) as destfile: - destfile.write( '\n'.join( origlines ) ) - diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/mapper.py --- a/lib/galaxy/jobs/mapper.py +++ b/lib/galaxy/jobs/mapper.py @@ -83,7 +83,7 @@ if "job" in function_arg_names or "user" in function_arg_names or "user_email" in function_arg_names or "resource_params" in function_arg_names: job = self.job_wrapper.get_job() history = job.history - user = history and history.user + user = job.user user_email = user and str(user.email) if "job" in function_arg_names: diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/jobs/metrics/formatting.py --- a/lib/galaxy/jobs/metrics/formatting.py +++ b/lib/galaxy/jobs/metrics/formatting.py @@ -15,4 +15,4 @@ elif value < 3600: return "%s minutes" % ( value / 60 ) else: - return "%s days and %s minutes" % ( value / 3600, ( value % 3600 ) / 60 ) + return "%s hours and %s minutes" % ( value / 3600, ( value % 3600 ) / 60 ) diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/__init__.py --- a/lib/galaxy/managers/__init__.py +++ 
b/lib/galaxy/managers/__init__.py @@ -1,4 +1,33 @@ -""" 'Business logic' independent of web transactions/user context (trans) -should be pushed into models - but logic that requires the context trans -should be placed under this module. """ +Classes that manage resources (models, tools, etc.) by using the current +Transaction. + +Encapsulates the intersection of trans (or trans.sa_session), models, +and Controllers. + +Responsibilities: + model operations that involve the trans/sa_session (CRUD) + security: + ownership, accessibility + common aspect-oriented operations via new mixins: + sharable, annotatable, tagable, ratable + +Not responsible for: + encoding/decoding ids + any http gobblygook + formatting of returned data (always python structures) + formatting of raised errors + +The goal is to have Controllers only handle: + query-string/payload parsing and encoding/decoding ids + http + return formatting + +and: + control, improve namespacing in Controllers + DRY for Controller ops (define here - use in both UI/API Controllers) + +In other words, 'Business logic' independent of web transactions/user context +(trans) should be pushed into models - but logic that requires the context +trans should be placed under this module. +""" diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/base.py --- /dev/null +++ b/lib/galaxy/managers/base.py @@ -0,0 +1,7 @@ + + +class ModelManager( object ): + pass + +class ModelSerializer( object ): + pass diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/hdas.py --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -1,16 +1,35 @@ +""" +Manager and Serializer for HDAs. + +HistoryDatasetAssociations (HDAs) are datasets contained or created in a +history. +""" + from galaxy import exceptions -from ..managers import histories +from galaxy.managers import base as manager_base +from galaxy.managers import histories as history_manager -class HDAManager( object ): +import galaxy.web +import galaxy.datatypes.metadata +from galaxy import objectstore + +class HDAManager( manager_base.ModelManager ): + """ + Interface/service object for interacting with HDAs. + """ def __init__( self ): - self.histories_mgr = histories.HistoryManager() + """ + Set up and initialize other managers needed by hdas. + """ + self.histories_mgr = history_manager.HistoryManager() def get( self, trans, unencoded_id, check_ownership=True, check_accessible=True ): """ + Get an HDA by its unencoded db id, checking ownership (via its history) + or accessibility (via dataset shares/permissions). """ - # this is a replacement for UsesHistoryDatasetAssociationMixin because mixins are a bad soln/structure hda = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( unencoded_id ) if hda is None: raise exceptions.ObjectNotFound() @@ -19,7 +38,8 @@ def secure( self, trans, hda, check_ownership=True, check_accessible=True ): """ - checks if (a) user owns item or (b) item is accessible to user. + check ownership (via its history) or accessibility (via dataset + shares/permissions). """ # all items are accessible to an admin if trans.user and trans.user_is_admin(): @@ -31,12 +51,18 @@ return hda def can_access_dataset( self, trans, hda ): + """ + Use security agent to see if current user has access to dataset. 
+ """ current_user_roles = trans.get_current_user_roles() return trans.app.security_agent.can_access_dataset( current_user_roles, hda.dataset ) #TODO: is_owner, is_accessible def check_ownership( self, trans, hda ): + """ + Use history to see if current user owns HDA. + """ if not trans.user: #if hda.history == trans.history: # return hda @@ -51,6 +77,9 @@ "HistoryDatasetAssociation is not owned by the current user", type='error' ) def check_accessible( self, trans, hda ): + """ + Raise error if HDA is not accessible. + """ if trans.user and trans.user_is_admin(): return hda # check for access of the containing history... @@ -62,6 +91,151 @@ "HistoryDatasetAssociation is not accessible to the current user", type='error' ) def err_if_uploading( self, trans, hda ): + """ + Raise error if HDA is still uploading. + """ if hda.state == trans.model.Dataset.states.UPLOAD: raise exceptions.Conflict( "Please wait until this dataset finishes uploading" ) return hda + + def get_hda_dict( self, trans, hda ): + """ + Return full details of this HDA in dictionary form. + """ + #precondition: the user's access to this hda has already been checked + #TODO:?? postcondition: all ids are encoded (is this really what we want at this level?) + expose_dataset_path = trans.user_is_admin() or trans.app.config.expose_dataset_path + hda_dict = hda.to_dict( view='element', expose_dataset_path=expose_dataset_path ) + hda_dict[ 'api_type' ] = "file" + + # Add additional attributes that depend on trans must be added here rather than at the model level. + can_access_hda = trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), hda.dataset ) + can_access_hda = ( trans.user_is_admin() or can_access_hda ) + if not can_access_hda: + return self.get_inaccessible_hda_dict( trans, hda ) + hda_dict[ 'accessible' ] = True + + #TODO: I'm unclear as to which access pattern is right + hda_dict[ 'annotation' ] = hda.get_item_annotation_str( trans.sa_session, hda.history.user, hda ) + #annotation = getattr( hda, 'annotation', hda.get_item_annotation_str( trans.sa_session, trans.user, hda ) ) + + # ---- return here if deleted AND purged OR can't access + purged = ( hda.purged or hda.dataset.purged ) + if ( hda.deleted and purged ): + #TODO: to_dict should really go AFTER this - only summary data + return trans.security.encode_dict_ids( hda_dict ) + + if expose_dataset_path: + try: + hda_dict[ 'file_name' ] = hda.file_name + except objectstore.ObjectNotFound: + log.exception( 'objectstore.ObjectNotFound, HDA %s.', hda.id ) + + hda_dict[ 'download_url' ] = galaxy.web.url_for( 'history_contents_display', + history_id = trans.security.encode_id( hda.history.id ), + history_content_id = trans.security.encode_id( hda.id ) ) + + # indeces, assoc. metadata files, etc. 
+ meta_files = [] + for meta_type in hda.metadata.spec.keys(): + if isinstance( hda.metadata.spec[ meta_type ].param, galaxy.datatypes.metadata.FileParameter ): + meta_files.append( dict( file_type=meta_type ) ) + if meta_files: + hda_dict[ 'meta_files' ] = meta_files + + # currently, the viz reg is optional - handle on/off + if trans.app.visualizations_registry: + hda_dict[ 'visualizations' ] = trans.app.visualizations_registry.get_visualizations( trans, hda ) + else: + hda_dict[ 'visualizations' ] = hda.get_visualizations() + #TODO: it may also be wiser to remove from here and add as API call that loads the visualizations + # when the visualizations button is clicked (instead of preloading/pre-checking) + + # ---- return here if deleted + if hda.deleted and not purged: + return trans.security.encode_dict_ids( hda_dict ) + + return trans.security.encode_dict_ids( hda_dict ) + + def get_inaccessible_hda_dict( self, trans, hda ): + """ + Return truncated serialization of HDA when inaccessible to user. + """ + return trans.security.encode_dict_ids({ + 'id' : hda.id, + 'history_id': hda.history.id, + 'hid' : hda.hid, + 'name' : hda.name, + 'state' : hda.state, + 'deleted' : hda.deleted, + 'visible' : hda.visible, + 'accessible': False + }) + + def get_hda_dict_with_error( self, trans, hda=None, history_id=None, id=None, error_msg='Error' ): + """ + Return truncated serialization of HDA when error raised getting + details. + """ + return trans.security.encode_dict_ids({ + 'id' : hda.id if hda else id, + 'history_id': hda.history.id if hda else history_id, + 'hid' : hda.hid if hda else '(unknown)', + 'name' : hda.name if hda else '(unknown)', + 'error' : error_msg, + 'state' : trans.model.Dataset.states.NEW + }) + + def get_display_apps( self, trans, hda ): + """ + Return dictionary containing new-style display app urls. + """ + display_apps = [] + for display_app in hda.get_display_applications( trans ).itervalues(): + + app_links = [] + for link_app in display_app.links.itervalues(): + app_links.append({ + 'target': link_app.url.get( 'target_frame', '_blank' ), + 'href' : link_app.get_display_url( hda, trans ), + 'text' : gettext( link_app.name ) + }) + if app_links: + display_apps.append( dict( label=display_app.name, links=app_links ) ) + + return display_apps + + def get_old_display_applications( self, trans, hda ): + """ + Return dictionary containing old-style display app urls. + """ + display_apps = [] + if not trans.app.config.enable_old_display_applications: + return display_apps + + for display_app in hda.datatype.get_display_types(): + target_frame, display_links = hda.datatype.get_display_links( hda, + display_app, trans.app, trans.request.base ) + + if len( display_links ) > 0: + display_label = hda.datatype.get_display_label( display_app ) + + app_links = [] + for display_name, display_link in display_links: + app_links.append({ + 'target': target_frame, + 'href' : display_link, + 'text' : gettext( display_name ) + }) + if app_links: + display_apps.append( dict( label=display_label, links=app_links ) ) + + return display_apps + + +# ============================================================================= +class HistorySerializer( manager_base.ModelSerializer ): + """ + Interface/service object for serializing HDAs into dictionaries. 
+ """ + pass diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/managers/histories.py --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -1,9 +1,32 @@ +""" +Manager and Serializer for histories. + +Histories are containers for datasets or dataset collections +created (or copied) by users over the course of an analysis. +""" + from galaxy import exceptions from galaxy.model import orm +from galaxy.managers import base as manager_base +import galaxy.managers.hdas -class HistoryManager( object ): +import galaxy.web +import galaxy.dataset_collections.util + +import logging +log = logging.getLogger( __name__ ) + + +# ============================================================================= +class HistoryManager( manager_base.ModelManager ): + """ + Interface/service object for interacting with HDAs. + """ + #TODO: all the following would be more useful if passed the user instead of defaulting to trans.user + def __init__( self, *args, **kwargs ): + super( HistoryManager, self ).__init__( *args, **kwargs ) def get( self, trans, unencoded_id, check_ownership=True, check_accessible=True, deleted=None ): """ @@ -44,7 +67,7 @@ def secure( self, trans, history, check_ownership=True, check_accessible=True ): """ - checks if (a) user owns item or (b) item is accessible to user. + Checks if (a) user owns item or (b) item is accessible to user. """ # all items are accessible to an admin if trans.user and trans.user_is_admin(): @@ -56,15 +79,28 @@ return history def is_current( self, trans, history ): + """ + True if the given history is the user's current history. + + Returns False if the session has no current history. + """ + if trans.history is None: + return False return trans.history == history def is_owner( self, trans, history ): + """ + True if the current user is the owner of the given history. + """ # anon users are only allowed to view their current history if not trans.user: return self.is_current( trans, history ) return trans.user == history.user def check_ownership( self, trans, history ): + """ + Raises error if the current user is not the owner of the history. + """ if trans.user and trans.user_is_admin(): return history if not trans.user and not self.is_current( trans, history ): @@ -74,6 +110,9 @@ raise exceptions.ItemOwnershipException( "History is not owned by the current user", type='error' ) def is_accessible( self, trans, history ): + """ + True if the user can access (read) the current history. + """ # admin always have access if trans.user and trans.user_is_admin(): return True @@ -88,6 +127,103 @@ return False def check_accessible( self, trans, history ): + """ + Raises error if the current user can't access the history. + """ if self.is_accessible( trans, history ): return history raise exceptions.ItemAccessibilityException( "History is not accessible to the current user", type='error' ) + + #TODO: bleh... + def _get_history_data( self, trans, history ): + """ + Returns a dictionary containing ``history`` and ``contents``, serialized + history and an array of serialized history contents respectively. 
+ """ + hda_mgr = galaxy.managers.hdas.HDAManager() + collection_dictifier = galaxy.dataset_collections.util.dictify_dataset_collection_instance + + history_dictionary = {} + contents_dictionaries = [] + try: + #for content in history.contents_iter( **contents_kwds ): + for content in history.contents_iter( types=[ 'dataset', 'dataset_collection' ] ): + hda_dict = {} + + if isinstance( content, trans.app.model.HistoryDatasetAssociation ): + try: + hda_dict = hda_mgr.get_hda_dict( trans, content ) + except Exception, exc: + # don't fail entire list if hda err's, record and move on + log.exception( 'Error bootstrapping hda: %s', exc ) + hda_dict = hda_mgr.get_hda_dict_with_error( trans, content, str( exc ) ) + + elif isinstance( content, trans.app.model.HistoryDatasetCollectionAssociation ): + try: + service = trans.app.dataset_collections_service + dataset_collection_instance = service.get_dataset_collection_instance( + trans=trans, + instance_type='history', + id=trans.security.encode_id( content.id ), + ) + hda_dict = collection_dictifier( dataset_collection_instance, + security=trans.security, parent=dataset_collection_instance.history, view="element" ) + + except Exception, exc: + log.exception( "Error in history API at listing dataset collection: %s", exc ) + #TODO: return some dict with the error + + contents_dictionaries.append( hda_dict ) + + # re-use the hdas above to get the history data... + history_dictionary = self.get_history_dict( trans, history, contents_dictionaries=contents_dictionaries ) + + except Exception, exc: + user_id = str( trans.user.id ) if trans.user else '(anonymous)' + log.exception( 'Error bootstrapping history for user %s: %s', user_id, str( exc ) ) + message = ( 'An error occurred getting the history data from the server. ' + + 'Please contact a Galaxy administrator if the problem persists.' ) + history_dictionary[ 'error' ] = message + + return { + 'history' : history_dictionary, + 'contents' : contents_dictionaries + } + + def get_history_dict( self, trans, history, contents_dictionaries=None ): + """ + Returns history data in the form of a dictionary. 
+ """ + #TODO: to serializer + history_dict = history.to_dict( view='element', value_mapper={ 'id':trans.security.encode_id }) + history_dict[ 'user_id' ] = None + if history.user_id: + history_dict[ 'user_id' ] = trans.security.encode_id( history.user_id ) + + history_dict[ 'nice_size' ] = history.get_disk_size( nice_size=True ) + history_dict[ 'annotation' ] = history.get_item_annotation_str( trans.sa_session, history.user, history ) + if not history_dict[ 'annotation' ]: + history_dict[ 'annotation' ] = '' + + #TODO: item_slug url + if history_dict[ 'importable' ] and history_dict[ 'slug' ]: + username_and_slug = ( '/' ).join(( 'u', history.user.username, 'h', history_dict[ 'slug' ] )) + history_dict[ 'username_and_slug' ] = username_and_slug + +#TODO: re-add + #hda_summaries = hda_dictionaries if hda_dictionaries else self.get_hda_summary_dicts( trans, history ) + ##TODO remove the following in v2 + #( state_counts, state_ids ) = self._get_hda_state_summaries( trans, hda_summaries ) + #history_dict[ 'state_details' ] = state_counts + #history_dict[ 'state_ids' ] = state_ids + #history_dict[ 'state' ] = self._get_history_state_from_hdas( trans, history, state_counts ) + + return history_dict + + +# ============================================================================= +class HistorySerializer( manager_base.ModelSerializer ): + """ + Interface/service object for serializing histories into dictionaries. + """ + pass diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/model/migrate/versions/0104_update_genome_downloader_job_parameters.py --- a/lib/galaxy/model/migrate/versions/0104_update_genome_downloader_job_parameters.py +++ b/lib/galaxy/model/migrate/versions/0104_update_genome_downloader_job_parameters.py @@ -23,6 +23,8 @@ handler.setFormatter( formatter ) log.addHandler( handler ) +metadata = MetaData() +context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) ) class DeferredJob( object ): states = Bunch( NEW = 'new', @@ -37,12 +39,8 @@ self.params = params def upgrade(migrate_engine): - metadata = MetaData() metadata.bind = migrate_engine - Session = sessionmaker( bind=migrate_engine) - context = Session() - DeferredJob.table = Table( "deferred_job", metadata, Column( "id", Integer, primary_key=True ), Column( "create_time", DateTime, default=now ), @@ -70,12 +68,8 @@ context.flush() def downgrade(migrate_engine): - metadata = MetaData() metadata.bind = migrate_engine - Session = sessionmaker( bind=migrate_engine) - context = Session() - jobs = context.query( DeferredJob ).filter_by( plugin='GenomeTransferPlugin' ).all() for job in jobs: diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3213,7 +3213,9 @@ # Populate tool_type to ToolClass mappings tool_types = {} -for tool_class in [ Tool, DataDestinationTool, SetMetadataTool, DataSourceTool, AsyncDataSourceTool, DataManagerTool ]: +for tool_class in [ Tool, SetMetadataTool, OutputParameterJSONTool, + DataManagerTool, DataSourceTool, AsyncDataSourceTool, + DataDestinationTool ]: tool_types[ tool_class.tool_type ] = tool_class diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/actions/index_genome.py --- a/lib/galaxy/tools/actions/index_genome.py +++ /dev/null @@ -1,67 +0,0 @@ -import tempfile -from __init__ import ToolAction -from galaxy.util.odict import 
odict -from galaxy.tools.genome_index import * - -import logging -log = logging.getLogger( __name__ ) - -class GenomeIndexToolAction( ToolAction ): - """Tool action used for exporting a history to an archive. """ - - def execute( self, tool, trans, *args, **kwargs ): - # - # Get genome to index. - # - incoming = kwargs['incoming'] - # - # Create the job and output dataset objects - # - job = trans.app.model.Job() - job.tool_id = tool.id - job.user_id = incoming['user'] - start_job_state = job.state # should be job.states.NEW - job.state = job.states.WAITING # we need to set job state to something other than NEW, - # or else when tracking jobs in db it will be picked up - # before we have added input / output parameters - trans.sa_session.add( job ) - - # Create dataset that will serve as archive. - temp_dataset = trans.app.model.Dataset( state=trans.app.model.Dataset.states.NEW ) - trans.sa_session.add( temp_dataset ) - - trans.sa_session.flush() # ensure job.id and archive_dataset.id are available - trans.app.object_store.create( temp_dataset ) # set the object store id, create dataset (because galaxy likes having datasets) - - # - # Setup job and job wrapper. - # - - # Add association for keeping track of index jobs, transfer jobs, and so on. - user = trans.sa_session.query( trans.app.model.User ).get( int( incoming['user'] ) ) - assoc = trans.app.model.GenomeIndexToolData( job=job, dataset=temp_dataset, fasta_path=incoming['path'], \ - indexer=incoming['indexer'], user=user, \ - deferred_job=kwargs['deferred'], transfer_job=kwargs['transfer'] ) - trans.sa_session.add( assoc ) - - job_wrapper = GenomeIndexToolWrapper( job ) - cmd_line = job_wrapper.setup_job( assoc ) - - # - # Add parameters to job_parameter table. - # - incoming[ '__GENOME_INDEX_COMMAND__' ] = cmd_line - for name, value in tool.params_to_strings( incoming, trans.app ).iteritems(): - job.add_parameter( name, value ) - - job.state = start_job_state # job inputs have been configured, restore initial job state - job.set_handler(tool.get_job_handler(None)) - trans.sa_session.flush() - - - # Queue the job for execution - trans.app.job_queue.put( job.id, tool.id ) - log.info( "Added genome index job to the job queue, id: %s" % str( job.id ) ) - - return job, odict() - diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/deps/containers.py --- a/lib/galaxy/tools/deps/containers.py +++ b/lib/galaxy/tools/deps/containers.py @@ -191,19 +191,26 @@ # TODO: Remove redundant volumes... 
volumes = docker_util.DockerVolume.volumes_from_str(volumes_raw) volumes_from = self.destination_info.get("docker_volumes_from", docker_util.DEFAULT_VOLUMES_FROM) - return docker_util.build_docker_run_command( + + docker_host_props = dict( + docker_cmd=prop("cmd", docker_util.DEFAULT_DOCKER_COMMAND), + sudo=asbool(prop("sudo", docker_util.DEFAULT_SUDO)), + sudo_cmd=prop("sudo_cmd", docker_util.DEFAULT_SUDO_COMMAND), + host=prop("host", docker_util.DEFAULT_HOST), + ) + + cache_command = docker_util.build_docker_cache_command(self.container_id, **docker_host_props) + run_command = docker_util.build_docker_run_command( command, self.container_id, volumes=volumes, volumes_from=volumes_from, env_directives=env_directives, working_directory=working_directory, - docker_cmd=prop("cmd", docker_util.DEFAULT_DOCKER_COMMAND), - sudo=asbool(prop("sudo", docker_util.DEFAULT_SUDO)), - sudo_cmd=prop("sudo_cmd", docker_util.DEFAULT_SUDO_COMMAND), - host=prop("host", docker_util.DEFAULT_HOST), - net=prop("net", "none") # By default, docker instance has networking disabled + net=prop("net", "none"), # By default, docker instance has networking disabled + **docker_host_props ) + return "%s\n%s" % (cache_command, run_command) def __expand_str(self, value): if not value: diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/deps/docker_util.py --- a/lib/galaxy/tools/deps/docker_util.py +++ b/lib/galaxy/tools/deps/docker_util.py @@ -50,28 +50,41 @@ return ":".join([self.from_path, self.to_path, self.how]) +def build_docker_cache_command( + image, + docker_cmd=DEFAULT_DOCKER_COMMAND, + sudo=DEFAULT_SUDO, + sudo_cmd=DEFAULT_SUDO_COMMAND, + host=DEFAULT_HOST, +): + inspect_command_parts = __docker_prefix(docker_cmd, sudo, sudo_cmd, host) + inspect_command_parts.extend(["inspect", image]) + inspect_image_command = " ".join(inspect_command_parts) + + pull_command_parts = __docker_prefix(docker_cmd, sudo, sudo_cmd, host) + pull_command_parts.extend(["pull", image]) + pull_image_command = " ".join(pull_command_parts) + cache_command = "%s > /dev/null 2>&1\n[ $? -ne 0 ] && %s > /dev/null 2>&1\n" % (inspect_image_command, pull_image_command) + return cache_command + + def build_docker_run_command( container_command, image, tag=None, - docker_cmd=DEFAULT_DOCKER_COMMAND, volumes=[], volumes_from=DEFAULT_VOLUMES_FROM, memory=DEFAULT_MEMORY, env_directives=[], working_directory=DEFAULT_WORKING_DIRECTORY, + name=None, + net=DEFAULT_NET, + docker_cmd=DEFAULT_DOCKER_COMMAND, sudo=DEFAULT_SUDO, sudo_cmd=DEFAULT_SUDO_COMMAND, - name=None, host=DEFAULT_HOST, - net=DEFAULT_NET, ): - command_parts = [] - if sudo: - command_parts.append(sudo_cmd) - command_parts.append(docker_cmd) - if host: - command_parts.append(["-H", host]) + command_parts = __docker_prefix(docker_cmd, sudo, sudo_cmd, host) command_parts.append("run") for env_directive in env_directives: command_parts.extend(["-e", env_directive]) @@ -93,3 +106,15 @@ command_parts.append(full_image) command_parts.append(container_command) return " ".join(command_parts) + + +def __docker_prefix(docker_cmd, sudo, sudo_cmd, host): + """ Prefix to issue a docker command. 
+ """ + command_parts = [] + if sudo: + command_parts.append(sudo_cmd) + command_parts.append(docker_cmd) + if host: + command_parts.append(["-H", host]) + return command_parts diff -r 47c6c6bb8eec81060cbf2ddb2396c19e78444747 -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b lib/galaxy/tools/genome_index/__init__.py --- a/lib/galaxy/tools/genome_index/__init__.py +++ /dev/null @@ -1,243 +0,0 @@ -from __future__ import with_statement - -import json -import logging -import os -import shutil -import tarfile -import tempfile - -from galaxy import model, util -from galaxy.web.framework.helpers import to_unicode -from galaxy.model.item_attrs import UsesAnnotations -from galaxy.util.json import * -from galaxy.web.base.controller import UsesHistoryMixin -from galaxy.tools.data import ToolDataTableManager - - -log = logging.getLogger(__name__) - -def load_genome_index_tools( toolbox ): - """ Adds tools for indexing genomes via the main job runner. """ - # Create XML for loading the tool. - tool_xml_text = """ - <tool id="__GENOME_INDEX__" name="Index Genome" version="0.1" tool_type="genome_index"> - <type class="GenomeIndexTool" module="galaxy.tools"/> - <action module="galaxy.tools.actions.index_genome" class="GenomeIndexToolAction"/> - <command>$__GENOME_INDEX_COMMAND__ $output_file $output_file.files_path "$__app__.config.rsync_url" "$__app__.config.tool_data_path"</command> - <inputs> - <param name="__GENOME_INDEX_COMMAND__" type="hidden"/> - </inputs> - <outputs> - <data format="txt" name="output_file"/> - </outputs> - <stdio> - <exit_code range="1:" err_level="fatal" /> - </stdio> - </tool> - """ - - # Load index tool. - tmp_name = tempfile.NamedTemporaryFile() - tmp_name.write( tool_xml_text ) - tmp_name.flush() - genome_index_tool = toolbox.load_tool( tmp_name.name ) - toolbox.tools_by_id[ genome_index_tool.id ] = genome_index_tool - log.debug( "Loaded genome index tool: %s", genome_index_tool.id ) - -class GenomeIndexToolWrapper( object ): - """ Provides support for performing jobs that index a genome. """ - def __init__( self, job_id ): - self.locations = dict() - self.job_id = job_id - - def setup_job( self, genobj ): - """ Perform setup for job to index a genome and return an archive. Method generates - attribute files, sets the corresponding attributes in the associated database - object, and returns a command line for running the job. The command line - includes the command, inputs, and options; it does not include the output - file because it must be set at runtime. """ - - # - # Create and return command line for running tool. - # - scriptpath = os.path.join( os.path.abspath( os.getcwd() ), "lib/galaxy/tools/genome_index/index_genome.py" ) - return "python %s %s %s" % ( scriptpath, genobj.indexer, genobj.fasta_path ) - - def postprocessing( self, sa_session, app ): - """ Finish the job, move the finished indexes to their final resting place, - and update the .loc files where applicable. 
""" - gitd = sa_session.query( model.GenomeIndexToolData ).filter_by( job_id=self.job_id ).first() - indexdirs = dict( bfast='bfast_index', bowtie='bowtie_index', bowtie2='bowtie2_index', - bwa='bwa_index', perm='perm_%s_index', picard='srma_index', sam='sam_index' ) - - - if gitd: - fp = open( gitd.dataset.get_file_name(), 'r' ) - deferred = sa_session.query( model.DeferredJob ).filter_by( id=gitd.deferred_job_id ).first() - try: - logloc = json.load( fp ) - except ValueError: - deferred.state = app.model.DeferredJob.states.ERROR - sa_session.add( deferred ) - sa_session.flush() - log.debug( 'Indexing job failed, setting deferred job state to error.' ) - return False - finally: - fp.close() - destination = None - tdtman = ToolDataTableManager( app.config.tool_data_path ) - xmltree = tdtman.load_from_config_file( app.config.tool_data_table_config_path, app.config.tool_data_path ) - for node in xmltree: - table = node.get('name') - location = node.findall('file')[0].get('path') - self.locations[table] = os.path.abspath( location ) - locbase = os.path.abspath( os.path.split( self.locations['all_fasta'] )[0] ) - params = deferred.params - dbkey = params[ 'dbkey' ] - basepath = os.path.join( os.path.abspath( app.config.genome_data_path ), dbkey ) - intname = params[ 'intname' ] - indexer = gitd.indexer - workingdir = os.path.abspath( gitd.dataset.extra_files_path ) - location = [] - indexdata = gitd.dataset.extra_files_path - if indexer == '2bit': - indexdata = os.path.join( workingdir, '%s.2bit' % dbkey ) - destination = os.path.join( basepath, 'seq', '%s.2bit' % dbkey ) - location.append( dict( line='\t'.join( [ 'seq', dbkey, destination ] ), file= os.path.join( locbase, 'alignseq.loc' ) ) ) - elif indexer == 'bowtie': - self._ex_tar( workingdir, 'cs.tar' ) - destination = os.path.join( basepath, 'bowtie_index' ) - for var in [ 'nt', 'cs' ]: - for line in logloc[ var ]: - idx = line - if var == 'nt': - locfile = self.locations[ 'bowtie_indexes' ] - locdir = os.path.join( destination, idx ) - else: - locfile = self.locations[ 'bowtie_indexes_color' ] - locdir = os.path.join( destination, var, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'bowtie2': - destination = os.path.join( basepath, 'bowtie2_index' ) - for line in logloc[ 'nt' ]: - idx = line - locfile = self.locations[ 'bowtie2_indexes' ] - locdir = os.path.join( destination, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'bwa': - self._ex_tar( workingdir, 'cs.tar' ) - destination = os.path.join( basepath, 'bwa_index' ) - for var in [ 'nt', 'cs' ]: - for line in logloc[ var ]: - idx = line - if var == 'nt': - locfile = self.locations[ 'bwa_indexes' ] - locdir = os.path.join( destination, idx ) - else: - locfile = self.locations[ 'bwa_indexes_color' ] - locdir = os.path.join( destination, var, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'perm': - self._ex_tar( workingdir, 'cs.tar' ) - destination = os.path.join( basepath, 'perm_index' ) - for var in [ 'nt', 'cs' ]: - for line in logloc[ var ]: - idx = line.pop() - if var == 'nt': - locfile = self.locations[ 'perm_base_indexes' ] - locdir = os.path.join( destination, idx ) - else: - locfile = self.locations[ 'perm_color_indexes' ] - locdir = os.path.join( destination, var, idx ) - line.append( locdir ) - location.append( dict( line='\t'.join( line ), file=locfile ) ) - 
elif indexer == 'picard': - destination = os.path.join( basepath, 'srma_index' ) - for var in [ 'nt' ]: - for line in logloc[ var ]: - idx = line - locfile = self.locations[ 'picard_indexes' ] - locdir = os.path.join( destination, idx ) - location.append( dict( line='\t'.join( [ dbkey, dbkey, intname, locdir ] ), file=locfile ) ) - elif indexer == 'sam': - destination = os.path.join( basepath, 'sam_index' ) - for var in [ 'nt' ]: - for line in logloc[ var ]: - locfile = self.locations[ 'sam_fa_indexes' ] - locdir = os.path.join( destination, line ) - location.append( dict( line='\t'.join( [ 'index', dbkey, locdir ] ), file=locfile ) ) - - if destination is not None and os.path.exists( os.path.split( destination )[0] ) and not os.path.exists( destination ): - log.debug( 'Moving %s to %s' % ( indexdata, destination ) ) - shutil.move( indexdata, destination ) - if indexer not in [ '2bit' ]: - genome = '%s.fa' % dbkey - target = os.path.join( destination, genome ) - fasta = os.path.abspath( os.path.join( basepath, 'seq', genome ) ) - self._check_link( fasta, target ) - if os.path.exists( os.path.join( destination, 'cs' ) ): - target = os.path.join( destination, 'cs', genome ) - fasta = os.path.abspath( os.path.join( basepath, 'seq', genome ) ) - self._check_link( fasta, target ) - for line in location: - self._add_line( line[ 'file' ], line[ 'line' ] ) - deferred.state = app.model.DeferredJob.states.OK - sa_session.add( deferred ) - sa_session.flush() - - - def _check_link( self, targetfile, symlink ): - target = os.path.relpath( targetfile, os.path.dirname( symlink ) ) - filename = os.path.basename( targetfile ) - if not os.path.exists( targetfile ): # this should never happen. - raise Exception, "%s not found. Unable to proceed without a FASTA file. Aborting." % targetfile - if os.path.exists( symlink ) and os.path.islink( symlink ): - if os.path.realpath( symlink ) == os.path.abspath( targetfile ): # symlink exists, points to the correct FASTA file. - return - else: # no it doesn't. Make a new one, and this time do it right. - os.remove( symlink ) - os.symlink( target, symlink ) - return - elif not os.path.exists( symlink ): # no symlink to the FASTA file. Create one. - os.symlink( target, symlink ) - return - elif os.path.exists( symlink ) and not os.path.islink( symlink ): - if self._hash_file( targetfile ) == self._hash_file( symlink ): # files are identical. No need to panic. - return - else: - if os.path.getsize( symlink ) == 0: # somehow an empty file got copied instead of the symlink. Delete with extreme prejudice. - os.remove( symlink ) - os.symlink( target, symlink ) - return - else: - raise Exception, "Regular file %s exists, is not empty, contents do not match %s." 
% ( symlink, targetfile ) - - def _hash_file( self, filename ): - import hashlib - md5 = hashlib.md5() - with open( filename, 'rb' ) as f: - for chunk in iter( lambda: f.read( 8192 ), '' ): - md5.update( chunk ) - return md5.digest() - - - def _ex_tar( self, directory, filename ): - fh = tarfile.open( os.path.join( directory, filename ) ) - fh.extractall( path=directory ) - fh.close() - os.remove( os.path.join( directory, filename ) ) - - def _add_line( self, locfile, newline ): - filepath = locfile - origlines = [] - output = [] - comments = [] - with open( filepath, 'r' ) as destfile: - for line in destfile: - origlines.append( line.strip() ) - if newline not in origlines: - origlines.append( newline ) - with open( filepath, 'w+' ) as destfile: - origlines.append( '' ) - destfile.write( '\n'.join( origlines ) ) This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/4ca1677c8d5b/ Changeset: 4ca1677c8d5b User: iracooke Date: 2014-07-24 09:16:24 Summary: Added whitelisting to sqlite data provider Affected #: 1 file diff -r 034c0159c0cfbc7736eeee4132fbedda8a465c6b -r 4ca1677c8d5bc6486b0b1dff50372a23dfaf307d lib/galaxy/datatypes/dataproviders/dataset.py --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -12,6 +12,7 @@ import column import external import sqlite3 +import re from galaxy import eggs eggs.require( 'bx-python' ) @@ -720,8 +721,18 @@ self.connection.row_factory = sqlite3.Row super( SQliteDataProvider, self ).__init__( source, **kwargs ) + def query_matches_whitelist(self,query): + if re.match("select ",query,re.IGNORECASE): + if re.search("^([^\"]|\"[^\"]*\")*?;",query) or re.search("^([^\']|\'[^\']*\')*?;",query): + return False + else: + return True + return False + + + def __iter__( self ): - if self.query is not None: + if (self.query is not None) and self.query_matches_whitelist(self.query): for row in self.connection.cursor().execute(self.query): yield row else: https://bitbucket.org/galaxy/galaxy-central/commits/56a7b27577de/ Changeset: 56a7b27577de User: dannon Date: 2014-07-25 00:29:57 Summary: Merged in iracooke/galaxy-central (pull request #434) Add sqlite datatype and corresponding dataprovider Affected #: 3 files diff -r f03f9c5a5efc7fecb4ca131f66c441d55cc5d353 -r 56a7b27577dec0dac17ef94d4c131e7f1f61cb15 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -177,6 +177,7 @@ <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/><datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" description="Any data in tab delimited format (tabular)." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Tabular_.28tab_delimited.29"/><datatype extension="twobit" type="galaxy.datatypes.binary:TwoBit" mimetype="application/octet-stream" display_in_upload="true"/> + <datatype extension="sqlite" type="galaxy.datatypes.binary:SQlite" mimetype="application/octet-stream" display_in_upload="true"/><datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true" description="Any text file." 
description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Plain_text"/><datatype extension="linecount" type="galaxy.datatypes.data:LineCount" display_in_upload="false"/><datatype extension="memexml" type="galaxy.datatypes.xml:MEMEXml" mimetype="application/xml" display_in_upload="true"/> @@ -262,6 +263,7 @@ --><sniffer type="galaxy.datatypes.tabular:Vcf"/><sniffer type="galaxy.datatypes.binary:TwoBit"/> + <sniffer type="galaxy.datatypes.binary:SQlite"/><sniffer type="galaxy.datatypes.binary:Bam"/><sniffer type="galaxy.datatypes.binary:Sff"/><sniffer type="galaxy.datatypes.xml:Phyloxml"/> diff -r f03f9c5a5efc7fecb4ca131f66c441d55cc5d353 -r 56a7b27577dec0dac17ef94d4c131e7f1f61cb15 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -12,6 +12,7 @@ import subprocess import tempfile import zipfile +import sqlite3 from urllib import urlencode, quote_plus from galaxy import eggs @@ -545,3 +546,45 @@ return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size())) Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit) + + +@dataproviders.decorators.has_dataproviders +class SQlite ( Binary ): + file_ext = "sqlite" + + # Connects and runs a query that should work on any real database + # If the file is not sqlite, an exception will be thrown and the sniffer will return false + def sniff( self, filename ): + try: + conn = sqlite3.connect(filename) + schema_version=conn.cursor().execute("pragma schema_version").fetchone() + conn.close() + if schema_version is not None: + return True + return False + except: + return False + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + dataset.peek = "SQLite Database" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "SQLite Database (%s)" % ( data.nice_size( dataset.get_size() ) ) + + + @dataproviders.decorators.dataprovider_factory( 'sqlite', dataproviders.dataset.SQliteDataProvider.settings ) + def sqlite_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.dataset.SQliteDataProvider( dataset_source, **settings ) + + +Binary.register_sniffable_binary_format("sqlite","sqlite",SQlite) + diff -r f03f9c5a5efc7fecb4ca131f66c441d55cc5d353 -r 56a7b27577dec0dac17ef94d4c131e7f1f61cb15 lib/galaxy/datatypes/dataproviders/dataset.py --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -11,6 +11,8 @@ import line import column import external +import sqlite3 +import re from galaxy import eggs eggs.require( 'bx-python' ) @@ -700,3 +702,39 @@ #TODO: as samtools - need more info on output format raise NotImplementedError() super( BGzipTabixDataProvider, self ).__init__( dataset, **kwargs ) + + + +class SQliteDataProvider ( base.DataProvider ): + """ + Data provider that uses a sqlite database file as its source. 
+ + Allows any query to be run and returns the resulting rows as sqlite3 row objects + """ + settings = { + 'query' : 'str' + } + + def __init__( self, source, query=None, **kwargs ): + self.query=query + self.connection = sqlite3.connect(source.dataset.file_name); + self.connection.row_factory = sqlite3.Row + super( SQliteDataProvider, self ).__init__( source, **kwargs ) + + def query_matches_whitelist(self,query): + if re.match("select ",query,re.IGNORECASE): + if re.search("^([^\"]|\"[^\"]*\")*?;",query) or re.search("^([^\']|\'[^\']*\')*?;",query): + return False + else: + return True + return False + + + + def __iter__( self ): + if (self.query is not None) and self.query_matches_whitelist(self.query): + for row in self.connection.cursor().execute(self.query): + yield row + else: + yield + Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
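For readers who want to try the SQliteDataProvider whitelist from changeset 4ca1677c8d5b outside of Galaxy, here is a minimal standalone sketch. The two regular expressions and the control flow are copied from the diff above; the module-level function wrapper, the in-memory database, and the sample queries are illustrative assumptions. Because the two patterns are OR'ed together, a semicolon leads to rejection even when it only sits inside a single-quoted (or double-quoted) literal, since it is still unquoted with respect to the other quote style; this blocks attempts to chain a second statement onto the whitelisted SELECT.

import re
import sqlite3

def query_matches_whitelist(query):
    # Only queries that start with "select " (case-insensitive) are considered.
    if re.match("select ", query, re.IGNORECASE):
        # Reject the query if a semicolon appears outside double-quoted text
        # or outside single-quoted text; either pattern matching is treated
        # as an attempt to terminate the statement early.
        if re.search("^([^\"]|\"[^\"]*\")*?;", query) or re.search("^([^\']|\'[^\']*\')*?;", query):
            return False
        return True
    return False

# Hypothetical usage against a throwaway in-memory database.
conn = sqlite3.connect(":memory:")
conn.execute("create table t (id integer, name text)")
conn.execute("insert into t values (1, 'a;b')")

# Note: the second query below is rejected as well; its semicolon is inside
# single quotes but not inside double quotes, so the first pattern matches.
for q in ("select * from t",
          "select * from t where name = 'a;b'",
          "select * from t; drop table t",
          "drop table t"):
    if query_matches_whitelist(q):
        print("%s -> %s" % (q, conn.execute(q).fetchall()))
    else:
        print("%s -> rejected" % q)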
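The docker changes in changeset 034c0159c0cf make Galaxy pre-pull an image before emitting the docker run command. A minimal standalone sketch of that caching step follows, assuming default settings (no sudo, no -H host) and renaming the private helper to _docker_prefix; only the inspect/pull shell fragment itself is taken from the diff. One caveat: the __docker_prefix shown above appends ["-H", host] as a nested list rather than extending the list, which would make the final " ".join() fail whenever a remote host is configured, so this sketch uses extend() instead.

DEFAULT_DOCKER_COMMAND = "docker"
DEFAULT_SUDO_COMMAND = "sudo"

def _docker_prefix(docker_cmd=DEFAULT_DOCKER_COMMAND, sudo=False,
                   sudo_cmd=DEFAULT_SUDO_COMMAND, host=None):
    # Common prefix for every docker invocation: optional sudo, the docker
    # binary, and an optional remote daemon selected with -H.
    parts = []
    if sudo:
        parts.append(sudo_cmd)
    parts.append(docker_cmd)
    if host:
        parts.extend(["-H", host])
    return parts

def build_docker_cache_command(image, **kwds):
    # "docker inspect" exits non-zero when the image is not present locally;
    # in that case fall back to "docker pull" so the later "docker run" does
    # not stall on (or fail at) an implicit pull.
    inspect_cmd = " ".join(_docker_prefix(**kwds) + ["inspect", image])
    pull_cmd = " ".join(_docker_prefix(**kwds) + ["pull", image])
    return "%s > /dev/null 2>&1\n[ $? -ne 0 ] && %s > /dev/null 2>&1\n" % (
        inspect_cmd, pull_cmd)

print(build_docker_cache_command("busybox:latest"))
# docker inspect busybox:latest > /dev/null 2>&1
# [ $? -ne 0 ] && docker pull busybox:latest > /dev/null 2>&1

In containers.py this fragment is simply joined to the run command with a newline, so the pull (if needed) completes before the container starts.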