[hg] galaxy 1568: Add a new metadata type of Metadata Files.
details: http://www.bx.psu.edu/hg/galaxy/rev/0b022adfdc34 changeset: 1568:0b022adfdc34 user: Dan Blankenberg <dan@bx.psu.edu> date: Wed Oct 22 13:49:22 2008 -0400 description: Add a new metadata type of Metadata Files. These are now used to store the list of chromosomes for species as well as the index for MAF files. MAF tools have been enhanced to make use of index files when available. TODO: When datasets are purged from disk, these files should also be purged. 23 file(s) affected in this change: lib/galaxy/datatypes/data.py lib/galaxy/datatypes/metadata.py lib/galaxy/datatypes/sequence.py lib/galaxy/jobs/__init__.py lib/galaxy/model/__init__.py lib/galaxy/model/mapping.py lib/galaxy/tools/actions/upload.py lib/galaxy/tools/parameters/__init__.py lib/galaxy/tools/parameters/basic.py lib/galaxy/tools/parameters/validation.py lib/galaxy/tools/util/maf_utilities.py lib/galaxy/util/__init__.py templates/dataset/edit_attributes.mako tools/data_source/data_source.py tools/maf/genebed_maf_to_fasta.xml tools/maf/interval2maf.py tools/maf/interval2maf.xml tools/maf/interval_maf_to_merged_fasta.py tools/maf/interval_maf_to_merged_fasta.xml tools/maf/maf_stats.py tools/maf/maf_stats.xml tools/stats/filtering.py tools/visualization/GMAJ.xml diffs (672 lines): diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/datatypes/data.py Wed Oct 22 13:49:22 2008 -0400 @@ -9,7 +9,6 @@ # Valid first column and strand column values vor bed, other formats col1_startswith = ['chr', 'chl', 'groupun', 'reftig_', 'scaffold', 'super_', 'vcho'] valid_strand = ['+', '-', '.'] -gzip_magic = '\037\213' class DataMeta( type ): """ @@ -86,10 +85,19 @@ def set_readonly_meta( self, dataset ): """Unimplemented method, resets the readonly metadata values""" return True - def missing_meta( self, dataset ): - """Checks for empty metadata values, Returns True if non-optional metadata is missing""" - for key, value in dataset.metadata.items(): - if dataset.metadata.spec[key].get("optional"): continue #we skip check for optional values here + def missing_meta( self, dataset, check = [], skip = [] ): + """ + Checks for empty metadata values, Returns True if non-optional metadata is missing + Specifying a list of 'check' values will only check those names provided; when used, optionality is ignored + Specifying a list of 'skip' items will return True even when a named metadata value is missing + """ + if check: + to_check = [ ( to_check, dataset.metadata.get( to_check ) ) for to_check in check ] + else: + to_check = dataset.metadata.items() + for key, value in to_check: + if key in skip or ( not check and dataset.metadata.spec[key].get( "optional" ) ): + continue #we skip check for optional and nonrequested values here if not value: return True return False @@ -328,7 +336,7 @@ line = line[ :WIDTH ] if not data_checked and line: data_checked = True - if line[0:2] == gzip_magic: + if line[0:2] == util.gzip_magic: file_type = 'gzipped' break else: diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/datatypes/metadata.py Wed Oct 22 13:49:22 2008 -0400 @@ -1,8 +1,9 @@ -import sys, logging +import sys, logging, copy, shutil from galaxy.util import string_as_bool from galaxy.util.odict import odict from galaxy.web import form_builder +import galaxy.model log = logging.getLogger( __name__ ) @@ -75,7 +76,13 @@ def get_html_by_name( self, name, **kwd ): if name in self.spec: return self.spec[name].param.get_html( value=getattr( self, name ), context=self, **kwd ) - + def make_dict_copy( self, to_copy ): + """Makes a deep copy of input iterable to_copy according to self.spec""" + rval = {} + for key, value in to_copy.items(): + if key in self.spec: + rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy ) + return rval class MetadataSpecCollection( odict ): """ @@ -121,7 +128,10 @@ def to_string( self, value ): return str( value ) - + + def make_copy( self, value, target_context = None, source_context = None ): + return copy.deepcopy( value ) + @classmethod def marshal ( cls, value ): """ @@ -150,7 +160,6 @@ Turns a value into its usable form. """ return value - class MetadataElementSpec( object ): """ @@ -280,16 +289,14 @@ return ",".join( map( str, value ) ) class PythonObjectParameter( MetadataParameter ): - def __init__( self, spec ): - MetadataParameter.__init__( self, spec ) def to_string( self, value ): if not value: - return self.spec.to_string( self.spec.no_value ) - return self.spec.to_string( value ) + return self.spec._to_string( self.spec.no_value ) + return self.spec._to_string( value ) def get_html_field( self, value=None, context={}, other_values={}, **kwd ): - return form_builder.TextField( self.spec.name, value=self.to_string( value ) ) + return form_builder.TextField( self.spec.name, value=self._to_string( value ) ) def get_html( self, value=None, context={}, other_values={}, **kwd ): return str( self ) @@ -297,3 +304,40 @@ @classmethod def marshal( cls, value ): return value + +class FileParameter( MetadataParameter ): + + def to_string( self, value ): + if not value: + return str( self.spec.no_value ) + return value.file_name + + def get_html_field( self, value=None, context={}, other_values={}, **kwd ): + return form_builder.TextField( self.spec.name, value=str( value.id ) ) + + def get_html( self, value=None, context={}, other_values={}, **kwd ): + return "<div>No display available for Metadata Files</div>" + + def wrap( self, value ): + if isinstance( value, galaxy.model.MetadataFile ): + return value + try: + return galaxy.model.MetadataFile.get( value ) + except: + #value was not a valid id + return None + + def make_copy( self, value, target_context = None, source_context = None ): + value = self.wrap( value ) + if value: + new_value = galaxy.model.MetadataFile( dataset = target_context.parent, name = self.spec.name ) + new_value.flush() + shutil.copy( value.file_name, new_value.file_name ) + return self.unwrap( new_value ) + return None + + @classmethod + def marshal( cls, value ): + if isinstance( value, galaxy.model.MetadataFile ): + value = value.id + return value diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/datatypes/sequence.py Wed Oct 22 13:49:22 2008 -0400 @@ -8,6 +8,7 @@ from cgi import escape from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata +import galaxy.model from galaxy import util from sniff import * @@ -24,7 +25,6 @@ """Add metadata elements""" MetadataElement( name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True, no_value=None ) - MetadataElement( name="species_chromosomes", desc="Species Chromosomes", value={}, param=metadata.PythonObjectParameter, readonly=True, no_value={}, to_string=str, visible=False ) class Fasta( Sequence ): """Class representing a FASTA sequence""" @@ -192,23 +192,30 @@ class Maf( Alignment ): """Class describing a Maf alignment""" file_ext = "maf" + + #Readonly and optional, users can't unset it, but if it is not set, we are generally ok; if required use a metadata validator in the tool definition + MetadataElement( name="species_chromosomes", desc="Species Chromosomes", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) + MetadataElement( name="maf_index", desc="MAF Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) def init_meta( self, dataset, copy_from=None ): Alignment.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, **kwd ): """ - Parses and sets species and chromosomes from MAF files. + Parses and sets species, chromosomes, index from MAF file. """ + #these metadata values are not accessable by users, always overwrite + species = [] species_chromosomes = {} + maf_reader = bx.align.maf.Reader( open( dataset.file_name ) ) + indexes = bx.interval_index_file.Indexes() try: - for i, m in enumerate( bx.align.maf.Reader( open(dataset.file_name) ) ): - for c in m.components: - ## spec,chrom = bx.align.maf.src_split( c.src ) - ## if not spec or not chrom: spec = chrom = c.src - # "src_split" finds the rightmost dot, which is probably - # wrong in general, and certainly here. + while True: + pos = maf_reader.file.tell() + block = maf_reader.next() + if block is None: break + for c in block.components: spec = c.src chrom = None if "." in spec: @@ -218,20 +225,44 @@ species_chromosomes[spec] = [] if chrom and chrom not in species_chromosomes[spec]: species_chromosomes[spec].append( chrom ) - # only check first 100,000 blocks for species - if i > 100000: break - except: + indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos, max=c.src_size ) + except: #bad MAF file pass - #these metadata values are not accessable by users, always overwrite dataset.metadata.species = species - dataset.metadata.species_chromosomes = species_chromosomes + #only overwrite the contents if our newly determined chromosomes don't match stored + chrom_file = dataset.metadata.species_chromosomes + compare_chroms = {} + if chrom_file: + try: + for line in open( chrom_file.file_name ): + fields = line.split( "\t" ) + if fields: + spec = fields.pop( 0 ) + if spec: + compare_chroms[spec] = fields + except: + pass + #write out species chromosomes again only if values are different + if not species_chromosomes or compare_chroms != species_chromosomes: + tmp_file = tempfile.TemporaryFile( 'w+b' ) + for spec, chroms in species_chromosomes.items(): + tmp_file.write( "%s\t%s\n" % ( spec, "\t".join( chroms ) ) ) + + if not chrom_file: + chrom_file = galaxy.model.MetadataFile( dataset = dataset, name = "species_chromosomes" ) + chrom_file.flush() + tmp_file.seek( 0 ) + open( chrom_file.file_name, 'wb' ).write( tmp_file.read() ) + dataset.metadata.species_chromosomes = chrom_file + tmp_file.close() + + index_file = dataset.metadata.maf_index + if not index_file: + index_file = galaxy.model.MetadataFile( dataset = dataset, name="maf_index" ) + index_file.flush() + indexes.write( open( index_file.file_name, 'w' ) ) + dataset.metadata.maf_index = index_file - def missing_meta( self, dataset ): - """Checks to see if species is set""" - if dataset.metadata.species in [None, []]: - return True - return False - def display_peek( self, dataset ): """Returns formated html of peek""" return self.make_html_table( dataset ) diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/jobs/__init__.py Wed Oct 22 13:49:22 2008 -0400 @@ -475,7 +475,16 @@ def get_input_fnames( self ): job = model.Job.get( self.job_id ) - return [ da.dataset.file_name for da in job.input_datasets if da.dataset ] + filenames = [] + for da in job.input_datasets: #da is JobToInputDatasetAssociation object + if da.dataset: + filenames.append( da.dataset.file_name ) + #we will need to stage in metadata file names also + #TODO: would be better to only stage in metadata files that are actually needed (found in command line, referenced in config files, etc.) + for key, value in da.dataset.metadata.items(): + if isinstance( value, model.MetadataFile ): + filenames.append( value.file_name ) + return filenames def get_output_fnames( self ): job = model.Job.get( self.job_id ) diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/model/__init__.py Wed Oct 22 13:49:22 2008 -0400 @@ -5,7 +5,7 @@ the relationship cardinalities are obvious (e.g. prefer Dataset to Data) """ -import os.path, os, errno, copy +import os.path, os, errno import sha import galaxy.datatypes from galaxy.util.bunch import Bunch @@ -165,7 +165,7 @@ return self._metadata_collection def set_metadata( self, bunch ): # Needs to accept a MetadataCollection, a bunch, or a dict - self._metadata = dict( [ ( key, copy.deepcopy( value ) ) for key, value in bunch.items() ] ) + self._metadata = self.metadata.make_dict_copy( bunch ) metadata = property( get_metadata, set_metadata ) """ @@ -223,8 +223,8 @@ return self.datatype.set_meta( self, **kwd ) def set_readonly_meta( self, **kwd ): return self.datatype.set_readonly_meta( self, **kwd ) - def missing_meta( self ): - return self.datatype.missing_meta( self ) + def missing_meta( self, **kwd ): + return self.datatype.missing_meta( self, **kwd ) def as_display_type( self, type, **kwd ): return self.datatype.as_display_type( self, type, **kwd ) def display_peek( self ): @@ -258,8 +258,9 @@ return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd ) def copy( self, copy_children = False, parent_id = None ): - des = HistoryDatasetAssociation( hid=self.hid, name=self.name, info=self.info, blurb=self.blurb, peek=self.peek, extension=self.extension, dbkey=self.dbkey, metadata=self._metadata, dataset = self.dataset, visible=self.visible, deleted=self.deleted, parent_id=parent_id, copied_from_history_dataset_association = self ) + des = HistoryDatasetAssociation( hid=self.hid, name=self.name, info=self.info, blurb=self.blurb, peek=self.peek, extension=self.extension, dbkey=self.dbkey, dataset = self.dataset, visible=self.visible, deleted=self.deleted, parent_id=parent_id, copied_from_history_dataset_association = self ) des.flush() + des.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id if copy_children: for child in self.children: child_copy = child.copy( copy_children = copy_children, parent_id = des.id ) @@ -564,6 +565,24 @@ self.user = None self.order_index = None +class MetadataFile( object ): + def __init__( self, dataset = None, name = None ): + self.dataset = dataset + self.name = name + @property + def file_name( self ): + assert self.id is not None, "ID must be set before filename used (commit the object)" + path = os.path.join( Dataset.file_path, '_metadata_files', *directory_hash_id( self.id ) ) + # Create directory if it does not exist + try: + os.makedirs( path ) + except OSError, e: + # File Exists is okay, otherwise reraise + if e.errno != errno.EEXIST: + raise + # Return filename inside hashed directory + return os.path.abspath( os.path.join( path, "metadata_%d.dat" % self.id ) ) + ## ---- Utility methods ------------------------------------------------------- def directory_hash_id( id ): diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/model/mapping.py --- a/lib/galaxy/model/mapping.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/model/mapping.py Wed Oct 22 13:49:22 2008 -0400 @@ -238,6 +238,15 @@ Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ), Column( "order_index", Integer ) ) +MetadataFile.table = Table( "metadata_file", metadata, + Column( "id", Integer, primary_key=True ), + Column( "name", String ), + Column( "hda_id", Integer, ForeignKey( "history_dataset_association.id" ), index=True, nullable=True ), + Column( "create_time", DateTime, default=now ), + Column( "update_time", DateTime, index=True, default=now, onupdate=now ), + Column( "deleted", Boolean, index=True, default=False ), + Column( "purged", Boolean, index=True, default=False ) ) + # With the tables defined we can define the mappers and setup the # relationships between the model objects. @@ -363,6 +372,9 @@ assign_mapper( context, StoredWorkflowMenuEntry, StoredWorkflowMenuEntry.table, properties=dict( stored_workflow=relation( StoredWorkflow ) ) ) +assign_mapper( context, MetadataFile, MetadataFile.table, + properties=dict( dataset=relation( HistoryDatasetAssociation ) ) ) + def db_next_hid( self ): """ Override __next_hid to generate from the database in a concurrency diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/actions/upload.py --- a/lib/galaxy/tools/actions/upload.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/tools/actions/upload.py Wed Oct 22 13:49:22 2008 -0400 @@ -196,7 +196,7 @@ temp = open( temp_name, "U" ) magic_check = temp.read( 2 ) temp.close() - if magic_check != datatypes.data.gzip_magic: + if magic_check != util.gzip_magic: return ( False, False ) CHUNK_SIZE = 2**15 # 32Kb gzipped_file = gzip.GzipFile( temp_name ) diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/parameters/__init__.py --- a/lib/galaxy/tools/parameters/__init__.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/tools/parameters/__init__.py Wed Oct 22 13:49:22 2008 -0400 @@ -60,4 +60,4 @@ if key in params: value = params[key].value_from_basic( value, app, ignore_errors ) rval[ key ] = value - return rval \ No newline at end of file + return rval diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/parameters/basic.py --- a/lib/galaxy/tools/parameters/basic.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/tools/parameters/basic.py Wed Oct 22 13:49:22 2008 -0400 @@ -829,6 +829,8 @@ options = [] for filter_key, filter_value in self.filtered.iteritems(): dataset = other_values[filter_key] + if dataset.__class__.__name__.endswith( "DatasetFilenameWrapper" ): #this is a bad way to check for this, but problems importing class ( due to circular imports? ) + dataset = dataset.dataset if dataset: for meta_key, meta_dict in filter_value.iteritems(): if dataset.metadata.spec[meta_key].param.to_string( dataset.metadata.get( meta_key ) ) == meta_dict['value']: diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/parameters/validation.py --- a/lib/galaxy/tools/parameters/validation.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/tools/parameters/validation.py Wed Oct 22 13:49:22 2008 -0400 @@ -163,13 +163,15 @@ """ Validator that checks for missing metadata """ - def __init__( self, message=None ): + def __init__( self, message = None, check = "", skip = "" ): self.message = message + self.check = check.split( "," ) + self.skip = skip.split( "," ) @classmethod def from_element( cls, param, elem ): - return cls( elem.get( 'message', None ) ) + return cls( message=elem.get( 'message', None ), check=elem.get( 'check', "" ), skip=elem.get( 'skip', "" ) ) def validate( self, value, history=None ): - if value and value.missing_meta(): + if value and value.missing_meta( check = self.check, skip = self.skip ): if self.message is None: self.message = "Metadata missing, click the pencil icon in the history item to edit / save the metadata attributes" raise ValueError( self.message ) diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/util/maf_utilities.py --- a/lib/galaxy/tools/util/maf_utilities.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/tools/util/maf_utilities.py Wed Oct 22 13:49:22 2008 -0400 @@ -145,8 +145,16 @@ except: pass return None + +#return ( index, temp_index_filename ) for user maf, if available, or build one and return it, return None when no tempfile is created +def open_or_build_maf_index( maf_file, index_filename, species = None ): + try: + return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), None ) + except: + return build_maf_index( maf_file, species = species ) + -#builds and returns (index, index_filename) for specified maf_file +#builds and returns ( index, index_filename ) for specified maf_file def build_maf_index( maf_file, species = None ): indexes = bx.interval_index_file.Indexes() try: diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py Wed Oct 22 12:15:06 2008 -0400 +++ b/lib/galaxy/util/__init__.py Wed Oct 22 13:49:22 2008 -0400 @@ -16,6 +16,8 @@ log = logging.getLogger(__name__) _lock = threading.RLock() + +gzip_magic = '\037\213' def synchronized(func): """This wrapper will serialize access to 'func' to a single thread. Use it as a decorator.""" diff -r b02b8d9196a8 -r 0b022adfdc34 templates/dataset/edit_attributes.mako --- a/templates/dataset/edit_attributes.mako Wed Oct 22 12:15:06 2008 -0400 +++ b/templates/dataset/edit_attributes.mako Wed Oct 22 13:49:22 2008 -0400 @@ -65,6 +65,9 @@ if they are not accurate. </div> </form> + %if data.missing_meta(): + <div class="errormessagesmall">Required metadata values are missing. Some of these values may not be editable by the user. Selecting "Auto-detect" will attempt to fix these values.</div> + %endif </div> </div> diff -r b02b8d9196a8 -r 0b022adfdc34 tools/data_source/data_source.py --- a/tools/data_source/data_source.py Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/data_source/data_source.py Wed Oct 22 13:49:22 2008 -0400 @@ -2,7 +2,8 @@ #Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file. import urllib, sys, os, gzip, tempfile, shutil from galaxy import eggs -from galaxy.datatypes import data +#from galaxy.datatypes import data +from galaxy.util import gzip_magic assert sys.version_info[:2] >= ( 2, 4 ) @@ -14,7 +15,7 @@ temp = open( filename, "U" ) magic_check = temp.read( 2 ) temp.close() - if magic_check != data.gzip_magic: + if magic_check != gzip_magic: return False return True diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/genebed_maf_to_fasta.xml --- a/tools/maf/genebed_maf_to_fasta.xml Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/genebed_maf_to_fasta.xml Wed Oct 22 13:49:22 2008 -0400 @@ -1,6 +1,6 @@ <tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks"> <description>given a set of coding exon intervals</description> - <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR} + <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR} #else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR} #end if </command> diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval2maf.py --- a/tools/maf/interval2maf.py Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/interval2maf.py Wed Oct 22 13:49:22 2008 -0400 @@ -16,6 +16,7 @@ -S, --strandCol=S: Column of Strand -t, --mafType=t: Type of MAF source to use -m, --mafFile=m: Path of source MAF file, if not using cached version + -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version -i, --interval_file=i: Input interval file -o, --output_file=o: Output MAF file -p, --species=p: Species to include in output @@ -92,7 +93,7 @@ print >> sys.stderr, "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) sys.exit() elif options.mafFile: - index, index_filename = maf_utilities.build_maf_index( options.mafFile, species = [dbkey] ) + index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] ) if index is None: print >> sys.stderr, "Your MAF file appears to be malformed." sys.exit() diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval2maf.xml --- a/tools/maf/interval2maf.xml Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/interval2maf.xml Wed Oct 22 13:49:22 2008 -0400 @@ -1,7 +1,7 @@ <tool id="Interval2Maf1" name="Extract MAF blocks"> <description>given a set of genomic intervals</description> <command interpreter="python"> - #if $maf_source_type.maf_source == "user":#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafFile=$maf_source_type.mafFile --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc + #if $maf_source_type.maf_source == "user":#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafFile=$maf_source_type.mafFile --mafIndex=$maf_source_type.mafFile.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc #else:#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafType=$maf_source_type.mafType --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc #end if </command> diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval_maf_to_merged_fasta.py --- a/tools/maf/interval_maf_to_merged_fasta.py Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/interval_maf_to_merged_fasta.py Wed Oct 22 13:49:22 2008 -0400 @@ -15,6 +15,7 @@ -G, --geneBED: Input is a Gene BED file, process and join exons as one region -t, --mafSourceType=t: Type of MAF source to use -m, --mafSource=m: Path of source MAF file, if not using cached version + -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version -i, --interval_file=i: Input interval file -o, --output_file=o: Output MAF file -p, --species=p: Species to include in output @@ -105,7 +106,7 @@ stop_err( "The MAF source specified (%s) appears to be invalid." % ( options.mafSource ) ) elif options.mafSourceType.lower() in ["user"]: #index maf for use here, need to remove index_file when finished - index, index_filename = maf_utilities.build_maf_index( options.mafSource, species = [primary_species] ) + index, index_filename = maf_utilities.open_or_build_maf_index( options.mafSource, options.mafIndex, species = [primary_species] ) if index is None: stop_err( "Your MAF file appears to be malformed." ) else: diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval_maf_to_merged_fasta.xml --- a/tools/maf/interval_maf_to_merged_fasta.xml Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/interval_maf_to_merged_fasta.xml Wed Oct 22 13:49:22 2008 -0400 @@ -1,6 +1,6 @@ <tool id="Interval_Maf_Merged_Fasta2" name="Stitch MAF blocks"> <description>given a set of genomic intervals</description> - <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR} + <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR} #else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR} #end if </command> diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/maf_stats.py --- a/tools/maf/maf_stats.py Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/maf_stats.py Wed Oct 22 13:49:22 2008 -0400 @@ -31,10 +31,14 @@ else: summary = False mafIndexFile = "%s/maf_index.loc" % sys.argv[9] + try: + maf_index_filename = sys.argv[10].strip() + except: + maf_index_filename = None index = index_filename = None if maf_source_type == "user": #index maf for use here - index, index_filename = maf_utilities.build_maf_index( input_maf_filename, species = [dbkey] ) + index, index_filename = maf_utilities.open_or_build_maf_index( input_maf_filename, maf_index_filename, species = [dbkey] ) if index is None: print >>sys.stderr, "Your MAF file appears to be malformed." sys.exit() diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/maf_stats.xml --- a/tools/maf/maf_stats.xml Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/maf/maf_stats.xml Wed Oct 22 13:49:22 2008 -0400 @@ -7,7 +7,10 @@ #else: $maf_source_type.maf_source $maf_source_type.mafType $input1 $out_file1 $dbkey ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $summary #end if - ${GALAXY_DATA_INDEX_DIR} + ${GALAXY_DATA_INDEX_DIR} + #if $maf_source_type.maf_source == "user": + $input2.metadata.maf_index + #end if </command> <inputs> <param format="interval" name="input1" label="Interval File" type="data"> diff -r b02b8d9196a8 -r 0b022adfdc34 tools/stats/filtering.py --- a/tools/stats/filtering.py Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/stats/filtering.py Wed Oct 22 13:49:22 2008 -0400 @@ -4,7 +4,6 @@ import sys, sets, re, os.path from galaxy import eggs -from galaxy.datatypes import metadata assert sys.version_info[:2] >= ( 2, 4 ) diff -r b02b8d9196a8 -r 0b022adfdc34 tools/visualization/GMAJ.xml --- a/tools/visualization/GMAJ.xml Wed Oct 22 12:15:06 2008 -0400 +++ b/tools/visualization/GMAJ.xml Wed Oct 22 13:49:22 2008 -0400 @@ -2,7 +2,9 @@ <description>Multiple Alignment Viewer</description> <command interpreter="python">GMAJ.py $out_file1 $maf_input $gmaj_file $filenames_file</command> <inputs> - <param name="maf_input" type="data" format="maf" label="Alignment File" optional="False"/> + <param name="maf_input" type="data" format="maf" label="Alignment File" optional="False"> + <validator type="metadata" check="species_chromosomes" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/> + </param> <param name="refseq" label="Reference Sequence" type="select"> <option value="first" selected="true">First sequence in each block</option> <option value="any">Any sequence</option> @@ -103,9 +105,19 @@ #set $seq_count = 0 #for $annotation_count, $annotation in $enumerate( $annotations ): -#if $annotation.annotation_style.style == "galaxy": -#if $maf_input.dataset.metadata.species_chromosomes and $annotation.annotation_style['species'].value in $maf_input.dataset.metadata.species_chromosomes and $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]: -#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]] +#if $annotation.annotation_style.style == "galaxy": +#set $species_chromosomes = {} +#if $maf_input.dataset.metadata.species_chromosomes: +#for $line in open( $maf_input.dataset.metadata.species_chromosomes.file_name ): +#set $fields = $line.split( "\t" ) +#if $fields: +#set $spec = $fields.pop( 0 ) +#set $species_chromosomes[spec] = $fields +#end if +#end for +#end if +#if $species_chromosomes and $annotation.annotation_style['species'].value in $species_chromosomes and $species_chromosomes[$annotation.annotation_style['species'].value]: +#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $species_chromosomes[$annotation.annotation_style['species'].value]] #else: #set $seq_names = [$annotation.annotation_style['species']] #end if @@ -171,4 +183,4 @@ Gmaj is a tool for viewing and manipulating Generalized Multiple Alignments (GMAs) produced by programs such as TBA (though it can also be used with maf-format alignments from other sources). It can display interactive graphical and text representations of the alignments, a diagram showing the locations of exons and repeats, and other annotations -- all with the user's choice of reference sequence. </help> -</tool> \ No newline at end of file +</tool>
participants (1)
-
Nate Coraor