details:     http://www.bx.psu.edu/hg/galaxy/rev/83dc9642a59e
changeset:   3156:83dc9642a59e
user:        Greg Von Kuster <greg@bx.psu.edu>
date:        Tue Dec 08 09:05:35 2009 -0500
description:
Rename the before_edit() and after_edit() metadata-related methods to the more
appropriately named before_setting_metadata() and after_setting_metadata(), and
add the methods to all datatype classes.  Move sorting of Bam files from the Bam
set_meta() method to the new before_setting_metadata() method.

diffstat:

 lib/galaxy/datatypes/binary.py | 130 ++++++++++++++++----------
 lib/galaxy/datatypes/chrominfo.py | 5 +-
 lib/galaxy/datatypes/coverage.py | 4 +-
 lib/galaxy/datatypes/data.py | 11 +-
 lib/galaxy/datatypes/genetics.py | 66 ++++++++++++++++-
 lib/galaxy/datatypes/images.py | 12 +++
 lib/galaxy/datatypes/interval.py | 21 ++++-
 lib/galaxy/datatypes/qualityscore.py | 11 ++
 lib/galaxy/datatypes/sequence.py | 30 +++++++-
 lib/galaxy/datatypes/tabular.py | 9 ++
 lib/galaxy/datatypes/tracks.py | 3 +
 lib/galaxy/datatypes/xml.py | 4 +
 lib/galaxy/jobs/__init__.py | 1 +
 lib/galaxy/tools/__init__.py | 5 +-
 lib/galaxy/web/controllers/library.py | 7 +-
 lib/galaxy/web/controllers/library_admin.py | 7 +-
 lib/galaxy/web/controllers/root.py | 9 +-
 tools/data_source/hbvar_filter.py | 1 +
 tools/maf/maf_to_bed_code.py | 1 +
 tools/samtools/sam_to_bam.py | 21 ++++-
 20 files changed, 271 insertions(+), 87 deletions(-)

diffs (968 lines):

diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.py	Mon Dec 07 16:04:33 2009 -0500
+++ b/lib/galaxy/datatypes/binary.py	Tue Dec 08 09:05:35 2009 -0500
@@ -17,6 +17,9 @@
 class Binary( data.Data ):
     """Binary data"""
+    def before_setting_metadata( self, dataset ):
+        """This function is called on the dataset before metadata is edited."""
+        pass
     def set_peek( self, dataset, is_multi_byte=False ):
         """Set the peek and blurb text"""
         if not dataset.dataset.purged:
@@ -32,6 +35,10 @@
 class Ab1( Binary ):
     """Class describing an ab1 binary sequence file"""
     file_ext = "ab1"
+
+    def before_setting_metadata( self, dataset ):
+        """This function is called on the dataset before metadata is edited."""
+        pass
     def set_peek( self, dataset, is_multi_byte=False ):
         if not dataset.dataset.purged:
             export_url = "/history_add_to?" + urlencode( {'history_id':dataset.history_id,'ext':'ab1','name':'ab1 sequence','info':'Sequence file','dbkey':dataset.dbkey} )
@@ -51,83 +58,67 @@
     file_ext = "bam"
     MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True )
-    def init_meta( self, dataset, copy_from=None ):
-        Binary.init_meta( self, dataset, copy_from=copy_from )
-    def set_meta( self, dataset, overwrite = True, **kwd ):
-        """ Ensures that the Bam file contents are sorted and creates the index for the BAM file. """
-        errors = False
-        # These metadata values are not accessible by users, always overwrite
-        index_file = dataset.metadata.bam_index
+    def before_setting_metadata( self, dataset ):
+        """ Ensures that the Bam file contents are sorted. This function is called on the dataset before set_meta() is called."""
+        sorted = False
+        try:
+            index_file = dataset.metadata.bam_index
+        except:
+            index_file = None
         if index_file:
             # If an index file already exists on disk, then the data must have previously been sorted
             # since samtools requires a sorted Bam file in order to create an index.
             sorted = os.path.exists( index_file.file_name )
-        else:
-            index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset )
-            sorted = False
-        tmp_dir = tempfile.gettempdir()
-        try:
+        if not sorted:
+            # Use samtools to sort the Bam file
+            tmp_dir = tempfile.gettempdir()
             # Create a symlink from the temporary directory to the dataset file so that samtools can mess with it.
             tmp_dataset_file_name = os.path.join( tmp_dir, os.path.basename( dataset.file_name ) )
             # Here tmp_dataset_file_name looks something like /tmp/dataset_XX.dat
             os.symlink( dataset.file_name, tmp_dataset_file_name )
+            # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created.
+            # TODO: This command may also create temporary files <out.prefix>.%d.bam when the
+            # whole alignment cannot be fitted into memory ( controlled by option -m ). We're
+            # not handling this case here.
+            tmp_sorted_dataset_file = tempfile.NamedTemporaryFile( prefix=tmp_dataset_file_name )
+            tmp_sorted_dataset_file_name = tmp_sorted_dataset_file.name
+            tmp_sorted_dataset_file.close()
+            command = "samtools sort %s %s 2>/dev/null" % ( tmp_dataset_file_name, tmp_sorted_dataset_file_name )
+            proc = subprocess.Popen( args=command, shell=True )
+            proc.wait()
+            tmp_sorted_bam_file_name = '%s.bam' % tmp_sorted_dataset_file_name
+            # Move tmp_sorted_bam_file_name to our output dataset location
+            shutil.move( tmp_sorted_bam_file_name, dataset.file_name )
+            # Remove all remaining temporary files
+            os.unlink( tmp_dataset_file_name )
+    def init_meta( self, dataset, copy_from=None ):
+        Binary.init_meta( self, dataset, copy_from=copy_from )
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        """ Creates the index for the BAM file. """
+        # These metadata values are not accessible by users, always overwrite
+        index_file = dataset.metadata.bam_index
+        if not index_file:
+            index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset )
+        tmp_dir = tempfile.gettempdir()
+        # Create a symlink from the temporary directory to the dataset file so that samtools can mess with it.
+        tmp_dataset_file_name = os.path.join( tmp_dir, os.path.basename( dataset.file_name ) )
+        # Here tmp_dataset_file_name looks something like /tmp/dataset_XX.dat
+        os.symlink( dataset.file_name, tmp_dataset_file_name )
+        errors = False
+        try:
+            # Create the Bam index
+            command = 'samtools index %s' % tmp_dataset_file_name
+            proc = subprocess.Popen( args=command, shell=True )
+            proc.wait()
         except Exception, e:
             errors = True
-            err_msg = 'Error creating tmp symlink to file (%s). ' % str( dataset.file_name )
+            err_msg = 'Error creating index for BAM file (%s)' % str( tmp_dataset_file_name )
             log.exception( err_msg )
             sys.stderr.write( err_msg + str( e ) )
-        if not errors and not sorted:
-            try:
-                # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created.
-                # TODO: This command may also create temporary files <out.prefix>.%d.bam when the
-                # whole alignment cannot be fitted into memory ( controlled by option -m ). We're
-                # not handling this case here.
-                tmp_sorted_dataset_file = tempfile.NamedTemporaryFile( prefix=tmp_dataset_file_name )
-                tmp_sorted_dataset_file_name = tmp_sorted_dataset_file.name
-                tmp_sorted_dataset_file.close()
-                command = "samtools sort %s %s 2>/dev/null" % ( tmp_dataset_file_name, tmp_sorted_dataset_file_name )
-                proc = subprocess.Popen( args=command, shell=True )
-                proc.wait()
-            except Exception, e:
-                errors = True
-                err_msg = 'Error sorting alignments from (%s). 
' % tmp_dataset_file_name - log.exception( err_msg ) - sys.stderr.write( err_msg + str( e ) ) if not errors: - if sorted: - try: - # Create the Bam index - command = 'samtools index %s' % tmp_dataset_file_name - proc = subprocess.Popen( args=command, shell=True ) - proc.wait() - except Exception, e: - errors = True - err_msg = 'Error creating index for BAM file (%s)' % str( tmp_dataset_file_name ) - log.exception( err_msg ) - sys.stderr.write( err_msg + str( e ) ) - else: - tmp_sorted_bam_file_name = '%s.bam' % tmp_sorted_dataset_file_name - try: - # Create the Bam index - command = 'samtools index %s' % tmp_sorted_bam_file_name - proc = subprocess.Popen( args=command, shell=True ) - proc.wait() - except Exception, e: - errors = True - err_msg = 'Error creating index for BAM file (%s)' % str( tmp_sorted_dataset_file_name ) - log.exception( err_msg ) - sys.stderr.write( err_msg + str( e ) ) - if not errors: - if sorted: - # Move the temporary index file ~/tmp/dataset_XX.dat.bai to our metadata file - # storage location ~/database/files/_metadata_files/dataset_XX.dat - shutil.move( '%s.bai' % ( tmp_dataset_file_name ), index_file.file_name ) - else: - # Move tmp_sorted_bam_file_name to our output dataset location - shutil.move( tmp_sorted_bam_file_name, dataset.file_name ) - # Move the temporary sorted index file ~/tmp/dataset_XX.dat.bai to our metadata file - # storage location ~/database/files/_metadata_files/dataset_XX.dat - shutil.move( '%s.bai' % ( tmp_sorted_bam_file_name ), index_file.file_name ) + # Move the temporary index file ~/tmp/dataset_XX.dat.bai to our metadata file + # storage location ~/database/files/_metadata_files/dataset_XX.dat + shutil.move( '%s.bai' % ( tmp_dataset_file_name ), index_file.file_name ) # Remove all remaining temporary files os.unlink( tmp_dataset_file_name ) # Set the metadata @@ -159,6 +150,10 @@ class Binseq( Binary ): """Class describing a zip archive of binary sequence files""" file_ext = "binseq.zip" + + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: zip_file = zipfile.ZipFile( dataset.file_name, "r" ) @@ -180,6 +175,10 @@ class Scf( Binary ): """Class describing an scf binary sequence file""" file_ext = "scf" + + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: export_url = "/history_add_to?" + urlencode({'history_id':dataset.history_id,'ext':'scf','name':'scf sequence','info':'Sequence file','dbkey':dataset.dbkey}) @@ -199,6 +198,9 @@ file_ext = "sff" def __init__( self, **kwd ): Binary.__init__( self, **kwd ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): # The first 4 bytes of any sff file is '.sff', and the file is binary. 
For details # about the format, see http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/chrominfo.py --- a/lib/galaxy/datatypes/chrominfo.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/chrominfo.py Tue Dec 08 09:05:35 2009 -0500 @@ -11,4 +11,7 @@ file_ext = "len" MetadataElement( name="chrom", default=1, desc="Chrom column", param=metadata.ColumnParameter ) MetadataElement( name="length", default=2, desc="Length column", param=metadata.ColumnParameter ) - + + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/coverage.py --- a/lib/galaxy/datatypes/coverage.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/coverage.py Tue Dec 08 09:05:35 2009 -0500 @@ -29,7 +29,9 @@ MetadataElement( name="reverseCol", desc="Optional reverse read column", param=metadata.ColumnParameter, optional=True, no_value=0 ) MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) - + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def get_track_window(self, dataset, data, start, end): """ Assumes we have a numpy file. diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/data.py Tue Dec 08 09:05:35 2009 -0500 @@ -256,11 +256,11 @@ if return_output: return converted_dataset return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid) - def before_edit( self, dataset ): - """This function is called on the dataset before metadata is edited.""" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is set.""" pass - def after_edit( self, dataset ): - """This function is called on the dataset after metadata is edited.""" + def after_setting_metadata( self, dataset ): + """This function is called on the dataset after metadata is set.""" dataset.clear_associated_files( metadata_safe = True ) def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, is_binary = False, space_to_tab = True, **kwds ): kwds[ 'name' ] = name @@ -346,6 +346,9 @@ def get_mime(self): """Returns the mime type of the datatype""" return 'text/plain' + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is set.""" + pass def set_meta( self, dataset, **kwd ): """ Set the number of lines of data in dataset, diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/genetics.py Tue Dec 08 09:05:35 2009 -0500 @@ -47,6 +47,9 @@ self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ) def as_ucsc_display_file( self, dataset, **kwd ): return open( dataset.file_name ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -202,6 +205,9 @@ """Initialize featurelistt datatype""" Tabular.__init__( self, **kwd ) self.column_names = [] + def 
before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -240,6 +246,9 @@ self.column_names[0] = 'FID' self.column_names[1] = 'IID' # this is what Plink wants as at 2009 + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff(self,filename): """ """ @@ -264,6 +273,9 @@ rgTabList.__init__( self, **kwd ) for i,s in enumerate(['#FeatureId', 'Chr', 'Genpos', 'Mappos']): self.column_names[i] = s + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Rgenetics(Html): """ @@ -317,6 +329,9 @@ f.write("\n".join( rval )) f.write('\n') f.close() + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, **kwd ): """ for lped/pbed eg @@ -358,6 +373,9 @@ """ file_ext="snpmatrix" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = "Binary RGenetics file" @@ -387,6 +405,9 @@ Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Pphe(Rgenetics): """ @@ -397,6 +418,9 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.pphe', description = 'Plink Phenotype File', substitute_name_with_metadata = 'base_name' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Lmap(Rgenetics): """ @@ -404,6 +428,10 @@ """ file_ext="lmap" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass + class Fphe(Rgenetics): """ fake class to distinguish different species of Rgenetics data collections @@ -413,6 +441,9 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.fphe', description = 'FBAT Phenotype File', substitute_name_with_metadata = 'base_name' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Phe(Rgenetics): """ @@ -423,6 +454,9 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.phe', description = 'Phenotype File', substitute_name_with_metadata = 'base_name' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Fped(Rgenetics): """ @@ -433,6 +467,9 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.fped', description = 'FBAT format pedfile', substitute_name_with_metadata = 'base_name' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class 
Pbed(Rgenetics): """ @@ -445,6 +482,9 @@ self.add_composite_file( '%s.bim', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.bed', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Eigenstratgeno(Rgenetics): """ @@ -457,6 +497,9 @@ self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Eigenstratpca(Rgenetics): """ @@ -467,18 +510,27 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.eigenstratpca', description = 'Eigenstrat PCA file', substitute_name_with_metadata = 'base_name' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Snptest(Rgenetics): """ fake class to distinguish different species of Rgenetics data collections """ file_ext="snptest" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class Pheno(Tabular): """ base class for pheno files """ file_ext = 'pheno' + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class RexpBase( Html ): """ @@ -646,6 +698,9 @@ f.write("\n".join( rval )) f.write('\n') f.close() + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def init_meta( self, dataset, copy_from=None ): """Add metadata elements""" if copy_from: @@ -734,7 +789,10 @@ RexpBase.__init__(self, **kwd) self.add_composite_file( '%s.affybatch', description = 'AffyBatch R object saved to file', substitute_name_with_metadata = 'base_name', is_binary=True ) - + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass + class Eset( RexpBase ): """derived class for BioC data structures in Galaxy """ file_ext = "eset" @@ -743,6 +801,9 @@ RexpBase.__init__(self, **kwd) self.add_composite_file( '%s.eset', description = 'ESet R object saved to file', substitute_name_with_metadata = 'base_name', is_binary = True ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass class MAlist( RexpBase ): """derived class for BioC data structures in Galaxy """ @@ -752,6 +813,9 @@ RexpBase.__init__(self, **kwd) self.add_composite_file( '%s.malist', description = 'MAlist R object saved to file', substitute_name_with_metadata = 'base_name', is_binary = True ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass if __name__ == '__main__': import doctest, sys diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/images.py Tue Dec 08 09:05:35 2009 -0500 @@ -15,6 +15,9 
@@ class Image( data.Data ): """Class describing an image""" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = 'Image in %s format' % dataset.extension @@ -51,6 +54,9 @@ """Class describing a GMAJ Applet""" file_ext = "gmaj.zip" copy_safe_peek = False + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: if hasattr( dataset, 'history_id' ): @@ -102,6 +108,9 @@ class Html( data.Text ): """Class describing an html file""" file_ext = "html" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = "HTML file" @@ -136,6 +145,9 @@ """Class describing a LAJ Applet""" file_ext = "laj" copy_safe_peek = False + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: if hasattr( dataset, 'history_id' ): diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/interval.py Tue Dec 08 09:05:35 2009 -0500 @@ -75,7 +75,9 @@ else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' - + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, first_line_is_header = False, **kwd ): Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 0 ) @@ -340,7 +342,10 @@ MetadataElement( name="strandCol", desc="Strand column (click box & select)", param=metadata.ColumnParameter, optional=True, no_value=0 ) MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) ###do we need to repeat these? 
they are the same as should be inherited from interval type - + + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, **kwd ): """Sets the metadata information for datasets previously determined to be in bed format.""" i = 0 @@ -499,6 +504,9 @@ """Initialize datatype, by adding GBrowse display app""" Tabular.__init__(self, **kwd) self.add_display_app ( 'c_elegans', 'display in Wormbase', 'as_gbrowse_display_file', 'gbrowse_links' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -636,6 +644,9 @@ def __init__(self, **kwd): """Initialize datatype, by adding GBrowse display app""" Gff.__init__(self, **kwd) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -799,6 +810,9 @@ return ret_val def make_html_table( self, dataset ): return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -890,6 +904,9 @@ """Initialize interval datatype, by adding UCSC display app""" Tabular.__init__(self, **kwd) self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, overwrite = True, **kwd ): Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 1 ) def display_peek( self, dataset ): diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/qualityscore.py --- a/lib/galaxy/datatypes/qualityscore.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/qualityscore.py Tue Dec 08 09:05:35 2009 -0500 @@ -15,6 +15,9 @@ """ file_ext = "qualsolid" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): """ >>> fname = get_test_fname( 'sequence.fasta' ) @@ -64,6 +67,9 @@ """ file_ext = "qual454" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): """ >>> fname = get_test_fname( 'sequence.fasta' ) @@ -102,4 +108,9 @@ until we know more about quality score formats """ file_ext = "qualsolexa" + + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass + \ No newline at end of file diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/sequence.py Tue Dec 08 09:05:35 2009 -0500 @@ -21,6 +21,9 @@ """Add metadata elements""" MetadataElement( name="sequences", default=0, desc="Number of sequences", readonly=True, visible=False, optional=True, no_value=0 ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is 
edited.""" + pass def set_meta( self, dataset, **kwd ): """ Set the number of sequences and the number of data lines in dataset. @@ -56,11 +59,17 @@ """Add metadata elements""" MetadataElement( name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True, no_value=None ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass + class Fasta( Sequence ): """Class representing a FASTA sequence""" - file_ext = "fasta" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): """ Determines whether the file is in fasta format @@ -113,6 +122,9 @@ """ Class representing the SOLID Color-Space sequence ( csfasta ) """ file_ext = "csfasta" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): """ Color-space sequence: @@ -154,6 +166,9 @@ """Class representing a generic FASTQ sequence""" file_ext = "fastq" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_meta( self, dataset, **kwd ): """ Set the number of sequences and the number of data lines @@ -205,6 +220,10 @@ """Class representing a FASTQ sequence ( the Sanger variant )""" file_ext = "fastqsanger" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass + try: from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) @@ -297,6 +316,9 @@ MetadataElement( name="species_chromosomes", desc="Species Chromosomes", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) MetadataElement( name="maf_index", desc="MAF Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def init_meta( self, dataset, copy_from=None ): Alignment.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, **kwd ): @@ -403,6 +425,9 @@ file_ext = "axt" + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): """ Determines whether the file is in axt format @@ -455,6 +480,9 @@ # here simply for backward compatibility ( although it is still in the datatypes registry ). Subclassing # from data.Text eliminates managing metadata elements inherited from the Alignemnt class. 
+ def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def sniff( self, filename ): """ Determines whether the file is in lav format diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/tabular.py Tue Dec 08 09:05:35 2009 -0500 @@ -23,6 +23,9 @@ MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=False, no_value=0 ) MetadataElement( name="column_types", default=[], desc="Column types", param=metadata.ColumnTypesParameter, readonly=True, visible=False, no_value=[] ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def init_meta( self, dataset, copy_from=None ): data.Text.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): @@ -224,6 +227,9 @@ 'Superorder', 'Order', 'Suborder', 'Superfamily', 'Family', 'Subfamily', 'Tribe', 'Subtribe', 'Genus', 'Subgenus', 'Species', 'Subspecies' ] + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -253,6 +259,9 @@ self.column_names = ['QNAME', 'FLAG', 'RNAME', 'POS', 'MAPQ', 'CIGAR', 'MRNM', 'MPOS', 'ISIZE', 'SEQ', 'QUAL', 'OPT' ] + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/tracks.py --- a/lib/galaxy/datatypes/tracks.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/tracks.py Tue Dec 08 09:05:35 2009 -0500 @@ -23,6 +23,9 @@ def __init__(self, **kwargs): super( GeneTrack, self ).__init__( **kwargs ) self.add_display_app( 'genetrack', 'View in', '', 'genetrack_link' ) + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def get_display_links( self, dataset, type, app, base_url, target_frame='galaxy_main', **kwd ): return data.Data.get_display_links( self, dataset, type, app, base_url, target_frame=target_frame, **kwd ) def genetrack_link( self, hda, type, app, base_url ): diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/datatypes/xml.py --- a/lib/galaxy/datatypes/xml.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/datatypes/xml.py Tue Dec 08 09:05:35 2009 -0500 @@ -10,6 +10,10 @@ class BlastXml( data.Text ): """NCBI Blast XML Output data""" file_ext = "blastxml" + + def before_setting_metadata( self, dataset ): + """This function is called on the dataset before metadata is edited.""" + pass def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/jobs/__init__.py Tue Dec 08 09:05:35 2009 -0500 @@ -537,6 +537,7 @@ #it would be quicker to just copy the metadata from the originating output dataset, #but somewhat trickier (need to recurse up the copied_from tree), for now we'll call 
set_meta() if not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ): + dataset.datatype.before_setting_metadata( dataset ) # Only set metadata values if they are missing... dataset.set_meta( overwrite = False ) else: diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/tools/__init__.py Tue Dec 08 09:05:35 2009 -0500 @@ -1418,6 +1418,7 @@ if data.extension != data_type: data = app.datatypes_registry.change_datatype( data, data_type ) elif not isinstance( data.datatype, datatypes.interval.Bed ) and isinstance( data.datatype, datatypes.interval.Interval ): + data.datatype.before_setting_metadata( data ) data.set_meta() if data.missing_meta(): data = app.datatypes_registry.change_datatype( data, 'tabular' ) @@ -1472,6 +1473,7 @@ self.sa_session.flush() child_dataset.set_size() child_dataset.name = "Secondary Dataset (%s)" % ( designation ) + child_dataset.datatype.before_setting_metadata( child_dataset ) child_dataset.init_meta() child_dataset.set_meta() child_dataset.set_peek() @@ -1531,6 +1533,7 @@ primary_data.set_size() primary_data.name = outdata.name primary_data.info = outdata.info + primary_dataset.datatype.before_setting_metadata( primary_dataset ) primary_data.init_meta( copy_from=outdata ) primary_data.dbkey = dbkey primary_data.set_meta() @@ -1567,7 +1570,7 @@ dataset.metadata.from_JSON_dict( external_metadata.get_output_filenames_by_dataset( dataset, app.model.context ).filename_out ) # If setting external metadata has failed, how can we inform the user? # For now, we'll leave the default metadata and set the state back to its original. - dataset.datatype.after_edit( dataset ) + dataset.datatype.after_setting_metadata( dataset ) dataset.state = param_dict.get( '__ORIGINAL_DATASET_STATE__' ) self.sa_session.add( dataset ) self.sa_session.flush() diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/web/controllers/library.py --- a/lib/galaxy/web/controllers/library.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/web/controllers/library.py Tue Dec 08 09:05:35 2009 -0500 @@ -465,7 +465,7 @@ else: setattr( ldda.metadata, name, spec.unwrap( params.get ( name, None ) ) ) ldda.metadata.dbkey = dbkey - ldda.datatype.after_edit( ldda ) + ldda.datatype.after_setting_metadata( ldda ) trans.sa_session.flush() msg = 'Attributes updated for library dataset %s' % ldda.name messagetype = 'done' @@ -487,8 +487,9 @@ if name not in [ 'name', 'info', 'dbkey' ]: if spec.get( 'default' ): setattr( ldda.metadata, name, spec.unwrap( spec.get( 'default' ) ) ) + ldda.datatype.before_setting_metadata( ldda ) ldda.datatype.set_meta( ldda ) - ldda.datatype.after_edit( ldda ) + ldda.datatype.after_setting_metadata( ldda ) trans.sa_session.flush() msg = 'Attributes updated for library dataset %s' % ldda.name messagetype = 'done' @@ -520,7 +521,7 @@ msg=msg, messagetype=messagetype ) if trans.app.security_agent.can_modify_library_item( user, roles, ldda ): - ldda.datatype.before_edit( ldda ) + ldda.datatype.before_setting_metadata( ldda ) if "dbkey" in ldda.datatype.metadata_spec and not ldda.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/web/controllers/library_admin.py --- a/lib/galaxy/web/controllers/library_admin.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/web/controllers/library_admin.py Tue Dec 08 09:05:35 2009 -0500 @@ -475,7 +475,7 @@ 
else: setattr( ldda.metadata, name, spec.unwrap( params.get ( name, None ) ) ) ldda.metadata.dbkey = dbkey - ldda.datatype.after_edit( ldda ) + ldda.datatype.after_setting_metadata( ldda ) trans.sa_session.flush() msg = 'Attributes updated for library dataset %s' % ldda.name messagetype = 'done' @@ -493,8 +493,9 @@ if name not in [ 'name', 'info', 'dbkey' ]: if spec.get( 'default' ): setattr( ldda.metadata, name, spec.unwrap( spec.get( 'default' ) ) ) + ldda.datatype.before_setting_metadata( ldda ) ldda.datatype.set_meta( ldda ) - ldda.datatype.after_edit( ldda ) + ldda.datatype.after_setting_metadata( ldda ) trans.sa_session.flush() msg = 'Attributes updated for library dataset %s' % ldda.name return trans.fill_template( "/admin/library/ldda_edit_info.mako", @@ -516,7 +517,7 @@ widgets=widgets, msg=msg, messagetype=messagetype ) - ldda.datatype.before_edit( ldda ) + ldda.datatype.before_setting_metadata( ldda ) if "dbkey" in ldda.datatype.metadata_spec and not ldda.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey diff -r 8feff3bc14bc -r 83dc9642a59e lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Mon Dec 07 16:04:33 2009 -0500 +++ b/lib/galaxy/web/controllers/root.py Tue Dec 08 09:05:35 2009 -0500 @@ -301,7 +301,7 @@ setattr( data.metadata, name, other ) else: setattr( data.metadata, name, spec.unwrap( params.get (name, None) ) ) - data.datatype.after_edit( data ) + data.datatype.after_setting_metadata( data ) else: msg = ' (Metadata could not be changed because this dataset is currently being used as input or output. You must cancel or wait for these jobs to complete before changing metadata.)' trans.sa_session.flush() @@ -321,8 +321,9 @@ trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute( trans.app.datatypes_registry.set_external_metadata_tool, trans, incoming = { 'input1':data } ) else: msg = 'Attributes updated' + data.datatype.before_setting_metadata( data ) data.set_meta() - data.datatype.after_edit( data ) + data.datatype.after_setting_metadata( data ) trans.sa_session.flush() return trans.show_ok_message( msg, refresh_frames=['history'] ) elif params.convert_data: @@ -345,8 +346,7 @@ trans.sa_session.refresh( data.dataset ) else: return trans.show_error_message( "You are not authorized to change this dataset's permissions" ) - data.datatype.before_edit( data ) - + data.datatype.before_setting_metadata( data ) if "dbkey" in data.datatype.metadata_spec and not data.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey @@ -521,6 +521,7 @@ data_file.close() data.state = data.states.OK data.set_size() + data.datatype.before_setting_metadata( data ) data.init_meta() data.set_meta() trans.sa_session.flush() diff -r 8feff3bc14bc -r 83dc9642a59e tools/data_source/hbvar_filter.py --- a/tools/data_source/hbvar_filter.py Mon Dec 07 16:04:33 2009 -0500 +++ b/tools/data_source/hbvar_filter.py Tue Dec 08 09:05:35 2009 -0500 @@ -46,6 +46,7 @@ fp.close() #Set meta data, format file to be valid interval type if isinstance(data.datatype, datatypes.interval.Interval): + data.datatype.before_setting_metadata( data ) data.set_meta(first_line_is_header=True) #check for missing meta data, if all there, comment first line and process file if not data.missing_meta(): diff -r 8feff3bc14bc -r 83dc9642a59e tools/maf/maf_to_bed_code.py --- a/tools/maf/maf_to_bed_code.py Mon Dec 07 16:04:33 2009 -0500 +++ 
b/tools/maf/maf_to_bed_code.py Tue Dec 08 09:05:35 2009 -0500 @@ -45,6 +45,7 @@ newdata.info = "The requested file is missing from the system." newdata.state = newdata.states.ERROR newdata.dbkey = dbkey + newdata.datatype.before_setting_metadata( newdata ) newdata.init_meta() newdata.set_meta() newdata.set_peek() diff -r 8feff3bc14bc -r 83dc9642a59e tools/samtools/sam_to_bam.py --- a/tools/samtools/sam_to_bam.py Mon Dec 07 16:04:33 2009 -0500 +++ b/tools/samtools/sam_to_bam.py Tue Dec 08 09:05:35 2009 -0500 @@ -79,18 +79,35 @@ tmp_aligns_file = tempfile.NamedTemporaryFile() tmp_aligns_file_name = tmp_aligns_file.name tmp_aligns_file.close() + # IMPORTANT NOTE: for some reason the samtools view command gzips the resulting bam file without warning, + # and the docs do not currently state that this occurs ( very bad ). command = "samtools view -bt %s -o %s %s 2>/dev/null" % ( fai_index_file_path, tmp_aligns_file_name, options.input1 ) proc = subprocess.Popen( args=command, shell=True ) proc.wait() - shutil.move( tmp_aligns_file_name, options.output1 ) except Exception, e: stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - # NOTE: samtools requires the Bam file to be sorted, but this occurs in Bam().set_meta() to ensure that uploaded Bam files are sorted as well. + try: + # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command + # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted + # into memory ( controlled by option -m ). + tmp_sorted_aligns_file = tempfile.NamedTemporaryFile() + tmp_sorted_aligns_file_name = tmp_sorted_aligns_file.name + tmp_sorted_aligns_file.close() + command = "samtools sort %s %s 2>/dev/null" % ( tmp_aligns_file_name, tmp_sorted_aligns_file_name ) + proc = subprocess.Popen( args=command, shell=True ) + proc.wait() + except Exception, e: + stop_err( 'Error sorting alignments from (%s), %s' % ( tmp_aligns_file_name, str( e ) ) ) + # Move tmp_aligns_file_name to our output dataset location + sorted_bam_file = '%s.bam' % tmp_sorted_aligns_file_name + shutil.move( sorted_bam_file, options.output1 ) if options.ref_file != "None": # Remove the symlink from /tmp/dataset_13.dat to ~/database/files/000/dataset_13.dat os.unlink( fai_index_file_path ) # Remove the index file index_file_name = '%s.fai' % fai_index_file_path os.unlink( index_file_name ) + # Remove the tmp_aligns_file_name + os.unlink( tmp_aligns_file_name ) if __name__=="__main__": __main__()
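
For orientation, below is a minimal, self-contained sketch -- not code from this
changeset -- of the call pattern the rename establishes: callers invoke
before_setting_metadata() on a dataset's datatype, then set_meta(), then
after_setting_metadata().  It also mirrors, in simplified form, how the Bam
datatype now runs "samtools sort" in the "before" hook so that set_meta() only
has to build the index.  The Dataset stand-in, its metadata dict, and the
set_dataset_metadata() helper are illustrative names only, not Galaxy APIs.

    # Illustrative sketch only -- simplified stand-ins for Galaxy's datatype classes.
    import shutil
    import subprocess
    import tempfile

    class Dataset(object):
        """Hypothetical stand-in for a Galaxy dataset association."""
        def __init__(self, file_name, datatype):
            self.file_name = file_name
            self.datatype = datatype
            self.metadata = {}

    class Data(object):
        """Base datatype: the renamed hooks are no-ops unless a subclass overrides them."""
        def before_setting_metadata(self, dataset):
            """Called on the dataset before metadata is set."""
            pass
        def set_meta(self, dataset, overwrite=True, **kwd):
            pass
        def after_setting_metadata(self, dataset):
            """Called on the dataset after metadata is set."""
            pass

    class Bam(Data):
        """Sorting now happens in before_setting_metadata(); set_meta() only indexes."""
        def before_setting_metadata(self, dataset):
            # Sort alignments by leftmost coordinate to a temporary prefix, then
            # replace the dataset file with the sorted BAM (samtools 0.1.x CLI,
            # as used in the changeset: "samtools sort <in.bam> <out.prefix>").
            prefix = tempfile.mktemp()
            subprocess.check_call("samtools sort %s %s" % (dataset.file_name, prefix), shell=True)
            shutil.move("%s.bam" % prefix, dataset.file_name)
        def set_meta(self, dataset, overwrite=True, **kwd):
            # Index the already-sorted BAM; samtools writes <file>.bai alongside it.
            subprocess.check_call("samtools index %s" % dataset.file_name, shell=True)
            dataset.metadata["bam_index"] = "%s.bai" % dataset.file_name

    def set_dataset_metadata(dataset):
        # Caller-side ordering used throughout the changeset (jobs, tools, controllers).
        dataset.datatype.before_setting_metadata(dataset)
        dataset.datatype.set_meta(dataset, overwrite=False)
        dataset.datatype.after_setting_metadata(dataset)

In the changeset itself this ordering appears wherever metadata is recomputed:
lib/galaxy/jobs/__init__.py, the library and root controllers, and the tool code
files all call before_setting_metadata() immediately before set_meta(), and
after_setting_metadata() replaces the old after_edit() call.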