details: http://www.bx.psu.edu/hg/galaxy/rev/022a8c94883f changeset: 3157:022a8c94883f user: Greg Von Kuster <greg@bx.psu.edu> date: Tue Dec 08 11:46:13 2009 -0500 description: Better approach to altering initial content of output dataset if necessary. The upload tool will now call the data type's groom_output_dataset() method [note: the method is actually named groom_dataset_content() in the diff below] ( a better name than before_setting_metadata since it is not related to metadata ). This will now also run on the cluster. diffstat: lib/galaxy/datatypes/binary.py | 75 +++++++++---------------- lib/galaxy/datatypes/chrominfo.py | 3 - lib/galaxy/datatypes/coverage.py | 5 +- lib/galaxy/datatypes/data.py | 9 +-- lib/galaxy/datatypes/genetics.py | 64 --------------------- lib/galaxy/datatypes/images.py | 14 +---- lib/galaxy/datatypes/interval.py | 18 ------ lib/galaxy/datatypes/qualityscore.py | 13 +---- lib/galaxy/datatypes/sequence.py | 29 --------- lib/galaxy/datatypes/tabular.py | 9 --- lib/galaxy/datatypes/tracks.py | 3 - lib/galaxy/datatypes/xml.py | 3 - lib/galaxy/jobs/__init__.py | 1 - lib/galaxy/tools/__init__.py | 3 - lib/galaxy/web/controllers/library.py | 2 - lib/galaxy/web/controllers/library_admin.py | 2 - lib/galaxy/web/controllers/root.py | 3 - tools/data_source/hbvar_filter.py | 1 - tools/data_source/upload.py | 4 + tools/maf/maf_to_bed_code.py | 1 - 20 files changed, 38 insertions(+), 224 deletions(-) diffs (833 lines): diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/binary.py Tue Dec 08 11:46:13 2009 -0500 @@ -17,9 +17,6 @@ class Binary( data.Data ): """Binary data""" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: @@ -36,9 +33,6 @@ """Class describing an ab1 binary sequence file""" file_ext = "ab1" - def 
before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: export_url = "/history_add_to?" + urlencode( {'history_id':dataset.history_id,'ext':'ab1','name':'ab1 sequence','info':'Sequence file','dbkey':dataset.dbkey} ) @@ -58,39 +52,32 @@ file_ext = "bam" MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) - def before_setting_metadata( self, dataset ): - """ Ensures that the Bam file contents are sorted. This function is called on the dataset before set_meta() is called.""" - sorted = False - try: - index_file = dataset.metadata.bam_index - except: - index_file = None - if index_file: - # If an index file already exists on disk, then the data must have previously been sorted - # since samtools requires a sorted Bam file in order to create an index. - sorted = os.path.exists( index_file.file_name ) - if not sorted: - # Use samtools to sort the Bam file - tmp_dir = tempfile.gettempdir() - # Create a symlink from the temporary directory to the dataset file so that samtools can mess with it. - tmp_dataset_file_name = os.path.join( tmp_dir, os.path.basename( dataset.file_name ) ) - # Here tmp_dataset_file_name looks something like /tmp/dataset_XX.dat - os.symlink( dataset.file_name, tmp_dataset_file_name ) - # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. - # TODO: This command may also create temporary files <out.prefix>.%d.bam when the - # whole alignment cannot be fitted into memory ( controlled by option -m ). We're - # not handling this case here. 
- tmp_sorted_dataset_file = tempfile.NamedTemporaryFile( prefix=tmp_dataset_file_name ) - tmp_sorted_dataset_file_name = tmp_sorted_dataset_file.name - tmp_sorted_dataset_file.close() - command = "samtools sort %s %s 2>/dev/null" % ( tmp_dataset_file_name, tmp_sorted_dataset_file_name ) - proc = subprocess.Popen( args=command, shell=True ) - proc.wait() - tmp_sorted_bam_file_name = '%s.bam' % tmp_sorted_dataset_file_name - # Move tmp_sorted_bam_file_name to our output dataset location - shutil.move( tmp_sorted_bam_file_name, dataset.file_name ) - # Remove all remaining temporary files - os.unlink( tmp_dataset_file_name ) + def groom_dataset_content( self, file_name ): + """ + Ensures that the Bam file contents are sorted. This function is called + on an output dataset after the content is initially generated. + """ + # Use samtools to sort the Bam file + tmp_dir = tempfile.gettempdir() + # Create a symlink from the temporary directory to the dataset file so that samtools can mess with it. + tmp_dataset_file_name = os.path.join( tmp_dir, os.path.basename( file_name ) ) + # Here tmp_dataset_file_name looks something like /tmp/dataset_XX.dat + os.symlink( file_name, tmp_dataset_file_name ) + # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. + # TODO: This command may also create temporary files <out.prefix>.%d.bam when the + # whole alignment cannot be fitted into memory ( controlled by option -m ). We're + # not handling this case here. 
+ tmp_sorted_dataset_file = tempfile.NamedTemporaryFile( prefix=tmp_dataset_file_name ) + tmp_sorted_dataset_file_name = tmp_sorted_dataset_file.name + tmp_sorted_dataset_file.close() + command = "samtools sort %s %s 2>/dev/null" % ( tmp_dataset_file_name, tmp_sorted_dataset_file_name ) + proc = subprocess.Popen( args=command, shell=True ) + proc.wait() + tmp_sorted_bam_file_name = '%s.bam' % tmp_sorted_dataset_file_name + # Move tmp_sorted_bam_file_name to our output dataset location + shutil.move( tmp_sorted_bam_file_name, file_name ) + # Remove all remaining temporary files + os.unlink( tmp_dataset_file_name ) def init_meta( self, dataset, copy_from=None ): Binary.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, **kwd ): @@ -151,9 +138,6 @@ """Class describing a zip archive of binary sequence files""" file_ext = "binseq.zip" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: zip_file = zipfile.ZipFile( dataset.file_name, "r" ) @@ -176,9 +160,6 @@ """Class describing an scf binary sequence file""" file_ext = "scf" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: export_url = "/history_add_to?" + urlencode({'history_id':dataset.history_id,'ext':'scf','name':'scf sequence','info':'Sequence file','dbkey':dataset.dbkey}) @@ -196,11 +177,9 @@ class Sff( Binary ): """ Standard Flowgram Format (SFF) """ file_ext = "sff" + def __init__( self, **kwd ): Binary.__init__( self, **kwd ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): # The first 4 bytes of any sff file is '.sff', and the file is binary. 
For details # about the format, see http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/chrominfo.py --- a/lib/galaxy/datatypes/chrominfo.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/chrominfo.py Tue Dec 08 11:46:13 2009 -0500 @@ -12,6 +12,3 @@ MetadataElement( name="chrom", default=1, desc="Chrom column", param=metadata.ColumnParameter ) MetadataElement( name="length", default=2, desc="Length column", param=metadata.ColumnParameter ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/coverage.py --- a/lib/galaxy/datatypes/coverage.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/coverage.py Tue Dec 08 11:46:13 2009 -0500 @@ -28,10 +28,7 @@ MetadataElement( name="forwardCol", default=3, desc="Forward or aggregate read column", param=metadata.ColumnParameter ) MetadataElement( name="reverseCol", desc="Optional reverse read column", param=metadata.ColumnParameter, optional=True, no_value=0 ) MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) - - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass + def get_track_window(self, dataset, data, start, end): """ Assumes we have a numpy file. 
diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/data.py Tue Dec 08 11:46:13 2009 -0500 @@ -84,6 +84,9 @@ except OSError, e: log.exception('%s reading a file that does not exist %s' % (self.__class__.__name__, dataset.file_name)) return '' + def groom_dataset_content( self, file_name ): + """This function is called on an output dataset file after the content is initially generated.""" + pass def init_meta( self, dataset, copy_from=None ): # Metadata should be left mostly uninitialized. Dataset will # handle returning default values when metadata is not set. @@ -256,9 +259,6 @@ if return_output: return converted_dataset return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is set.""" - pass def after_setting_metadata( self, dataset ): """This function is called on the dataset after metadata is set.""" dataset.clear_associated_files( metadata_safe = True ) @@ -346,9 +346,6 @@ def get_mime(self): """Returns the mime type of the datatype""" return 'text/plain' - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is set.""" - pass def set_meta( self, dataset, **kwd ): """ Set the number of lines of data in dataset, diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/genetics.py --- a/lib/galaxy/datatypes/genetics.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/genetics.py Tue Dec 08 11:46:13 2009 -0500 @@ -47,9 +47,6 @@ self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ) def as_ucsc_display_file( self, dataset, **kwd ): return open( dataset.file_name ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def 
set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -205,9 +202,6 @@ """Initialize featurelistt datatype""" Tabular.__init__( self, **kwd ) self.column_names = [] - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -246,9 +240,6 @@ self.column_names[0] = 'FID' self.column_names[1] = 'IID' # this is what Plink wants as at 2009 - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff(self,filename): """ """ @@ -273,9 +264,6 @@ rgTabList.__init__( self, **kwd ) for i,s in enumerate(['#FeatureId', 'Chr', 'Genpos', 'Mappos']): self.column_names[i] = s - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Rgenetics(Html): """ @@ -329,9 +317,6 @@ f.write("\n".join( rval )) f.write('\n') f.close() - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, **kwd ): """ for lped/pbed eg @@ -373,9 +358,6 @@ """ file_ext="snpmatrix" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = "Binary RGenetics file" @@ -405,9 +387,6 @@ Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True ) - def before_setting_metadata( self, dataset ): - 
"""This function is called on the dataset before metadata is edited.""" - pass class Pphe(Rgenetics): """ @@ -418,9 +397,6 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.pphe', description = 'Plink Phenotype File', substitute_name_with_metadata = 'base_name' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Lmap(Rgenetics): """ @@ -428,10 +404,6 @@ """ file_ext="lmap" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass - class Fphe(Rgenetics): """ fake class to distinguish different species of Rgenetics data collections @@ -441,9 +413,6 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.fphe', description = 'FBAT Phenotype File', substitute_name_with_metadata = 'base_name' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Phe(Rgenetics): """ @@ -454,9 +423,6 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.phe', description = 'Phenotype File', substitute_name_with_metadata = 'base_name' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Fped(Rgenetics): """ @@ -467,9 +433,6 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.fped', description = 'FBAT format pedfile', substitute_name_with_metadata = 'base_name' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Pbed(Rgenetics): """ @@ -482,9 +445,6 @@ self.add_composite_file( '%s.bim', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.bed', substitute_name_with_metadata = 'base_name', 
is_binary = True ) self.add_composite_file( '%s.fam', substitute_name_with_metadata = 'base_name', is_binary = True ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Eigenstratgeno(Rgenetics): """ @@ -497,9 +457,6 @@ self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Eigenstratpca(Rgenetics): """ @@ -510,27 +467,18 @@ def __init__( self, **kwd ): Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.eigenstratpca', description = 'Eigenstrat PCA file', substitute_name_with_metadata = 'base_name' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Snptest(Rgenetics): """ fake class to distinguish different species of Rgenetics data collections """ file_ext="snptest" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Pheno(Tabular): """ base class for pheno files """ file_ext = 'pheno' - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class RexpBase( Html ): """ @@ -698,9 +646,6 @@ f.write("\n".join( rval )) f.write('\n') f.close() - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def init_meta( self, dataset, copy_from=None ): """Add metadata elements""" if copy_from: @@ -789,9 +734,6 @@ RexpBase.__init__(self, **kwd) self.add_composite_file( '%s.affybatch', description 
= 'AffyBatch R object saved to file', substitute_name_with_metadata = 'base_name', is_binary=True ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class Eset( RexpBase ): """derived class for BioC data structures in Galaxy """ @@ -801,9 +743,6 @@ RexpBase.__init__(self, **kwd) self.add_composite_file( '%s.eset', description = 'ESet R object saved to file', substitute_name_with_metadata = 'base_name', is_binary = True ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass class MAlist( RexpBase ): """derived class for BioC data structures in Galaxy """ @@ -813,9 +752,6 @@ RexpBase.__init__(self, **kwd) self.add_composite_file( '%s.malist', description = 'MAlist R object saved to file', substitute_name_with_metadata = 'base_name', is_binary = True ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass if __name__ == '__main__': import doctest, sys diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/images.py Tue Dec 08 11:46:13 2009 -0500 @@ -15,9 +15,6 @@ class Image( data.Data ): """Class describing an image""" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = 'Image in %s format' % dataset.extension @@ -54,9 +51,6 @@ """Class describing a GMAJ Applet""" file_ext = "gmaj.zip" copy_safe_peek = False - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: if hasattr( dataset, 'history_id' ): @@ -108,9 
+102,7 @@ class Html( data.Text ): """Class describing an html file""" file_ext = "html" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass + def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = "HTML file" @@ -145,9 +137,7 @@ """Class describing a LAJ Applet""" file_ext = "laj" copy_safe_peek = False - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass + def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: if hasattr( dataset, 'history_id' ): diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/interval.py Tue Dec 08 11:46:13 2009 -0500 @@ -75,9 +75,6 @@ else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, overwrite = True, first_line_is_header = False, **kwd ): Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 0 ) @@ -343,9 +340,6 @@ MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) ###do we need to repeat these? 
they are the same as should be inherited from interval type - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, overwrite = True, **kwd ): """Sets the metadata information for datasets previously determined to be in bed format.""" i = 0 @@ -504,9 +498,6 @@ """Initialize datatype, by adding GBrowse display app""" Tabular.__init__(self, **kwd) self.add_display_app ( 'c_elegans', 'display in Wormbase', 'as_gbrowse_display_file', 'gbrowse_links' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -644,9 +635,6 @@ def __init__(self, **kwd): """Initialize datatype, by adding GBrowse display app""" Gff.__init__(self, **kwd) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -810,9 +798,6 @@ return ret_val def make_html_table( self, dataset ): return Tabular.make_html_table( self, dataset, skipchars=['track', '#'] ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, overwrite = True, **kwd ): i = 0 for i, line in enumerate( file ( dataset.file_name ) ): @@ -904,9 +889,6 @@ """Initialize interval datatype, by adding UCSC display app""" Tabular.__init__(self, **kwd) self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, overwrite = True, **kwd ): Tabular.set_meta( self, dataset, 
overwrite = overwrite, skip = 1 ) def display_peek( self, dataset ): diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/qualityscore.py --- a/lib/galaxy/datatypes/qualityscore.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/qualityscore.py Tue Dec 08 11:46:13 2009 -0500 @@ -15,9 +15,6 @@ """ file_ext = "qualsolid" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): """ >>> fname = get_test_fname( 'sequence.fasta' ) @@ -67,9 +64,6 @@ """ file_ext = "qual454" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): """ >>> fname = get_test_fname( 'sequence.fasta' ) @@ -108,9 +102,4 @@ until we know more about quality score formats """ file_ext = "qualsolexa" - - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass - - \ No newline at end of file + \ No newline at end of file diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/sequence.py Tue Dec 08 11:46:13 2009 -0500 @@ -21,9 +21,6 @@ """Add metadata elements""" MetadataElement( name="sequences", default=0, desc="Number of sequences", readonly=True, visible=False, optional=True, no_value=0 ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, **kwd ): """ Set the number of sequences and the number of data lines in dataset. 
@@ -59,17 +56,10 @@ """Add metadata elements""" MetadataElement( name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True, no_value=None ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass - class Fasta( Sequence ): """Class representing a FASTA sequence""" file_ext = "fasta" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): """ Determines whether the file is in fasta format @@ -122,9 +112,6 @@ """ Class representing the SOLID Color-Space sequence ( csfasta ) """ file_ext = "csfasta" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): """ Color-space sequence: @@ -166,9 +153,6 @@ """Class representing a generic FASTQ sequence""" file_ext = "fastq" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_meta( self, dataset, **kwd ): """ Set the number of sequences and the number of data lines @@ -220,10 +204,6 @@ """Class representing a FASTQ sequence ( the Sanger variant )""" file_ext = "fastqsanger" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass - try: from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) @@ -316,9 +296,6 @@ MetadataElement( name="species_chromosomes", desc="Species Chromosomes", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) MetadataElement( name="maf_index", desc="MAF Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is 
edited.""" - pass def init_meta( self, dataset, copy_from=None ): Alignment.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, **kwd ): @@ -425,9 +402,6 @@ file_ext = "axt" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): """ Determines whether the file is in axt format @@ -480,9 +454,6 @@ # here simply for backward compatibility ( although it is still in the datatypes registry ). Subclassing # from data.Text eliminates managing metadata elements inherited from the Alignemnt class. - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def sniff( self, filename ): """ Determines whether the file is in lav format diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/tabular.py Tue Dec 08 11:46:13 2009 -0500 @@ -23,9 +23,6 @@ MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=False, no_value=0 ) MetadataElement( name="column_types", default=[], desc="Column types", param=metadata.ColumnTypesParameter, readonly=True, visible=False, no_value=[] ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def init_meta( self, dataset, copy_from=None ): data.Text.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): @@ -227,9 +224,6 @@ 'Superorder', 'Order', 'Suborder', 'Superfamily', 'Family', 'Subfamily', 'Tribe', 'Subtribe', 'Genus', 'Subgenus', 'Species', 'Subspecies' ] - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def make_html_table( self, dataset, skipchars=[] ): """Create HTML 
table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -259,9 +253,6 @@ self.column_names = ['QNAME', 'FLAG', 'RNAME', 'POS', 'MAPQ', 'CIGAR', 'MRNM', 'MPOS', 'ISIZE', 'SEQ', 'QUAL', 'OPT' ] - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/tracks.py --- a/lib/galaxy/datatypes/tracks.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/tracks.py Tue Dec 08 11:46:13 2009 -0500 @@ -23,9 +23,6 @@ def __init__(self, **kwargs): super( GeneTrack, self ).__init__( **kwargs ) self.add_display_app( 'genetrack', 'View in', '', 'genetrack_link' ) - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def get_display_links( self, dataset, type, app, base_url, target_frame='galaxy_main', **kwd ): return data.Data.get_display_links( self, dataset, type, app, base_url, target_frame=target_frame, **kwd ) def genetrack_link( self, hda, type, app, base_url ): diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/datatypes/xml.py --- a/lib/galaxy/datatypes/xml.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/datatypes/xml.py Tue Dec 08 11:46:13 2009 -0500 @@ -11,9 +11,6 @@ """NCBI Blast XML Output data""" file_ext = "blastxml" - def before_setting_metadata( self, dataset ): - """This function is called on the dataset before metadata is edited.""" - pass def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/jobs/__init__.py --- a/lib/galaxy/jobs/__init__.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/jobs/__init__.py Tue Dec 08 11:46:13 2009 -0500 @@ -537,7 +537,6 @@ #it would be 
quicker to just copy the metadata from the originating output dataset, #but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta() if not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ): - dataset.datatype.before_setting_metadata( dataset ) # Only set metadata values if they are missing... dataset.set_meta( overwrite = False ) else: diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/tools/__init__.py Tue Dec 08 11:46:13 2009 -0500 @@ -1418,7 +1418,6 @@ if data.extension != data_type: data = app.datatypes_registry.change_datatype( data, data_type ) elif not isinstance( data.datatype, datatypes.interval.Bed ) and isinstance( data.datatype, datatypes.interval.Interval ): - data.datatype.before_setting_metadata( data ) data.set_meta() if data.missing_meta(): data = app.datatypes_registry.change_datatype( data, 'tabular' ) @@ -1473,7 +1472,6 @@ self.sa_session.flush() child_dataset.set_size() child_dataset.name = "Secondary Dataset (%s)" % ( designation ) - child_dataset.datatype.before_setting_metadata( child_dataset ) child_dataset.init_meta() child_dataset.set_meta() child_dataset.set_peek() @@ -1533,7 +1531,6 @@ primary_data.set_size() primary_data.name = outdata.name primary_data.info = outdata.info - primary_dataset.datatype.before_setting_metadata( primary_dataset ) primary_data.init_meta( copy_from=outdata ) primary_data.dbkey = dbkey primary_data.set_meta() diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/web/controllers/library.py --- a/lib/galaxy/web/controllers/library.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/web/controllers/library.py Tue Dec 08 11:46:13 2009 -0500 @@ -487,7 +487,6 @@ if name not in [ 'name', 'info', 'dbkey' ]: if spec.get( 'default' ): setattr( ldda.metadata, name, spec.unwrap( spec.get( 'default' ) ) ) - ldda.datatype.before_setting_metadata( ldda ) 
ldda.datatype.set_meta( ldda ) ldda.datatype.after_setting_metadata( ldda ) trans.sa_session.flush() @@ -521,7 +520,6 @@ msg=msg, messagetype=messagetype ) if trans.app.security_agent.can_modify_library_item( user, roles, ldda ): - ldda.datatype.before_setting_metadata( ldda ) if "dbkey" in ldda.datatype.metadata_spec and not ldda.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/web/controllers/library_admin.py --- a/lib/galaxy/web/controllers/library_admin.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/web/controllers/library_admin.py Tue Dec 08 11:46:13 2009 -0500 @@ -493,7 +493,6 @@ if name not in [ 'name', 'info', 'dbkey' ]: if spec.get( 'default' ): setattr( ldda.metadata, name, spec.unwrap( spec.get( 'default' ) ) ) - ldda.datatype.before_setting_metadata( ldda ) ldda.datatype.set_meta( ldda ) ldda.datatype.after_setting_metadata( ldda ) trans.sa_session.flush() @@ -517,7 +516,6 @@ widgets=widgets, msg=msg, messagetype=messagetype ) - ldda.datatype.before_setting_metadata( ldda ) if "dbkey" in ldda.datatype.metadata_spec and not ldda.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey diff -r 83dc9642a59e -r 022a8c94883f lib/galaxy/web/controllers/root.py --- a/lib/galaxy/web/controllers/root.py Tue Dec 08 09:05:35 2009 -0500 +++ b/lib/galaxy/web/controllers/root.py Tue Dec 08 11:46:13 2009 -0500 @@ -321,7 +321,6 @@ trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute( trans.app.datatypes_registry.set_external_metadata_tool, trans, incoming = { 'input1':data } ) else: msg = 'Attributes updated' - data.datatype.before_setting_metadata( data ) data.set_meta() data.datatype.after_setting_metadata( data ) trans.sa_session.flush() @@ -346,7 +345,6 @@ trans.sa_session.refresh( data.dataset ) else: return trans.show_error_message( "You 
are not authorized to change this dataset's permissions" ) - data.datatype.before_setting_metadata( data ) if "dbkey" in data.datatype.metadata_spec and not data.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey @@ -521,7 +519,6 @@ data_file.close() data.state = data.states.OK data.set_size() - data.datatype.before_setting_metadata( data ) data.init_meta() data.set_meta() trans.sa_session.flush() diff -r 83dc9642a59e -r 022a8c94883f tools/data_source/hbvar_filter.py --- a/tools/data_source/hbvar_filter.py Tue Dec 08 09:05:35 2009 -0500 +++ b/tools/data_source/hbvar_filter.py Tue Dec 08 11:46:13 2009 -0500 @@ -46,7 +46,6 @@ fp.close() #Set meta data, format file to be valid interval type if isinstance(data.datatype, datatypes.interval.Interval): - data.datatype.before_setting_metadata( data ) data.set_meta(first_line_is_header=True) #check for missing meta data, if all there, comment first line and process file if not data.missing_meta(): diff -r 83dc9642a59e -r 022a8c94883f tools/data_source/upload.py --- a/tools/data_source/upload.py Tue Dec 08 09:05:35 2009 -0500 +++ b/tools/data_source/upload.py Tue Dec 08 11:46:13 2009 -0500 @@ -10,6 +10,7 @@ import galaxy.model from galaxy.datatypes import sniff from galaxy.datatypes.binary import * +from galaxy.datatypes.registry import Registry from galaxy import util from galaxy.util.json import * @@ -264,6 +265,9 @@ name = dataset.name, line_count = line_count ) json_file.write( to_json_string( info ) + "\n" ) + # Groom the dataset content if necessary + datatype = Registry().get_datatype_by_extension( ext ) + datatype.groom_dataset_content( output_path ) def add_composite_file( dataset, json_file, output_path, files_path ): if dataset.composite_files: diff -r 83dc9642a59e -r 022a8c94883f tools/maf/maf_to_bed_code.py --- a/tools/maf/maf_to_bed_code.py Tue Dec 08 09:05:35 2009 -0500 +++ b/tools/maf/maf_to_bed_code.py Tue Dec 08 11:46:13 2009 
-0500 @@ -45,7 +45,6 @@ newdata.info = "The requested file is missing from the system." newdata.state = newdata.states.ERROR newdata.dbkey = dbkey - newdata.datatype.before_setting_metadata( newdata ) newdata.init_meta() newdata.set_meta() newdata.set_peek()