[hg] galaxy 3025: Fixes, cleanup and new functional tests for data types and upload
details:   http://www.bx.psu.edu/hg/galaxy/rev/23c1b9799bce
changeset: 3025:23c1b9799bce
user:      Greg Von Kuster <greg@bx.psu.edu>
date:      Fri Nov 13 14:13:03 2009 -0500
description:
Fixes, cleanup and new functional tests for data types and upload:
- moved all supported binary data types to the new binary.py
- changed the GeneTrack data type to subclass Tabular rather than Binary
- added the Sff data type to datatypes_conf.xml.sample
- merged the test_sniffing_and_metadata_settings.py test script into test_get_data.py
- added several additional functional tests for data types to test_get_data.py
- fixed some bugs in upload.py when uploading binary data types

diffstat:

 datatypes_conf.xml.sample                               |   12 +-
 lib/galaxy/datatypes/binary.py                          |  156 +++++
 lib/galaxy/datatypes/data.py                            |   88 +--
 lib/galaxy/datatypes/genetics.py                        |   69 +--
 lib/galaxy/datatypes/images.py                          |  120 ----
 lib/galaxy/datatypes/registry.py                        |   16 +-
 lib/galaxy/datatypes/tracks.py                          |    8 +-
 test/functional/test_get_data.py                        |  620 +++++++++++++++++++---
 test/functional/test_sniffing_and_metadata_settings.py  |  262 ---------
 tools/data_source/upload.py                             |   45 +-
 10 files changed, 748 insertions(+), 648 deletions(-)

diffs (1914 lines):

diff -r 4f9b630f9976 -r 23c1b9799bce datatypes_conf.xml.sample
--- a/datatypes_conf.xml.sample Fri Nov 13 10:39:15 2009 -0500
+++ b/datatypes_conf.xml.sample Fri Nov 13 14:13:03 2009 -0500
@@ -1,15 +1,15 @@
 <?xml version="1.0"?>
 <datatypes>
   <registration converters_path="lib/galaxy/datatypes/converters">
-    <datatype extension="ab1" type="galaxy.datatypes.images:Ab1" mimetype="application/octet-stream" display_in_upload="true"/>
+    <datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true"/>
     <datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true"/>
-    <datatype extension="bam" type="galaxy.datatypes.images:Bam" mimetype="application/octet-stream"/>
+    <datatype extension="bam" type="galaxy.datatypes.binary:Bam" mimetype="application/octet-stream"/>
     <datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true">
       <converter file="bed_to_gff_converter.xml" target_datatype="gff"/>
       <converter file="interval_to_coverage.xml" target_datatype="coverage"/>
       <converter file="bed_to_interval_index_converter.xml" target_datatype="interval_index"/>
     </datatype>
-    <datatype extension="binseq.zip" type="galaxy.datatypes.images:Binseq" mimetype="application/zip" display_in_upload="true"/>
+    <datatype extension="binseq.zip" type="galaxy.datatypes.binary:Binseq" mimetype="application/zip" display_in_upload="true"/>
     <datatype extension="len" type="galaxy.datatypes.chrominfo:ChromInfo" display_in_upload="true">
       <!-- no converters yet -->
     </datatype>
@@ -49,12 +49,13 @@
     <datatype extension="qualsolid" type="galaxy.datatypes.qualityscore:QualityScoreSOLiD" display_in_upload="true"/>
     <datatype extension="qual454" type="galaxy.datatypes.qualityscore:QualityScore454" display_in_upload="true"/>
     <datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"/>
-    <datatype extension="scf" type="galaxy.datatypes.images:Scf" mimetype="application/octet-stream" display_in_upload="true"/>
+    <datatype extension="scf" type="galaxy.datatypes.binary:Scf" mimetype="application/octet-stream" display_in_upload="true"/>
+    <datatype extension="sff" type="galaxy.datatypes.binary:Sff" mimetype="application/octet-stream" display_in_upload="true"/>
     <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/>
     <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/>
     <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/>
     <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" display_in_upload="true"/>
-    <datatype extension="txtseq.zip" type="galaxy.datatypes.images:Txtseq" mimetype="application/zip" display_in_upload="true"/>
+    <datatype extension="txtseq.zip" type="galaxy.datatypes.data:Txtseq" mimetype="application/zip" display_in_upload="true"/>
     <datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true">
       <converter file="wiggle_to_array_tree_converter.xml" target_datatype="array_tree"/>
     </datatype>
@@ -190,6 +191,7 @@
        defined format first, followed by next-most rigidly defined,
        and so on.
   -->
+  <sniffer type="galaxy.datatypes.binary:Sff"/>
  <sniffer type="galaxy.datatypes.xml:BlastXml"/>
  <sniffer type="galaxy.datatypes.sequence:Maf"/>
  <sniffer type="galaxy.datatypes.sequence:Lav"/>
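Galaxy tries each registered sniffer in order, and the first sniff() that returns True determines the datatype, which is why the comment above asks for the most rigidly defined formats first and why the new Sff sniffer is registered at the top of the list. A minimal sketch of that dispatch loop (the real logic lives in galaxy.datatypes.sniff and registry.py; the names here are illustrative only):

    def guess_ext( filename, sniff_order ):
        # The first datatype whose sniff() accepts the file wins, so rigidly
        # defined binary formats such as sff must come before looser ones.
        for datatype in sniff_order:
            try:
                if datatype.sniff( filename ):
                    return datatype.file_ext
            except Exception:
                pass
        return 'txt'  # fall back to plain text when nothing matches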
display_in_upload="true"/> <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/> <datatype extension="txt" type="galaxy.datatypes.data:Text" display_in_upload="true"/> <datatype extension="blastxml" type="galaxy.datatypes.xml:BlastXml" display_in_upload="true"/> - <datatype extension="txtseq.zip" type="galaxy.datatypes.images:Txtseq" mimetype="application/zip" display_in_upload="true"/> + <datatype extension="txtseq.zip" type="galaxy.datatypes.data:Txtseq" mimetype="application/zip" display_in_upload="true"/> <datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true"> <converter file="wiggle_to_array_tree_converter.xml" target_datatype="array_tree"/> </datatype> @@ -190,6 +191,7 @@ defined format first, followed by next-most rigidly defined, and so on. --> + <sniffer type="galaxy.datatypes.binary:Sff"/> <sniffer type="galaxy.datatypes.xml:BlastXml"/> <sniffer type="galaxy.datatypes.sequence:Maf"/> <sniffer type="galaxy.datatypes.sequence:Lav"/> diff -r 4f9b630f9976 -r 23c1b9799bce lib/galaxy/datatypes/binary.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/binary.py Fri Nov 13 14:13:03 2009 -0500 @@ -0,0 +1,156 @@ +""" +Binary classes +""" + +import data, logging, binascii +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes import metadata +from galaxy.datatypes.sniff import * +from urllib import urlencode, quote_plus +import zipfile +import os, subprocess, tempfile + +log = logging.getLogger(__name__) + +sniffable_binary_formats = [ 'sff' ] +# Currently these supported binary data types must be manually set on upload +unsniffable_binary_formats = [ 'ab1', 'scf' ] + +class Binary( data.Data ): + """Binary data""" + def set_peek( self, dataset ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = 'binary data' + dataset.blurb = 'data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + +class Ab1( Binary ): + """Class describing an ab1 binary sequence file""" + file_ext = "ab1" + def set_peek( self, dataset ): + if not dataset.dataset.purged: + export_url = "/history_add_to?" + urlencode( {'history_id':dataset.history_id,'ext':'ab1','name':'ab1 sequence','info':'Sequence file','dbkey':dataset.dbkey} ) + dataset.peek = "Binary ab1 sequence file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary ab1 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) + +class Bam( Binary ): + """Class describing a BAM binary file""" + file_ext = "bam" + MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) + def init_meta( self, dataset, copy_from=None ): + Binary.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, **kwd ): + """ + Sets index for BAM file. 
+ """ + index_file = dataset.metadata.bam_index + if not index_file: + index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset ) + tmp_dir = tempfile.gettempdir() + tmpf1 = tempfile.NamedTemporaryFile( dir=tmp_dir ) + tmpf1bai = '%s.bai' % tmpf1.name + try: + os.system( 'cd %s' % tmp_dir ) + os.system( 'cp %s %s' % ( dataset.file_name, tmpf1.name ) ) + os.system( 'samtools index %s' % tmpf1.name ) + os.system( 'cp %s %s' % ( tmpf1bai, index_file.file_name ) ) + except Exception, ex: + sys.stderr.write( 'There was a problem creating the index for the BAM file\n%s\n' + str( ex ) ) + tmpf1.close() + if os.path.exists( tmpf1bai ): + os.remove( tmpf1bai ) + dataset.metadata.bam_index = index_file + def set_peek( self, dataset ): + if not dataset.dataset.purged: + export_url = "/history_add_to?" + urlencode( {'history_id':dataset.history_id,'ext':'bam','name':'bam alignments','info':'Alignments file','dbkey':dataset.dbkey} ) + dataset.peek = "Binary bam alignments file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) + def get_mime( self ): + """Returns the mime type of the datatype""" + return 'application/octet-stream' + +class Binseq( Binary ): + """Class describing a zip archive of binary sequence files""" + file_ext = "binseq.zip" + def set_peek( self, dataset ): + if not dataset.dataset.purged: + zip_file = zipfile.ZipFile( dataset.file_name, "r" ) + num_files = len( zip_file.namelist() ) + dataset.peek = "Archive of %s binary sequence files" % ( str( num_files ) ) + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary sequence file archive (%s)" % ( data.nice_size( dataset.get_size() ) ) + def get_mime( self ): + """Returns the mime type of the datatype""" + return 'application/zip' + +class Scf( Binary ): + """Class describing an scf binary sequence file""" + file_ext = "scf" + def set_peek( self, dataset ): + if not dataset.dataset.purged: + export_url = "/history_add_to?" + urlencode({'history_id':dataset.history_id,'ext':'scf','name':'scf sequence','info':'Sequence file','dbkey':dataset.dbkey}) + dataset.peek = "Binary scf sequence file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) + +class Sff( Binary ): + """ Standard Flowgram Format (SFF) """ + file_ext = "sff" + def __init__( self, **kwd ): + Binary.__init__( self, **kwd ) + def sniff( self, filename ): + # The first 4 bytes of any sff file is '.sff', and the file is binary. For details + # about the format, see http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format + try: + header = open( filename ).read(4) + if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ): + return True + return False + except Exception, e: + return False + def set_peek( self, dataset ): + if not dataset.dataset.purged: + export_url = "/history_add_to?" 
+
+class Binseq( Binary ):
+    """Class describing a zip archive of binary sequence files"""
+    file_ext = "binseq.zip"
+    def set_peek( self, dataset ):
+        if not dataset.dataset.purged:
+            zip_file = zipfile.ZipFile( dataset.file_name, "r" )
+            num_files = len( zip_file.namelist() )
+            dataset.peek = "Archive of %s binary sequence files" % ( str( num_files ) )
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Binary sequence file archive (%s)" % ( data.nice_size( dataset.get_size() ) )
+    def get_mime( self ):
+        """Returns the mime type of the datatype"""
+        return 'application/zip'
+
+class Scf( Binary ):
+    """Class describing an scf binary sequence file"""
+    file_ext = "scf"
+    def set_peek( self, dataset ):
+        if not dataset.dataset.purged:
+            export_url = "/history_add_to?" + urlencode({'history_id':dataset.history_id,'ext':'scf','name':'scf sequence','info':'Sequence file','dbkey':dataset.dbkey})
+            dataset.peek = "Binary scf sequence file"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+class Sff( Binary ):
+    """ Standard Flowgram Format (SFF) """
+    file_ext = "sff"
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+    def sniff( self, filename ):
+        # The first 4 bytes of any sff file are '.sff', and the file is binary. For details
+        # about the format, see http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format
+        try:
+            header = open( filename, 'rb' ).read(4)
+            if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ):
+                return True
+            return False
+        except Exception, e:
+            return False
+    def set_peek( self, dataset ):
+        if not dataset.dataset.purged:
+            export_url = "/history_add_to?" + urlencode( {'history_id':dataset.history_id,'ext':'sff','name':'sff file','info':'sff file','dbkey':dataset.dbkey} )
+            dataset.peek = "Binary sff file"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Binary sff file (%s)" % ( data.nice_size( dataset.get_size() ) )
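The sniff() just above is a plain magic-byte test; the doctest removed from data.py (below) exercised the same check via Sff().sniff( get_test_fname( '1.sff' ) ). Reduced to a standalone function it amounts to the following (looks_like_sff is a hypothetical name):

    import binascii

    def looks_like_sff( filename ):
        # An SFF file begins with the four bytes '.sff'; compare hex
        # encodings exactly as binary.Sff.sniff() does.
        try:
            header = open( filename, 'rb' ).read( 4 )
            return binascii.b2a_hex( header ) == binascii.hexlify( '.sff' )
        except Exception:
            return False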
diff -r 4f9b630f9976 -r 23c1b9799bce lib/galaxy/datatypes/data.py
--- a/lib/galaxy/datatypes/data.py Fri Nov 13 10:39:15 2009 -0500
+++ b/lib/galaxy/datatypes/data.py Fri Nov 13 14:13:03 2009 -0500
@@ -1,4 +1,4 @@
-import logging, os, sys, time, tempfile, binascii
+import logging, os, sys, time, tempfile, zipfile
 from galaxy import util
 from galaxy.util.odict import odict
 from galaxy.util.bunch import Bunch
@@ -40,20 +40,18 @@
     """
     __metaclass__ = DataMeta
-
-    """Add metadata elements"""
+    # Add metadata elements
     MetadataElement( name="dbkey", desc="Database/Build", default="?", param=metadata.DBKeyParameter, multiple=False, no_value="?" )
-
-    """Stores the set of display applications, and viewing methods, supported by this datatype """
+    # Stores the set of display applications, and viewing methods, supported by this datatype
     supported_display_apps = {}
-
-    """If False, the peek is regenerated whenever a dataset of this type is copied"""
+    # If False, the peek is regenerated whenever a dataset of this type is copied
     copy_safe_peek = True
-
-    is_binary = True #The dataset contains binary data --> do not space_to_tab or convert newlines, etc. Allow binary file uploads of this type when True.
-
-    allow_datatype_change = True #Allow user to change between this datatype and others. If False, this datatype cannot be changed from or into.
-
+    # The dataset contains binary data --> do not space_to_tab or convert newlines, etc.
+    # Allow binary file uploads of this type when True.
+    is_binary = True
+    # Allow user to change between this datatype and others. If False, this datatype
+    # cannot be changed from or into.
+    allow_datatype_change = True
     #Composite datatypes
     composite_type = None
     composite_files = odict()
@@ -270,8 +268,6 @@
     def add_composite_file( self, name, **kwds ):
         #self.composite_files = self.composite_files.copy()
         self.composite_files[ name ] = self.__new_composite_file( name, **kwds )
-
-
     def __substitute_composite_key( self, key, composite_file, dataset = None ):
         if composite_file.substitute_name_with_metadata:
             if dataset:
@@ -303,7 +299,6 @@
         return files
     def generate_auto_primary_file( self, dataset = None ):
         raise Exception( "generate_auto_primary_file is not implemented for this datatype." )
-
     @property
     def has_resolution(self):
         return False
@@ -364,23 +359,37 @@
         dataset.peek = 'file does not exist'
         dataset.blurb = 'file purged from disk'
 
-class Binary( Data ):
-    """Binary data"""
+class Txtseq( Data ):
+    """Class describing a zip archive of text sequence files"""
+    file_ext = "txtseq.zip"
     def set_peek( self, dataset ):
-        """Set the peek and blurb text"""
         if not dataset.dataset.purged:
-            dataset.peek = 'binary data'
-            dataset.blurb = 'data'
+            zip_file = zipfile.ZipFile( dataset.file_name, "r" )
+            num_files = len( zip_file.namelist() )
+            dataset.peek = "Archive of %s text sequence files" % ( str( num_files ) )
+            dataset.blurb = nice_size( dataset.get_size() )
         else:
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
+    def display_peek(self, dataset):
+        try:
+            return dataset.peek
+        except:
+            return "Text sequence file archive (%s)" % ( nice_size( dataset.get_size() ) )
+    def get_mime(self):
+        """Returns the mime type of the datatype"""
+        return 'application/zip'
+
+class Newick( Text ):
+    pass
+
+# ------------- Utility methods --------------
 def get_test_fname( fname ):
     """Returns test data filename"""
     path, name = os.path.split(__file__)
     full_path = os.path.join( path, 'test', fname )
     return full_path
-
 def nice_size(size):
     """
     Returns a readably formatted string with the size
@@ -406,7 +415,6 @@
         out = "%.1f %s" % (size, word)
         return out
     return '??? bytes'
-
 def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
     """
     Returns the first LINE_COUNT lines wrapped to WIDTH
@@ -443,7 +451,6 @@
     else:
         text = unicode( '\n'.join( lines ), 'utf-8' )
     return text
-
 def get_line_count(file_name):
     """Returns the number of lines in a file that are neither null nor comments"""
     count = 0
@@ -452,38 +459,3 @@
         if line and line[0] != '#':
             count += 1
     return count
-
-class Newick( Text ):
-    pass
-
-class Sff( Binary ):
-    """ Standard Flowgram Format (SFF) """
-    file_ext = "sff"
-    def __init__( self, **kwd ):
-        Binary.__init__(self, **kwd)
-    def init_meta( self, dataset, copy_from=None ):
-        Binary.init_meta( self, dataset, copy_from=copy_from )
-    def sniff( self, filename ):
-        '''
-        The first 4 bytes of any sff file is '.sff'
-
-        >>> fname = get_test_fname( '1.sff' )
-        >>> Sff().sniff( fname )
-        True
-        '''
-        header = open( filename ).read(4)
-        if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ):
-            return True
-        return False
-    def set_peek( self, dataset ):
-        if not dataset.dataset.purged:
-            dataset.peek = "Binary sff file"
-            dataset.blurb = nice_size( dataset.get_size() )
-        else:
-            dataset.peek = 'file does not exist'
-            dataset.blurb = 'file purged from disk'
-    def display_peek(self, dataset):
-        try:
-            return dataset.peek
-        except:
-            return "sff file (%s)" % ( nice_size( dataset.get_size() ) )
diff -r 4f9b630f9976 -r 23c1b9799bce lib/galaxy/datatypes/genetics.py
--- a/lib/galaxy/datatypes/genetics.py Fri Nov 13 10:39:15 2009 -0500
+++ b/lib/galaxy/datatypes/genetics.py Fri Nov 13 14:13:03 2009 -0500
@@ -48,10 +48,8 @@
         """Initialize datatype, by adding GBrowse display app"""
         Interval.__init__(self, **kwd)
         self.add_display_app ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' )
-
     def as_ucsc_display_file( self, dataset, **kwd ):
         return open( dataset.file_name )
-
     def set_meta( self, dataset, overwrite = True, **kwd ):
         i = 0
         for i, line in enumerate( file ( dataset.file_name ) ):
@@ -66,7 +64,6 @@
         except:
             pass
         Interval.set_meta( self, dataset, overwrite = overwrite, skip = i )
-
     def make_html_table( self, dataset,
skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -82,7 +79,6 @@ except Exception, exc: out = "Can't create peek %s" % exc return out - def get_estimated_display_viewport( self, dataset ): """ Return a chrom, start, stop tuple for viewing a file. There are slight differences between gff 2 and gff 3 @@ -118,7 +114,6 @@ return ( seqid, str( start ), str( stop ) ) else: return ( '', '', '' ) - def gbrowse_links( self, dataset, type, app, base_url ): ret_val = [] if dataset.has_data: @@ -132,7 +127,6 @@ link = "%s?start=%s&stop=%s&ref=%s&dbkey=%s" % ( site_url, start, stop, seqid, dataset.dbkey ) ret_val.append( ( site_name, link ) ) return ret_val - def ucsc_links( self, dataset, type, app, base_url ): ret_val = [] if dataset.has_data: @@ -162,8 +156,6 @@ else: gal_Log.debug('@@@ gg ucsc_links - no viewport_tuple') return ret_val - - def sniff( self, filename ): """ Determines whether the file is in gff format @@ -202,20 +194,16 @@ except: return False - - class rgTabList(Tabular): """ for sampleid and for featureid lists of exclusions or inclusions in the clean tool featureid subsets on statistical criteria -> specialized display such as gg """ file_ext = "rgTList" - def __init__(self, **kwd): """Initialize featurelistt datatype""" Tabular.__init__( self, **kwd ) self.column_names = [] - def make_html_table( self, dataset, skipchars=[] ): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">'] @@ -236,7 +224,6 @@ out = "Can't create peek %s" % exc return out - class rgSampleList(rgTabList): """ for sampleid exclusions or inclusions in the clean tool output from QC eg excess het, gender error, ibd pair member,eigen outlier,excess mendel errors,... 
@@ -252,7 +239,6 @@ self.column_names[0] = 'FID' self.column_names[1] = 'IID' # this is what Plink wants as at 2009 - def sniff(self,filename): """ """ @@ -276,26 +262,22 @@ rgTabList.__init__( self, **kwd ) for i,s in enumerate(['#FeatureId', 'Chr', 'Genpos', 'Mappos']): self.column_names[i] = s - class Rgenetics(Html): """class to use for rgenetics""" - MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="rgenetics", - readonly=True, set_in_upload=True) + MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default="rgenetics", readonly=True, set_in_upload=True) composite_type = 'auto_primary_file' allow_datatype_change = False file_ext = 'rgenetics' - def missing_meta( self, dataset=None, **kwargs): """Checks for empty meta values""" for key, value in dataset.metadata.items(): if not value: return True return False - def generate_primary_file( self, dataset = None ): rval = ['<html><head><title>Rgenetics Galaxy Composite Dataset </title></head><p/>'] rval.append('<div>This composite dataset is composed of the following files:<p/><ul>') @@ -306,7 +288,6 @@ rval.append( '<li><a href="%s" type="application/binary">%s</a>%s' % ( composite_name, composite_name, opt_text ) ) rval.append( '</ul></div></html>' ) return "\n".join( rval ) - def regenerate_primary_file(self,dataset): """cannot do this until we are setting metadata """ @@ -332,12 +313,8 @@ f.write("\n".join( rval )) f.write('\n') f.close() - def set_meta( self, dataset, **kwd ): - - """for lped/pbed eg - - """ + """for lped/pbed eg""" if kwd.get('overwrite') == False: if verbose: gal_Log.debug('@@@ rgenetics set_meta called with overwrite = False') @@ -349,9 +326,10 @@ gal_Log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0], dataset.name)) return False try: - flist = os.listdir(efp) - except: - if verbose: gal_Log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0],dataset.name)) + flist = os.listdir(efp) + except: + if verbose: + gal_Log.debug('@@@rgenetics set_meta failed %s - dataset %s has no efp ?' 
% (sys.exc_info()[0],dataset.name)) return False if len(flist) == 0: if verbose: @@ -372,7 +350,6 @@ dataset.blurb = 'Composite file - Rgenetics Galaxy toolkit' return True - class SNPMatrix(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ @@ -385,7 +362,6 @@ else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' - def sniff(self,filename): """ need to check the file header hex code """ @@ -397,7 +373,6 @@ else: return True - class Lped(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ @@ -408,7 +383,6 @@ self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = True ) - class Pphe(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ @@ -418,7 +392,6 @@ Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.pphe', description = 'Plink Phenotype File', substitute_name_with_metadata = 'base_name' ) - class Lmap(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ @@ -442,8 +415,6 @@ Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.phe', description = 'Phenotype File', substitute_name_with_metadata = 'base_name' ) - - class Fped(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ @@ -453,7 +424,6 @@ Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.fped', description = 'FBAT format pedfile', substitute_name_with_metadata = 'base_name' ) - class Pbed(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ @@ -475,8 +445,6 @@ self.add_composite_file( '%s.eigenstratgeno', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.ind', substitute_name_with_metadata = 'base_name', is_binary = True ) self.add_composite_file( '%s.map', substitute_name_with_metadata = 'base_name', is_binary = True ) - - class Eigenstratpca(Rgenetics): """fake class to distinguish different species of Rgenetics data collections @@ -487,20 +455,17 @@ Rgenetics.__init__(self, **kwd) self.add_composite_file( '%s.eigenstratpca', description = 'Eigenstrat PCA file', substitute_name_with_metadata = 'base_name' ) - class Snptest(Rgenetics): """fake class to distinguish different species of Rgenetics data collections """ file_ext="snptest" - class Pheno(Tabular): """ base class for pheno files """ file_ext = 'pheno' - class RexpBase( Html ): """base class for BioC data structures in Galaxy must be constructed with the pheno data in place since that @@ -518,18 +483,15 @@ composite_type = 'auto_primary_file' allow_datatype_change = False - def __init__( self, **kwd ): Html.__init__(self,**kwd) self.add_composite_file( '%s.pheno', description = 'Phenodata tab text file', substitute_name_with_metadata = 'base_name', is_binary=True) - def generate_primary_file( self, dataset = None ): """ This is called only at upload to write the html file cannot rename the datasets here - they come with the default unfortunately """ return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>' - def get_phecols(self, phenolist=[], maxConc=20): """ sept 2009: cannot use whitespace to split - make a more complex structure here @@ -601,8 +563,6 @@ res = [('no usable phenotype columns 
found',[('?',0),]),] return res - - def get_pheno(self,dataset): """expects a .pheno file in the extra_files_dir - ugh note that R is wierd and adds the row.name in @@ -620,7 +580,6 @@ else: p = [] return '\n'.join(p) - def set_peek( self, dataset ): """expects a .pheno file in the extra_files_dir - ugh note that R is wierd and does not include the row.name in @@ -636,7 +595,6 @@ else: dataset.peek = 'file does not exist\n' dataset.blurb = 'file purged from disk' - def get_peek( self, dataset ): """expects a .pheno file in the extra_files_dir - ugh """ @@ -646,7 +604,6 @@ except: p = ['##failed to find %s' % pp] return ''.join(p[:5]) - def get_file_peek(self,filename): """ can't really peek at a filename - need the extra_files_path and such? @@ -657,7 +614,6 @@ except: pass return ''.join(h[:5]) - def regenerate_primary_file(self,dataset): """cannot do this until we are setting metadata """ @@ -672,18 +628,14 @@ f.write("\n".join( rval )) f.write('\n') f.close() - - """Add metadata elements""" def init_meta( self, dataset, copy_from=None ): + """Add metadata elements""" if copy_from: dataset.metadata = copy_from.metadata - def set_meta( self, dataset, **kwd ): - """ NOTE we apply the tabular machinary to the phenodata extracted from a BioC eSet or affybatch. - """ try: flist = os.listdir(dataset.extra_files_path) @@ -727,7 +679,6 @@ if not dataset.blurb: dataset.blurb = 'R loadable BioC expression object for the Rexpression Galaxy toolkit' return True - def make_html_table( self, pp='nothing supplied from peek\n'): """Create HTML table, used for displaying peek""" out = ['<table cellspacing="0" cellpadding="3">',] @@ -750,16 +701,13 @@ except Exception, exc: out = "Can't create html table %s" % str( exc ) return out - def display_peek( self, dataset ): """Returns formatted html of peek""" out=self.make_html_table(dataset.peek) return out - def get_mime(self): """Returns the mime type of the datatype""" return 'text/html' - class Affybatch( RexpBase ): """derived class for BioC data structures in Galaxy """ @@ -790,9 +738,6 @@ self.add_composite_file( '%s.malist', description = 'MAlist R object saved to file', substitute_name_with_metadata = 'base_name', is_binary = True ) - if __name__ == '__main__': import doctest, sys doctest.testmod(sys.modules[__name__]) - - diff -r 4f9b630f9976 -r 23c1b9799bce lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py Fri Nov 13 10:39:15 2009 -0500 +++ b/lib/galaxy/datatypes/images.py Fri Nov 13 14:13:03 2009 -0500 @@ -13,82 +13,6 @@ log = logging.getLogger(__name__) -class Ab1( data.Data ): - """Class describing an ab1 binary sequence file""" - file_ext = "ab1" - def set_peek( self, dataset ): - if not dataset.dataset.purged: - export_url = "/history_add_to?" + urlencode({'history_id':dataset.history_id,'ext':'ab1','name':'ab1 sequence','info':'Sequence file','dbkey':dataset.dbkey}) - dataset.peek = "Binary ab1 sequence file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek(self, dataset): - try: - return dataset.peek - except: - return "Binary ab1 sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) - -class Scf( data.Data ): - """Class describing an scf binary sequence file""" - file_ext = "scf" - def set_peek( self, dataset ): - if not dataset.dataset.purged: - export_url = "/history_add_to?" 
+ urlencode({'history_id':dataset.history_id,'ext':'scf','name':'scf sequence','info':'Sequence file','dbkey':dataset.dbkey}) - dataset.peek = "Binary scf sequence file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek(self, dataset): - try: - return dataset.peek - except: - return "Binary scf sequence file (%s)" % ( data.nice_size( dataset.get_size() ) ) - -class Binseq( data.Data ): - """Class describing a zip archive of binary sequence files""" - file_ext = "binseq.zip" - def set_peek( self, dataset ): - if not dataset.dataset.purged: - zip_file = zipfile.ZipFile( dataset.file_name, "r" ) - num_files = len( zip_file.namelist() ) - dataset.peek = "Archive of %s binary sequence files" % ( str( num_files ) ) - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek(self, dataset): - try: - return dataset.peek - except: - return "Binary sequence file archive (%s)" % ( data.nice_size( dataset.get_size() ) ) - def get_mime(self): - """Returns the mime type of the datatype""" - return 'application/zip' - -class Txtseq( data.Data ): - """Class describing a zip archive of text sequence files""" - file_ext = "txtseq.zip" - def set_peek( self, dataset ): - if not dataset.dataset.purged: - zip_file = zipfile.ZipFile( dataset.file_name, "r" ) - num_files = len( zip_file.namelist() ) - dataset.peek = "Archive of %s text sequence files" % ( str( num_files ) ) - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek(self, dataset): - try: - return dataset.peek - except: - return "Text sequence file archive (%s)" % ( data.nice_size( dataset.get_size() ) ) - def get_mime(self): - """Returns the mime type of the datatype""" - return 'application/zip' - class Image( data.Data ): """Class describing an image""" def set_peek( self, dataset ): @@ -236,47 +160,3 @@ return dataset.peek except: return "peek unavailable" - -class Bam( data.Binary ): - """Class describing a BAM binary file""" - file_ext = "bam" - MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) - def init_meta( self, dataset, copy_from=None ): - data.Binary.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, overwrite = True, **kwd ): - """ - Sets index for BAM file. - """ - index_file = dataset.metadata.bam_index - if not index_file: - index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset ) - tmp_dir = tempfile.gettempdir() - tmpf1 = tempfile.NamedTemporaryFile(dir=tmp_dir) - tmpf1bai = '%s.bai' % tmpf1.name - try: - os.system('cd %s' % tmp_dir) - os.system('cp %s %s' % (dataset.file_name, tmpf1.name)) - os.system('samtools index %s' % tmpf1.name) - os.system('cp %s %s' % (tmpf1bai, index_file.file_name)) - except Exception, ex: - sys.stderr.write('There was a problem creating the index for the BAM file\n%s\n' + str(ex)) - tmpf1.close() - if os.path.exists(tmpf1bai): - os.remove(tmpf1bai) - dataset.metadata.bam_index = index_file - def set_peek( self, dataset ): - if not dataset.dataset.purged: - export_url = "/history_add_to?" 
+ urlencode({'history_id':dataset.history_id,'ext':'bam','name':'bam alignments','info':'Alignments file','dbkey':dataset.dbkey}) - dataset.peek = "Binary bam alignments file" - dataset.blurb = data.nice_size( dataset.get_size() ) - else: - dataset.peek = 'file does not exist' - dataset.blurb = 'file purged from disk' - def display_peek(self, dataset): - try: - return dataset.peek - except: - return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) - def get_mime(self): - """Returns the mime type of the datatype""" - return 'application/octet-stream' diff -r 4f9b630f9976 -r 23c1b9799bce lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Fri Nov 13 10:39:15 2009 -0500 +++ b/lib/galaxy/datatypes/registry.py Fri Nov 13 14:13:03 2009 -0500 @@ -3,7 +3,7 @@ """ import os, tempfile import logging -import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo +import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary import galaxy.util from galaxy.util.odict import odict @@ -109,11 +109,11 @@ #default values if len(self.datatypes_by_extension) < 1: self.datatypes_by_extension = { - 'ab1' : images.Ab1(), + 'ab1' : binary.Ab1(), 'axt' : sequence.Axt(), - 'bam' : images.Bam(), + 'bam' : binary.Bam(), 'bed' : interval.Bed(), - 'binseq.zip' : images.Binseq(), + 'binseq.zip' : binary.Binseq(), 'blastxml' : xml.BlastXml(), 'coverage' : coverage.LastzCoverage(), 'customtrack' : interval.CustomTrack(), @@ -132,12 +132,12 @@ 'qualsolexa' : qualityscore.QualityScoreSolexa(), 'qual454' : qualityscore.QualityScore454(), 'sam' : tabular.Sam(), - 'scf' : images.Scf(), - 'sff' : data.Sff(), + 'scf' : binary.Scf(), + 'sff' : binary.Sff(), 'tabular' : tabular.Tabular(), 'taxonomy' : tabular.Taxonomy(), 'txt' : data.Text(), - 'txtseq.zip' : images.Txtseq(), + 'txtseq.zip' : data.Txtseq(), 'wig' : interval.Wiggle() } self.mimetypes_by_extension = { @@ -174,7 +174,7 @@ # because some formats are much more flexibly defined than others. if len(self.sniff_order) < 1: self.sniff_order = [ - data.Sff(), + binary.Sff(), xml.BlastXml(), sequence.Maf(), sequence.Lav(), diff -r 4f9b630f9976 -r 23c1b9799bce lib/galaxy/datatypes/tracks.py --- a/lib/galaxy/datatypes/tracks.py Fri Nov 13 10:39:15 2009 -0500 +++ b/lib/galaxy/datatypes/tracks.py Fri Nov 13 14:13:03 2009 -0500 @@ -2,11 +2,7 @@ Datatype classes for tracks/track views within galaxy. 
""" -import data -import logging -import re -import binascii -from cgi import escape +import tabular, binascii, logging from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata import galaxy.model @@ -17,7 +13,7 @@ log = logging.getLogger(__name__) -class GeneTrack( data.Binary ): +class GeneTrack( tabular.Tabular ): file_ext = "genetrack" MetadataElement( name="genetrack", default="data.genetrack", desc="HDF index", readonly=True, visible=True, no_value=0 ) diff -r 4f9b630f9976 -r 23c1b9799bce test/functional/test_get_data.py --- a/test/functional/test_get_data.py Fri Nov 13 10:39:15 2009 -0500 +++ b/test/functional/test_get_data.py Fri Nov 13 14:13:03 2009 -0500 @@ -4,128 +4,538 @@ from base.twilltestcase import TwillTestCase class UploadData( TwillTestCase ): - def test_000_upload_files_from_disk( self ): - """Test uploading data files from disk""" + def test_0005_upload_file( self ): + """Test uploading 1.bed, NOT setting the file format""" self.logout() self.login( email='test@bx.psu.edu' ) global admin_user admin_user = sa_session.query( galaxy.model.User ) \ .filter( galaxy.model.User.table.c.email=='test@bx.psu.edu' ) \ .one() - history1 = sa_session.query( galaxy.model.History ) \ - .filter( and_( galaxy.model.History.table.c.deleted==False, - galaxy.model.History.table.c.user_id==admin_user.id ) ) \ - .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ - .first() + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() self.upload_file( '1.bed' ) - hda1 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ - .first() - assert hda1 is not None, "Problem retrieving hda1 from database" - self.verify_dataset_correctness( '1.bed', hid=str( hda1.hid ) ) - self.upload_file( '2.bed', dbkey='hg17' ) - hda2 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ - .first() - assert hda2 is not None, "Problem retrieving hda2 from database" - self.verify_dataset_correctness( '2.bed', hid=str( hda2.hid ) ) - self.upload_file( '3.bed', dbkey='hg17', ftype='bed' ) - hda3 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ - .first() - assert hda3 is not None, "Problem retrieving hda3 from database" - self.verify_dataset_correctness( '3.bed', hid=str( hda3.hid ) ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '1.bed', hid=str( hda.hid ) ) + self.check_history_for_string( "<th>1.Chrom</th><th>2.Start</th><th>3.End</th>" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0010_upload_file( self ): + """Test uploading 4.bed.gz, manually setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( 
galaxy.model.History.table.c.create_time ) ) \ + .first() self.upload_file( '4.bed.gz', dbkey='hg17', ftype='bed' ) - hda4 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ - .first() - assert hda4 is not None, "Problem retrieving hda4 from database" - self.verify_dataset_correctness( '4.bed', hid=str( hda4.hid ) ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '4.bed', hid=str( hda.hid ) ) + self.check_history_for_string( "<th>1.Chrom</th><th>2.Start</th><th>3.End</th>" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0015_upload_file( self ): + """Test uploading 1.scf, manually setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() self.upload_file( '1.scf', ftype='scf' ) - hda5 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ - .first() - assert hda5 is not None, "Problem retrieving hda5 from database" - self.verify_dataset_correctness( '1.scf', hid=str( hda5.hid ) ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '1.scf', hid=str( hda.hid ) ) + self.check_history_for_string( "Binary scf sequence file</pre>" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0020_upload_file( self ): + """Test uploading 1.scf, NOT setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '1.scf' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.check_history_for_string( "File Format' to 'Scf' when uploading scf files" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0025_upload_file( self ): + """Test uploading 1.scf.zip, manually setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() self.upload_file( '1.scf.zip', ftype='binseq.zip' ) - hda6 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ - .first() - assert hda6 is not None, "Problem retrieving hda6 from 
database" - self.verify_dataset_correctness( '1.scf.zip', hid=str( hda6.hid ) ) - self.delete_history( id=self.security.encode_id( history1.id ) ) - def test_005_url_paste( self ): + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '1.scf.zip', hid=str( hda.hid ) ) + self.check_history_for_string( "Archive of 1 binary sequence files</pre>" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0030_upload_file( self ): + """Test uploading 1.scf.zip, NOT setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '1.scf.zip' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.check_history_for_string( "'File Format' for archive consisting of binary files - use 'Binseq.zip'" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0035_upload_file( self ): + """Test uploading 1.sam NOT setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '1.sam' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '1.sam', hid=str( hda.hid ) ) + self.check_history_for_string( "<th>1.QNAME</th><th>2.FLAG</th><th>3.RNAME</th><th>4.POS</th>" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0040_upload_file( self ): + """Test uploading 1.sff, NOT setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '1.sff' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '1.sff', hid=str( hda.hid ) ) + self.check_history_for_string( 'format: <span class="sff">sff' ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0045_upload_file( self ): + """Test uploading 454Score.pdf, NOT setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + 
galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '454Score.pdf' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.check_history_for_string( "The uploaded file contains inappropriate content" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0050_upload_file( self ): + """Test uploading 454Score.png, NOT setting the file format""" + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '454Score.png' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.check_history_for_string( "The uploaded file contains inappropriate content" ) + def test_0055_upload_file( self ): + """Test uploading lped composite datatype file, manually setting the file format""" + # Logged in as admin_user + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + # lped data types include a ped_file and a map_file ( which is binary ) + self.upload_composite_datatype_file( 'lped', ped_file='tinywga.ped', map_file='tinywga.map', base_name='rgenetics' ) + # Get the latest hid for testing + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + # We'll test against the resulting ped file and map file for correctness + self.verify_composite_datatype_file_content( 'rgenetics.ped', str( hda.id ) ) + self.verify_composite_datatype_file_content( 'rgenetics.map', str( hda.id ) ) + self.check_history_for_string( "Uploaded Composite Dataset (lped)" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0060_upload_file( self ): + """Test uploading pbed composite datatype file, manually setting the file format""" + # Logged in as admin_user + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + # pbed data types include a bim_file, a bed_file and a fam_file + self.upload_composite_datatype_file( 'pbed', bim_file='tinywga.bim', bed_file='tinywga.bed', fam_file='tinywga.fam', base_name='rgenetics' ) + # Get the latest hid for testing + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + # 
We'll test against the resulting ped file and map file for correctness + self.verify_composite_datatype_file_content( 'rgenetics.bim', str( hda.id ) ) + self.verify_composite_datatype_file_content( 'rgenetics.bed', str( hda.id ) ) + self.verify_composite_datatype_file_content( 'rgenetics.fam', str( hda.id ) ) + self.check_history_for_string( "Uploaded Composite Dataset (pbed)" ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0065_upload_file( self ): + """Test uploading asian_chars_1.txt, NOT setting the file format""" + # Logged in as admin_user + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( 'asian_chars_1.txt' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( 'asian_chars_1.txt', hid=str( hda.hid ) ) + self.check_history_for_string( 'uploaded multi-byte char file' ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0070_upload_file( self ): + """Test uploading 2gen.fastq, NOT setting the file format""" + # Logged in as admin_user + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '2gen.fastq' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '2gen.fastq', hid=str( hda.hid ) ) + self.check_history_for_string( '2gen.fastq format: <span class="fastq">fastq</span>, database: \? Info: uploaded fastq file' ) + self.delete_history( id=self.security.encode_id( history.id ) ) + def test_0075_upload_file( self ): + """Test uploading 1.wig, NOT setting the file format""" + # Logged in as admin_user + self.check_history_for_string( 'Your history is empty' ) + history = sa_session.query( galaxy.model.History ) \ + .filter( and_( galaxy.model.History.table.c.deleted==False, + galaxy.model.History.table.c.user_id==admin_user.id ) ) \ + .order_by( desc( galaxy.model.History.table.c.create_time ) ) \ + .first() + self.upload_file( '1.wig' ) + hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \ + .first() + assert hda is not None, "Problem retrieving hda from database" + self.verify_dataset_correctness( '1.wig', hid=str( hda.hid ) ) + self.check_history_for_string( '1.wig format: <span class="wig">wig</span>, database: \? 
Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.wig" value="\?"' )
+        self.check_metadata_for_string( 'Change data type selected value="wig" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0080_upload_file( self ):
+        """Test uploading 1.tabular, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '1.tabular' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '1.tabular', hid=str( hda.hid ) )
+        self.check_history_for_string( '1.tabular format: <span class="tabular">tabular</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.tabular" value="\?"' )
+        self.check_metadata_for_string( 'Change data type selected value="tabular" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0085_upload_file( self ):
+        """Test uploading qualscores.qualsolid, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( 'qualscores.qualsolid' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( 'qualscores.qualsolid', hid=str( hda.hid ) )
+        self.check_history_for_string( '2.5 Kb, format: <span class="qualsolid">qualsolid</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'Change data type value="qualsolid" selected="yes">qualsolid' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0090_upload_file( self ):
+        """Test uploading qualscores.qual454, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( 'qualscores.qual454' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( 'qualscores.qual454', hid=str( hda.hid ) )
+        self.check_history_for_string( '5.6 Kb, format: <span class="qual454">qual454</span>, database: \?' )
+        self.check_metadata_for_string( 'Change data type value="qual454" selected="yes">qual454' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0095_upload_file( self ):
+        """Test uploading 3.maf, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '3.maf' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '3.maf', hid=str( hda.hid ) )
+        self.check_history_for_string( '3.maf format: <span class="maf">maf</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="3.maf" value="\?"' )
+        self.check_metadata_for_string( 'Convert to new format <option value="interval">Convert MAF to Genomic Intervals <option value="fasta">Convert MAF to Fasta' )
+        self.check_metadata_for_string( 'Change data type selected value="maf" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0100_upload_file( self ):
+        """Test uploading 1.lav, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '1.lav' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '1.lav', hid=str( hda.hid ) )
+        self.check_history_for_string( '1.lav format: <span class="lav">lav</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.lav" value="\?"' )
+        self.check_metadata_for_string( 'Change data type selected value="lav" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0105_upload_file( self ):
+        """Test uploading 1.interval, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '1.interval' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '1.interval', hid=str( hda.hid ) )
+        self.check_history_for_string( '1.interval format: <span class="interval">interval</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.interval" value="\?"' )
+        self.check_metadata_for_string( 'Chrom column: <option value="1" selected> Start column: <option value="2" selected>' )
+        self.check_metadata_for_string( 'End column: <option value="3" selected> Strand column <option value="6" selected>' )
+        self.check_metadata_for_string( 'Convert to new format <option value="bed">Convert Genomic Intervals To BED' )
+        self.check_metadata_for_string( 'Change data type selected value="interval" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0110_upload_file( self ):
+        """Test uploading 5.gff3, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '5.gff3' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '5.gff3', hid=str( hda.hid ) )
+        self.check_history_for_string( '5.gff3 format: <span class="gff3">gff3</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="5.gff3" value="\?"' )
+        self.check_metadata_for_string( 'Convert to new format <option value="bed">Convert GFF to BED' )
+        self.check_metadata_for_string( 'Change data type selected value="gff3" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0115_upload_file( self ):
+        """Test uploading html_file.txt, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( 'html_file.txt' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.check_history_for_string( 'The uploaded file contains inappropriate content' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0120_upload_file( self ):
+        """Test uploading 5.gff, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '5.gff' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '5.gff', hid=str( hda.hid ) )
+        self.check_history_for_string( '5.gff format: <span class="gff">gff</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="5.gff" value="\?"' )
+        self.check_metadata_for_string( 'Convert to new format <option value="bed">Convert GFF to BED' )
+        self.check_metadata_for_string( 'Change data type selected value="gff" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0125_upload_file( self ):
+        """Test uploading 1.fasta, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '1.fasta' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '1.fasta', hid=str( hda.hid ) )
+        self.check_history_for_string( '1.fasta format: <span class="fasta">fasta</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.fasta" value="\?" Change data type selected value="fasta" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0130_upload_file( self ):
+        """Test uploading 1.customtrack, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '1.customtrack' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '1.customtrack', hid=str( hda.hid ) )
+        self.check_history_for_string( '1.customtrack format: <span class="customtrack">customtrack</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.customtrack" value="\?" Change data type selected value="customtrack" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0135_upload_file( self ):
+        """Test uploading shrimp_cs_test1.csfasta, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( 'shrimp_cs_test1.csfasta' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( 'shrimp_cs_test1.csfasta', hid=str( hda.hid ) )
+        self.check_history_for_string( '162.6 Kb, format: <span class="csfasta">csfasta</span>, <td>>2_14_26_F3,-1282216.0</td>' )
+        self.check_metadata_for_string( 'value="shrimp_cs_test1.csfasta" value="\?" Change data type value="csfasta" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0140_upload_file( self ):
+        """Test uploading megablast_xml_parser_test1.gz, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( 'megablast_xml_parser_test1.gz' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.check_history_for_string( 'NCBI Blast XML data format: <span class="blastxml">blastxml</span>' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0145_upload_file( self ):
+        """Test uploading 1.axt, NOT setting the file format"""
+        # Logged in as admin_user
+        self.check_history_for_string( 'Your history is empty' )
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
+        self.upload_file( '1.axt' )
+        hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
+            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
+            .first()
+        assert hda is not None, "Problem retrieving hda from database"
+        self.verify_dataset_correctness( '1.axt', hid=str( hda.hid ) )
+        self.check_history_for_string( '1.axt format: <span class="axt">axt</span>, database: \? Info: uploaded file' )
+        self.check_metadata_for_string( 'value="1.axt" value="\?" Change data type selected value="axt" selected="yes"' )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_0150_url_paste( self ):
         """Test url paste behavior"""
         # Logged in as admin_user
         # Deleting the current history should have created a new history
         self.check_history_for_string( 'Your history is empty' )
-        history2 = sa_session.query( galaxy.model.History ) \
-            .filter( and_( galaxy.model.History.table.c.deleted==False,
-                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
-            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
-            .first()
+        history = sa_session.query( galaxy.model.History ) \
+            .filter( and_( galaxy.model.History.table.c.deleted==False,
+                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
+            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
+            .first()
         self.upload_url_paste( 'hello world' )
         self.check_history_for_string( 'Pasted Entry' )
         self.check_history_for_string( 'hello world' )
         self.upload_url_paste( u'hello world' )
         self.check_history_for_string( 'Pasted Entry' )
         self.check_history_for_string( 'hello world' )
-        self.delete_history( id=self.security.encode_id( history2.id ) )
-    def test_010_upload_lped_composite_datatype_files( self ):
-        """Test uploading lped composite datatype files"""
-        # Logged in as admin_user
-        self.check_history_for_string( 'Your history is empty' )
-        history3 = sa_session.query( galaxy.model.History ) \
-            .filter( and_( galaxy.model.History.table.c.deleted==False,
-                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
-            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
-            .first()
-        # lped data types include a ped_file and a map_file ( which is binary )
-        self.upload_composite_datatype_file( 'lped', ped_file='tinywga.ped', map_file='tinywga.map', base_name='rgenetics' )
-        # Get the latest hid for testing
-        hda1 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert hda1 is not None, "Problem retrieving hda1 from database"
-        # We'll test against the resulting ped file and map file for correctness
-        self.verify_composite_datatype_file_content( 'rgenetics.ped', str( hda1.id ) )
-        self.verify_composite_datatype_file_content( 'rgenetics.map', str( hda1.id ) )
-        self.delete_history( id=self.security.encode_id( history3.id ) )
-    def test_015_upload_pbed_composite_datatype_files( self ):
-        """Test uploading pbed composite datatype files"""
-        # Logged in as admin_user
-        self.check_history_for_string( 'Your history is empty' )
-        history4 = sa_session.query( galaxy.model.History ) \
-            .filter( and_( galaxy.model.History.table.c.deleted==False,
-                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
-            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
-            .first()
-        # pbed data types include a bim_file, a bed_file and a fam_file
-        self.upload_composite_datatype_file( 'pbed', bim_file='tinywga.bim', bed_file='tinywga.bed', fam_file='tinywga.fam', base_name='rgenetics' )
-        # Get the latest hid for testing
-        hda1 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert hda1 is not None, "Problem retrieving hda1 from database"
-        # We'll test against the resulting ped file and map file for correctness
-        self.verify_composite_datatype_file_content( 'rgenetics.bim', str( hda1.id ) )
-        self.verify_composite_datatype_file_content( 'rgenetics.bed', str( hda1.id ) )
-        self.verify_composite_datatype_file_content( 'rgenetics.fam', str( hda1.id ) )
-        self.delete_history( id=self.security.encode_id( history4.id ) )
-    def test_020_upload_multibyte_character_file( self ):
-        """Test uploading multi-byte character file"""
-        # Logged in as admin_user
-        self.check_history_for_string( 'Your history is empty' )
-        history5 = sa_session.query( galaxy.model.History ) \
-            .filter( and_( galaxy.model.History.table.c.deleted==False,
-                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
-            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
-            .first()
-        self.upload_file( 'asian_chars_1.txt' )
-        hda1 = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert hda1 is not None, "Problem retrieving hda1 from database"
-        self.verify_dataset_correctness( 'asian_chars_1.txt', hid=str( hda1.hid ) )
-        self.check_history_for_string( 'uploaded multi-byte char file' )
-        self.delete_history( id=self.security.encode_id( history5.id ) )
+        self.delete_history( id=self.security.encode_id( history.id ) )
+    def test_9999_clean_up( self ):
+        self.logout()
\ No newline at end of file
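Each of the new upload tests above repeats the same boilerplate: look up the newest non-deleted history for admin_user, upload a file, fetch the newest HistoryDatasetAssociation, verify it, and delete the history. A minimal sketch of how that pattern could be factored into helpers (the helper names get_latest_history and get_latest_hda are hypothetical, not part of this changeset; imports follow the test scripts' own conventions):

    import galaxy.model
    from galaxy.model.orm import *   # supplies and_ and desc, as in the test scripts
    from galaxy.model.mapping import context as sa_session

    def get_latest_history( user ):
        # Hypothetical helper: newest non-deleted history owned by the given user.
        return sa_session.query( galaxy.model.History ) \
            .filter( and_( galaxy.model.History.table.c.deleted==False,
                           galaxy.model.History.table.c.user_id==user.id ) ) \
            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
            .first()

    def get_latest_hda():
        # Hypothetical helper: newest HDA, i.e. the dataset created by the last upload.
        return sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
            .first()

With those two helpers, each test body would reduce to an upload, an assert that get_latest_hda() is not None, and the datatype-specific verification calls.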
diff -r 4f9b630f9976 -r 23c1b9799bce test/functional/test_sniffing_and_metadata_settings.py
--- a/test/functional/test_sniffing_and_metadata_settings.py Fri Nov 13 10:39:15 2009 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,262 +0,0 @@
-import galaxy.model
-from galaxy.model.orm import *
-from galaxy.model.mapping import context as sa_session
-from base.twilltestcase import TwillTestCase
-
-class SniffingAndMetaDataSettings( TwillTestCase ):
-    def test_000_axt_datatype( self ):
-        """Testing correctly sniffing axt data type upon upload"""
-        self.logout()
-        self.login( email='test@bx.psu.edu' )
-        global admin_user
-        admin_user = sa_session.query( galaxy.model.User ).filter( galaxy.model.User.table.c.email=='test@bx.psu.edu' ).one()
-        self.new_history( name='history1' )
-        global history1
-        history1 = sa_session.query( galaxy.model.History ) \
-            .filter( and_( galaxy.model.History.table.c.deleted==False,
-                           galaxy.model.History.table.c.user_id==admin_user.id ) ) \
-            .order_by( desc( galaxy.model.History.table.c.create_time ) ) \
-            .first()
-        assert history1 is not None, "Problem retrieving history1 from database"
-        self.upload_file( '1.axt' )
-        self.verify_dataset_correctness( '1.axt' )
-        self.check_history_for_string( '1.axt format: <span class="axt">axt</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.axt" value="\?" Change data type selected value="axt" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving axt hda from the database"
-        if not latest_hda.name == '1.axt' and not latest_hda.extension == 'axt':
-            raise AssertionError, "axt data type was not correctly sniffed."
-    def test_005_bed_datatype( self ):
-        """Testing correctly sniffing bed data type upon upload"""
-        self.upload_file( '1.bed' )
-        self.verify_dataset_correctness( '1.bed' )
-        self.check_history_for_string( '1.bed format: <span class="bed">bed</span>, database: \? Info: uploaded file')
-        self.check_metadata_for_string( 'value="1.bed" value="\?"' )
-        self.check_metadata_for_string( 'Chrom column: <option value="1" selected> Start column: <option value="2" selected>' )
-        self.check_metadata_for_string( 'End column: <option value="3" selected> Strand column <option value="6" selected>' )
-        self.check_metadata_for_string( 'Convert to new format value="bed">Convert Genomic Intervals To BED <option value="gff">Convert BED to GFF' )
-        self.check_metadata_for_string( 'Change data type selected value="bed" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving bed hda from the database"
-        if not latest_hda.name == '1.bed' and not latest_hda.extension == 'bed':
-            raise AssertionError, "bed data type was not correctly sniffed."
-    def test_010_blastxml_datatype( self ):
-        """Testing correctly sniffing blastxml data type upon upload"""
-        self.upload_file( 'megablast_xml_parser_test1.gz' )
-        self.check_history_for_string( 'NCBI Blast XML data format: <span class="blastxml">blastxml</span>' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving blastxml hda from the database"
-        if not latest_hda.name == 'megablast_xml_parser_test1' and not latest_hda.extension == 'blastxml':
-            raise AssertionError, "blastxml data type was not correctly sniffed."
-    def test_015_csfasta_datatype( self ):
-        """Testing correctly sniffing csfasta data type upon upload"""
-        self.upload_file( 'shrimp_cs_test1.csfasta' )
-        self.verify_dataset_correctness( 'shrimp_cs_test1.csfasta' )
-        self.check_history_for_string( '162.6 Kb, format: <span class="csfasta">csfasta</span>, <td>>2_14_26_F3,-1282216.0</td>' )
-        self.check_metadata_for_string( 'value="shrimp_cs_test1.csfasta" value="\?" Change data type value="csfasta" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving csfasta hda from the database"
-        if not latest_hda.name == 'shrimp_cs_test1.csfasta' and not latest_hda.extension == 'csfasta':
-            raise AssertionError, "csfasta data type was not correctly sniffed."
-    def test_020_customtrack_datatype( self ):
-        """Testing correctly sniffing customtrack data type upon upload"""
-        self.upload_file( '1.customtrack' )
-        self.verify_dataset_correctness( '1.customtrack' )
-        self.check_history_for_string( '1.customtrack format: <span class="customtrack">customtrack</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.customtrack" value="\?" Change data type selected value="customtrack" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving customtrack hda from the database"
-        if not latest_hda.name == '1.customtrack' and not latest_hda.extension == 'customtrack':
-            raise AssertionError, "customtrack data type was not correctly sniffed."
-    def test_025_fasta_datatype( self ):
-        """Testing correctly sniffing fasta data type upon upload"""
-        self.upload_file( '1.fasta' )
-        self.verify_dataset_correctness( '1.fasta' )
-        self.check_history_for_string( '1.fasta format: <span class="fasta">fasta</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.fasta" value="\?" Change data type selected value="fasta" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving fasta hda from the database"
-        if not latest_hda.name == '1.fasta' and not latest_hda.extension == 'fasta':
-            raise AssertionError, "fasta data type was not correctly sniffed."
-    def test_035_gff_datatype( self ):
-        """Testing correctly sniffing gff data type upon upload"""
-        self.upload_file( '5.gff' )
-        self.verify_dataset_correctness( '5.gff' )
-        self.check_history_for_string( '5.gff format: <span class="gff">gff</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="5.gff" value="\?"' )
-        self.check_metadata_for_string( 'Convert to new format <option value="bed">Convert GFF to BED' )
-        self.check_metadata_for_string( 'Change data type selected value="gff" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving gff hda from the database"
-        if not latest_hda.name == '5.gff' and not latest_hda.extension == 'gff':
-            raise AssertionError, "gff data type was not correctly sniffed."
-    def test_040_gff3_datatype( self ):
-        """Testing correctly sniffing gff3 data type upon upload"""
-        self.upload_file( '5.gff3' )
-        self.verify_dataset_correctness( '5.gff3' )
-        self.check_history_for_string( '5.gff3 format: <span class="gff3">gff3</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="5.gff3" value="\?"' )
-        self.check_metadata_for_string( 'Convert to new format <option value="bed">Convert GFF to BED' )
-        self.check_metadata_for_string( 'Change data type selected value="gff3" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving gff3 hda from the database"
-        if not latest_hda.name == '5.gff3' and not latest_hda.extension == 'gff3':
-            raise AssertionError, "gff3 data type was not correctly sniffed."
-    # TODO: the following test generates a data.hid == None, breaking this and all following tests
-    # I am not currently able to track down why, and uploading inappropriate files outside of the
-    # functional test framework seems to generate valid hids, so this needs to be tracked down and fixed
-    # ASAP, un-commenting this test.
-    #def test_045_html_datatype( self ):
-        #"""Testing correctly sniffing html data type upon upload"""
-        #self.upload_file( 'html_file.txt' )
-        #self.check_history_for_string( 'An error occurred running this job: No data: you attempted to upload an inappropriate file.' )
-        #latest_hda = galaxy.model.HistoryDatasetAssociation.query() \
-        #    .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ).first()
-        #assert latest_hda is not None, "Problem retrieving html hda from the database"
-        #if not latest_hda.name == 'html_file.txt' and not latest_hda.extension == 'data':
-        #    raise AssertionError, "html data type was not correctly sniffed."
-    def test_050_interval_datatype( self ):
-        """Testing correctly sniffing interval data type upon upload"""
-        self.upload_file( '1.interval' )
-        self.verify_dataset_correctness( '1.interval' )
-        self.check_history_for_string( '1.interval format: <span class="interval">interval</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.interval" value="\?"' )
-        self.check_metadata_for_string( 'Chrom column: <option value="1" selected> Start column: <option value="2" selected>' )
-        self.check_metadata_for_string( 'End column: <option value="3" selected> Strand column <option value="6" selected>' )
-        self.check_metadata_for_string( 'Convert to new format <option value="bed">Convert Genomic Intervals To BED' )
-        self.check_metadata_for_string( 'Change data type selected value="interval" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving interval hda from the database"
-        if not latest_hda.name == '1.interval' and not latest_hda.extension == 'interval':
-            raise AssertionError, "interval data type was not correctly sniffed."
-    def test_055_lav_datatype( self ):
-        """Testing correctly sniffing lav data type upon upload"""
-        self.upload_file( '1.lav' )
-        self.verify_dataset_correctness( '1.lav' )
-        self.check_history_for_string( '1.lav format: <span class="lav">lav</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.lav" value="\?"' )
-        self.check_metadata_for_string( 'Change data type selected value="lav" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving lav hda from the database"
-        if not latest_hda.name == '1.lav' and not latest_hda.extension == 'lav':
-            raise AssertionError, "lav data type was not correctly sniffed."
-    def test_060_maf_datatype( self ):
-        """Testing correctly sniffing maf data type upon upload"""
-        self.upload_file( '3.maf' )
-        self.verify_dataset_correctness( '3.maf' )
-        self.check_history_for_string( '3.maf format: <span class="maf">maf</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="3.maf" value="\?"' )
-        self.check_metadata_for_string( 'Convert to new format <option value="interval">Convert MAF to Genomic Intervals <option value="fasta">Convert MAF to Fasta' )
-        self.check_metadata_for_string( 'Change data type selected value="maf" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving maf hda from the database"
-        if not latest_hda.name == '3.maf' and not latest_hda.extension == 'maf':
-            raise AssertionError, "maf data type was not correctly sniffed."
-    def test_065_qual454_datatype( self ):
-        """Testing correctly sniffing qual454 data type upon upload"""
-        self.upload_file( 'qualscores.qual454' )
-        self.verify_dataset_correctness( 'qualscores.qual454' )
-        self.check_history_for_string( '5.6 Kb, format: <span class="qual454">qual454</span>, database: \?' )
-        self.check_metadata_for_string( 'Change data type value="qual454" selected="yes">qual454' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving qual454 hda from the database"
-        if not latest_hda.name == 'qualscores.qual454' and not latest_hda.extension == 'qual454':
-            raise AssertionError, "qual454 data type was not correctly sniffed."
-    def test_070_qualsolid_datatype( self ):
-        """Testing correctly sniffing qualsolid data type upon upload"""
-        self.upload_file( 'qualscores.qualsolid' )
-        self.verify_dataset_correctness('qualscores.qualsolid' )
-        self.check_history_for_string('2.5 Kb, format: <span class="qualsolid">qualsolid</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'Change data type value="qualsolid" selected="yes">qualsolid' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving qualsolid hda from the database"
-        if not latest_hda.name == 'qualscores.qualsolid' and not latest_hda.extension == 'qualsolid':
-            raise AssertionError, "qualsolid data type was not correctly sniffed."
-    def test_075_tabular_datatype( self ):
-        """Testing correctly sniffing tabular data type upon upload"""
-        self.upload_file( '1.tabular' )
-        self.verify_dataset_correctness( '1.tabular' )
-        self.check_history_for_string( '1.tabular format: <span class="tabular">tabular</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.tabular" value="\?"' )
-        self.check_metadata_for_string( 'Change data type selected value="tabular" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving tabular hda from the database"
-        if not latest_hda.name == '1.tabular' and not latest_hda.extension == 'tabular':
-            raise AssertionError, "tabular data type was not correctly sniffed."
-    def test_080_wig_datatype( self ):
-        """Testing correctly sniffing wig data type upon upload"""
-        self.upload_file( '1.wig' )
-        self.verify_dataset_correctness( '1.wig' )
-        self.check_history_for_string( '1.wig format: <span class="wig">wig</span>, database: \? Info: uploaded file' )
-        self.check_metadata_for_string( 'value="1.wig" value="\?"' )
-        self.check_metadata_for_string( 'Change data type selected value="wig" selected="yes"' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving wig hda from the database"
-        if not latest_hda.name == '1.wig' and not latest_hda.extension == 'wig':
-            raise AssertionError, "wig data type was not correctly sniffed."
-    def test_090_sam_datatype( self ):
-        """Testing correctly sniffing sam format upon upload"""
-        self.upload_file( '1.sam' )
-        self.verify_dataset_correctness( '1.sam' )
-        self.check_history_for_string( '1.sam format: <span class="sam">sam</span>, database: \? Info: uploaded sam file' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving sam hda from the database"
-        if not latest_hda.name == '1.sam' and not latest_hda.extension == 'sam':
-            raise AssertionError, "sam data type was not correctly sniffed."
-    def test_095_fastq_datatype( self ):
-        """Testing correctly sniffing fastq ( generic ) data type upon upload"""
-        self.upload_file( '2gen.fastq' )
-        self.verify_dataset_correctness( '2gen.fastq' )
-        self.check_history_for_string( '2gen.fastq format: <span class="fastq">fastq</span>, database: \? Info: uploaded fastq file' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving fastq hda from the database"
-        if not latest_hda.name == '2gen.fastq' and not latest_hda.extension == 'fastq':
-            raise AssertionError, "fastq data type was not correctly sniffed."
-    def test_0100_sff_datatype( self ):
-        """Testing correctly sniffing sff format upon upload"""
-        self.upload_file( '1.sff' )
-        self.verify_dataset_correctness( '1.sff' )
-        self.check_history_for_string( 'format: <span class="sff">sff' )
-        latest_hda = sa_session.query( galaxy.model.HistoryDatasetAssociation ) \
-            .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ) \
-            .first()
-        assert latest_hda is not None, "Problem retrieving sff hda from the database"
-        if not latest_hda.name == '1.sff' and not latest_hda.extension == 'sff':
-            raise AssertionError, "sff data type was not correctly sniffed."
-    def test_9999_clean_up( self ):
-        self.delete_history( id=self.security.encode_id( history1.id ) )
-        self.logout()
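With this per-datatype sniffing script deleted, upload-time sniffing of binary types is exercised through test_get_data.py and implemented by the datatype classes in the new lib/galaxy/datatypes/binary.py, whose contents are not part of this diff. For SFF, a sniffer only needs the four-byte magic number '.sff' (0x2E736666) that opens every SFF common header; a sketch of that idea, assuming nothing about the actual Sff.sniff implementation:

    def sniff_sff( filename ):
        # Illustrative only; the real sniffer lives in lib/galaxy/datatypes/binary.py.
        # An SFF file begins with the magic bytes '.sff' (0x2E736666).
        f = open( filename, 'rb' )
        try:
            return f.read( 4 ) == '.sff'
        finally:
            f.close()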
diff -r 4f9b630f9976 -r 23c1b9799bce tools/data_source/upload.py
--- a/tools/data_source/upload.py Fri Nov 13 10:39:15 2009 -0500
+++ b/tools/data_source/upload.py Fri Nov 13 14:13:03 2009 -0500
@@ -9,6 +9,7 @@
 # need to import model before sniff to resolve a circular import dependency
 import galaxy.model
 from galaxy.datatypes import sniff
+from galaxy.datatypes.binary import sniffable_binary_formats, unsniffable_binary_formats
 from galaxy import util
 from galaxy.util.json import *
@@ -200,25 +201,29 @@
     ext = dataset.file_type
     if not data_type:
         if check_binary( dataset.path ):
-            if dataset.is_binary is not None:
-                data_type = 'binary'
-                ext = dataset.file_type
-            else:
-                parts = dataset.name.split( "." )
-                if len( parts ) > 1:
-                    ext = parts[1].strip().lower()
-                    if not( ext == 'ab1' or ext == 'scf' ):
-                        file_err( 'The uploaded file contains inappropriate content', dataset, json_file )
+            data_type = 'binary'
+            binary_ok = False
+            parts = dataset.name.split( "." )
+            if len( parts ) > 1:
+                ext = parts[1].strip().lower()
+                if ext in unsniffable_binary_formats and dataset.file_type == ext:
+                    binary_ok = True
+                elif ext in unsniffable_binary_formats and dataset.file_type != ext:
+                    err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext )
+                    file_err( err_msg, dataset, json_file )
+                    return
+                if not binary_ok and ext in sniffable_binary_formats:
+                    # Sniff the file to confirm it's data type
+                    tmp_ext = sniff.guess_ext( dataset.path )
+                    if tmp_ext == ext:
+                        binary_ok = True
+                    else:
+                        err_msg = "The content of the file does not match its type (%s)." % ext.capitalize()
+                        file_err( err_msg, dataset, json_file )
                         return
-            if ext == 'ab1' and dataset.file_type != 'ab1':
-                file_err( "You must manually set the 'File Format' to 'Ab1' when uploading ab1 files.", dataset, json_file )
-                return
-            elif ext == 'scf' and dataset.file_type != 'scf':
-                file_err( "You must manually set the 'File Format' to 'Scf' when uploading scf files.", dataset, json_file )
-                return
-            else:
-                ext = 'binary'
-                data_type = 'binary'
+            if not binary_ok:
+                file_err( 'The uploaded file contains inappropriate content', dataset, json_file )
+                return
     if not data_type:
         # We must have a text file
         if check_html( dataset.path ):
@@ -234,10 +239,6 @@
         else:
             ext = dataset.file_type
         data_type = ext
-    elif data_type == 'binary' and ext == 'auto':
-        # currently we are only sniffing sff binary files
-        ext = sniff.guess_ext( dataset.path )
-        data_type = ext
     # Save job info for the framework
     if ext == 'auto' and dataset.ext:
         ext = dataset.ext
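The rewritten branch above is the heart of the upload fix: instead of hard-coding ab1 and scf, upload.py now consults two lists exported by binary.py. Unsniffable formats are accepted only when the user's 'File Format' selection matches the file name's extension, sniffable formats are confirmed against the actual file content, and everything else is rejected as inappropriate. Distilled out of the surrounding job machinery, the decision logic looks roughly like this (the list contents shown are assumptions, since binary.py is not in this diff, and resolve_binary_ext is a hypothetical stand-in for the inline code and its file_err calls):

    from galaxy.datatypes import sniff

    unsniffable_binary_formats = [ 'ab1', 'scf' ]   # assumed contents; really defined in binary.py
    sniffable_binary_formats = [ 'sff' ]            # assumed contents; really defined in binary.py

    def resolve_binary_ext( name, declared_type, path ):
        # Returns the extension to use for a binary upload, or raises
        # ValueError carrying the same messages upload.py passes to file_err().
        ext = ''
        parts = name.split( "." )
        if len( parts ) > 1:
            ext = parts[1].strip().lower()
        if ext in unsniffable_binary_formats:
            if declared_type == ext:
                return ext
            raise ValueError( "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) )
        if ext in sniffable_binary_formats:
            if sniff.guess_ext( path ) == ext:
                return ext
            raise ValueError( "The content of the file does not match its type (%s)." % ext.capitalize() )
        raise ValueError( 'The uploaded file contains inappropriate content' )

Note that, like upload.py, this keys off parts[1], the first dot-suffix of the file name rather than the last one.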