
details: http://www.bx.psu.edu/hg/galaxy/rev/a105810a5da8 changeset: 2848:a105810a5da8 user: Kelly Vincent <kpvincent@bx.psu.edu> date: Thu Oct 08 10:45:31 2009 -0400 description: Removed sniff method from fastqsanger datatype 4 file(s) affected in this change: datatypes_conf.xml.sample lib/galaxy/datatypes/registry.py lib/galaxy/datatypes/sequence.py test/functional/test_sniffing_and_metadata_settings.py diffs (113 lines): diff -r 39ccea195b8e -r a105810a5da8 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample Thu Oct 08 10:37:26 2009 -0400 +++ b/datatypes_conf.xml.sample Thu Oct 08 10:45:31 2009 -0400 @@ -23,7 +23,7 @@ <converter file="fasta_to_tabular_converter.xml" target_datatype="tabular"/> </datatype> <datatype extension="fastq" type="galaxy.datatypes.sequence:Fastq" display_in_upload="true"/> - <datatype extension="fastqsanger" type="galaxy.datatypes.sequence:FastqSanger" display_in_upload="true"/> + <datatype extension="fastqsanger" type="galaxy.datatypes.sequence:FastqSanger"/> <datatype extension="genetrack" type="galaxy.datatypes.tracks:GeneTrack"/> <datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"> <converter file="gff_to_bed_converter.xml" target_datatype="bed"/> @@ -197,7 +197,6 @@ <sniffer type="galaxy.datatypes.qualityscore:QualityScoreSOLiD"/> <sniffer type="galaxy.datatypes.qualityscore:QualityScore454"/> <sniffer type="galaxy.datatypes.sequence:Fasta"/> - <sniffer type="galaxy.datatypes.sequence:FastqSanger"/> <sniffer type="galaxy.datatypes.sequence:Fastq"/> <sniffer type="galaxy.datatypes.interval:Wiggle"/> <sniffer type="galaxy.datatypes.images:Html"/> diff -r 39ccea195b8e -r a105810a5da8 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Thu Oct 08 10:37:26 2009 -0400 +++ b/lib/galaxy/datatypes/registry.py Thu Oct 08 10:45:31 2009 -0400 @@ -179,7 +179,6 @@ qualityscore.QualityScoreSOLiD(), qualityscore.QualityScore454(), sequence.Fasta(), - sequence.FastqSanger(), sequence.Fastq(), interval.Wiggle(), images.Html(), diff -r 39ccea195b8e -r a105810a5da8 lib/galaxy/datatypes/sequence.py --- a/lib/galaxy/datatypes/sequence.py Thu Oct 08 10:37:26 2009 -0400 +++ b/lib/galaxy/datatypes/sequence.py Thu Oct 08 10:45:31 2009 -0400 @@ -174,58 +174,10 @@ except: return False - class FastqSanger( Fastq ): """Class representing a FASTQ sequence ( the Sanger variant )""" file_ext = "fastqsanger" - def sniff( self, filename ): - """ - Determines whether the file is in fastqsanger format (Sanger Variant) - For details, see http://maq.sourceforge.net/fastq.shtml - - Note: There are three kinds of FASTQ files, known as "Sanger" (sometimes called "Standard"), Solexa, and Illumina - These differ in the representation of the quality scores - - >>> fname = get_test_fname( '1.fastqsanger' ) - >>> FastqSanger().sniff( fname ) - True - >>> fname = get_test_fname( '2.fastqsanger' ) - >>> FastqSanger().sniff( fname ) - True - """ - headers = get_headers( filename, None ) - bases_regexp = re.compile( "^[NGTAC]*$" ) - try: - if len( headers ) >= 4 and headers[0][0] and headers[0][0][0] == "@" and headers[2][0] and headers[2][0][0] == "+" and headers[1][0]: - # look through first 20 blocks and make sure bases valid and qualities valid - for i in range( 1, 80, 4 ): - try: - # check that bases are legitimate - if not bases_regexp.match( headers[i][0] ): - return False - # check length of qualities (matching bases) - if len( headers[i+2][0] ) != len( headers[1][0] ): - return False - # check qualities within fastqsanger range - if not self.check_qual_values_within_range( headers[i+2][0] ): - return False - except IndexError: - pass - return True - return False - except: - return False - def check_qual_values_within_range( self, qual_seq ): - under59 = False - for val in qual_seq: - if ord(val) < 33 or ord(val) > 126: - return False - if not under59 and ord(val) < 59: - under59 = True - if under59: - return True - return False try: from galaxy import eggs diff -r 39ccea195b8e -r a105810a5da8 test/functional/test_sniffing_and_metadata_settings.py --- a/test/functional/test_sniffing_and_metadata_settings.py Thu Oct 08 10:37:26 2009 -0400 +++ b/test/functional/test_sniffing_and_metadata_settings.py Thu Oct 08 10:45:31 2009 -0400 @@ -206,16 +206,6 @@ assert latest_hda is not None, "Problem retrieving wig hda from the database" if not latest_hda.name == '1.wig' and not latest_hda.extension == 'wig': raise AssertionError, "wig data type was not correctly sniffed." - def test_085_fastqsanger_datatype( self ): - """Testing correctly sniffing fastqsanger ( the Sanger variant ) data type upon upload""" - self.upload_file( '1.fastqsanger' ) - self.verify_dataset_correctness( '1.fastqsanger' ) - self.check_history_for_string( '1.fastqsanger format: <span class="fastqsanger">fastqsanger</span>, database: \? Info: uploaded fastqsanger file' ) - latest_hda = galaxy.model.HistoryDatasetAssociation.query() \ - .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ).first() - assert latest_hda is not None, "Problem retrieving fastqsanger hda from the database" - if not latest_hda.name == '1.fastqsanger' and not latest_hda.extension == 'fastqsanger': - raise AssertionError, "fastqsanger data type was not correctly sniffed." def test_090_sam_datatype( self ): """Testing correctly sniffing sam format upon upload""" self.upload_file( '1.sam' )