details: http://www.bx.psu.edu/hg/galaxy/rev/670f8800a2bf changeset: 2568:670f8800a2bf user: Kelly Vincent <kpvincent@bx.psu.edu> date: Wed Aug 19 09:13:21 2009 -0400 description: Added two new datatypes (sam and bam) and an associated test 7 file(s) affected in this change: datatypes_conf.xml.sample lib/galaxy/datatypes/images.py lib/galaxy/datatypes/registry.py lib/galaxy/datatypes/tabular.py lib/galaxy/datatypes/test/1.sam test-data/1.sam test/functional/test_sniffing_and_metadata_settings.py diffs (362 lines): diff -r b56291fad13d -r 670f8800a2bf datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample Mon Aug 17 09:54:57 2009 -0400 +++ b/datatypes_conf.xml.sample Wed Aug 19 09:13:21 2009 -0400 @@ -3,6 +3,7 @@ <registration converters_path="lib/galaxy/datatypes/converters"> <datatype extension="ab1" type="galaxy.datatypes.images:Ab1" mimetype="application/octet-stream" display_in_upload="true"/> <datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true"/> + <datatype extension="bam" type="galaxy.datatypes.images:Bam" mimetype="application/octet-stream"/> <datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true"> <converter file="bed_to_gff_converter.xml" target_datatype="gff"/> <converter file="interval_to_coverage.xml" target_datatype="coverage"/> @@ -49,6 +50,7 @@ <datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/> <datatype extension="qualsolid" type="galaxy.datatypes.qualityscore:QualityScoreSOLiD" display_in_upload="true"/> <datatype extension="qual454" type="galaxy.datatypes.qualityscore:QualityScore454" display_in_upload="true"/> + <datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"/> <datatype extension="scf" type="galaxy.datatypes.images:Scf" mimetype="application/octet-stream" display_in_upload="true"/> <datatype extension="taxonomy" type="galaxy.datatypes.tabular:Taxonomy" display_in_upload="true"/> <datatype extension="tabular" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true"/> @@ -205,5 +207,6 @@ <sniffer type="galaxy.datatypes.interval:Gff"/> <sniffer type="galaxy.datatypes.interval:Gff3"/> <sniffer type="galaxy.datatypes.interval:Interval"/> + <sniffer type="galaxy.datatypes.tabular:Sam"/> </sniffers> </datatypes> diff -r b56291fad13d -r 670f8800a2bf lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py Mon Aug 17 09:54:57 2009 -0400 +++ b/lib/galaxy/datatypes/images.py Wed Aug 19 09:13:21 2009 -0400 @@ -4,6 +4,8 @@ import data import logging +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * from urllib import urlencode, quote_plus import zipfile @@ -187,7 +189,7 @@ return 'text/html' def sniff( self, filename ): """ - Determines wether the file is in html format + Determines whether the file is in html format >>> fname = get_test_fname( 'complete.bed' ) >>> Html().sniff( fname ) @@ -233,3 +235,25 @@ return dataset.peek except: return "peek unavailable" + +class Bam( data.Binary ): + """Class describing a BAM binary file""" + file_ext = "bam" + MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True ) + def set_peek( self, dataset ): + if not dataset.dataset.purged: + export_url = "/history_add_to?" + urlencode({'history_id':dataset.history_id,'ext':'bam','name':'bam alignments','info':'Alignments file','dbkey':dataset.dbkey}) + dataset.peek = "Binary bam alignments file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek(self, dataset): + try: + return dataset.peek + except: + return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) + def get_mime(self): + """Returns the mime type of the datatype""" + return 'application/octet-stream' + \ No newline at end of file diff -r b56291fad13d -r 670f8800a2bf lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py Mon Aug 17 09:54:57 2009 -0400 +++ b/lib/galaxy/datatypes/registry.py Wed Aug 19 09:13:21 2009 -0400 @@ -111,6 +111,7 @@ self.datatypes_by_extension = { 'ab1' : images.Ab1(), 'axt' : sequence.Axt(), + 'bam' : images.Bam(), 'bed' : interval.Bed(), 'binseq.zip' : images.Binseq(), 'blastxml' : xml.BlastXml(), @@ -130,6 +131,7 @@ 'qualsolid' : qualityscore.QualityScoreSOLiD(), 'qualsolexa' : qualityscore.QualityScoreSolexa(), 'qual454' : qualityscore.QualityScore454(), + 'sam' : tabular.Sam(), 'scf' : images.Scf(), 'tabular' : tabular.Tabular(), 'taxonomy' : tabular.Taxonomy(), @@ -140,6 +142,7 @@ self.mimetypes_by_extension = { 'ab1' : 'application/octet-stream', 'axt' : 'text/plain', + 'bam' : 'application/octet-stream', 'bed' : 'text/plain', 'binseq.zip' : 'application/zip', 'blastxml' : 'text/plain', @@ -157,6 +160,7 @@ 'qualsolid' : 'text/plain', 'qualsolexa' : 'text/plain', 'qual454' : 'text/plain', + 'sam' : 'text/plain', 'scf' : 'application/octet-stream', 'tabular' : 'text/plain', 'taxonomy' : 'text/plain', @@ -184,7 +188,8 @@ interval.CustomTrack(), interval.Gff(), interval.Gff3(), - interval.Interval() + interval.Interval(), + tabular.Sam() ] def append_to_sniff_order(): # Just in case any supported data types are not included in the config's sniff_order section. diff -r b56291fad13d -r 670f8800a2bf lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py Mon Aug 17 09:54:57 2009 -0400 +++ b/lib/galaxy/datatypes/tabular.py Wed Aug 19 09:13:21 2009 -0400 @@ -236,3 +236,84 @@ out = "Can't create peek %s" % exc return out +class Sam( Tabular ): + file_ext = 'sam' + def __init__(self, **kwd): + """Initialize taxonomy datatype""" + Tabular.__init__( self, **kwd ) + self.column_names = ['QNAME', 'FLAG', 'RNAME', 'POS', 'MAPQ', 'CIGAR', + 'MRNM', 'MPOS', 'ISIZE', 'SEQ', 'QUAL', 'OPT' + ] + def make_html_table( self, dataset, skipchars=[] ): + """Create HTML table, used for displaying peek""" + out = ['<table cellspacing="0" cellpadding="3">'] + try: + # Generate column header + out.append( '<tr>' ) + for i, name in enumerate( self.column_names ): + out.append( '<th>%s.%s</th>' % ( str( i+1 ), name ) ) + # This data type requires at least 11 columns in the data + if dataset.metadata.columns - len( self.column_names ) > 0: + for i in range( len( self.column_names ), dataset.metadata.columns ): + out.append( '<th>%s</th>' % str( i+1 ) ) + out.append( '</tr>' ) + out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) ) + out.append( '</table>' ) + out = "".join( out ) + except Exception, exc: + out = "Can't create peek %s" % exc + return out + def sniff( self, filename ): + """ + Determines whether the file is in SAM format + + A file in SAM format consists of lines of tab-separated data. + The following header line may be the first line: + @QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL + or + @QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT + Data in the OPT column is optional and can consist of tab-separated data + + For complete details see http://samtools.sourceforge.net/SAM1.pdf + + Rules for sniffing as True: + There must be 11 or more columns of data on each line + Columns 2 (FLAG), 4(POS), 5 (MAPQ), 8 (MPOS), and 9 (ISIZE) must be numbers (9 can be negative) + We will only check that up to the first 5 alignments are correctly formatted. + + >>> fname = get_test_fname( 'sequence.maf' ) + >>> Sam().sniff( fname ) + False + >>> fname = get_test_fname( '1.sam' ) + >>> Sam().sniff( fname ) + True + """ + try: + fh = open( filename ) + count = 0 + while True: + line = fh.readline() + line = line.strip() + if not line: + break #EOF + if line: + if line[0] != '@': + linePieces = line.split('\t') + if len(linePieces) < 11: + return False + try: + check = int(linePieces[1]) + check = int(linePieces[3]) + check = int(linePieces[4]) + check = int(linePieces[7]) + check = int(linePieces[8]) + except ValueError: + return False + count += 1 + if count == 5: + return True + if count < 5 and count > 0: + return True + except: + pass + return False diff -r b56291fad13d -r 670f8800a2bf lib/galaxy/datatypes/test/1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/test/1.sam Wed Aug 19 09:13:21 2009 -0400 @@ -0,0 +1,97 @@ +@QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT +1378_11_329 69 * 0 0 * * 0 0 AGACCGGGCGGGGTGGCGTTCGGT %##+'#######%###$#$##$(# +1378_11_329 133 * 0 0 * * 0 0 GTTCGTGGCCGGTGGGTGTTTGGG ###$$#$#$####$'$#$###$ +1378_17_1788 69 * 0 0 * * 0 0 TGCCGTGTCTTGCTAACGCCGATT #'#$$#$###%%##$$$$###### +1378_17_1788 133 * 0 0 * * 0 0 TGGGTGGATGTGTTGTCGTTCATG #$#$###$#$#######$#$#### +1378_25_2035 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'# +1378_25_2035 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&% +1378_28_770 89 chr11.nib:1-134452384 72131356 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210!!44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_28_770 181 chr11.nib:1-134452384 72131356 0 24M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$### +1378_33_1945 113 chr2.nib:1-242951149 181247988 0 23M chr12.nib:1-132349534 41710908 0 GAGAGAGAGAGAGAGAGAGAGAG PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_33_1945 177 chr12.nib:1-132349534 41710908 0 23M chr2.nib:1-242951149 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_34_789 69 * 0 0 * * 0 0 ATGGTGGCTGACGCGTTTGACTGT #$##%#$##$&$#%##$##$###$ +1378_34_789 133 * 0 0 * * 0 0 GGGCTTGCGTTAGTGAGAGGTTGT ###%$%$%%###$####$###$#& +1378_35_263 115 chr16.nib:1-88827254 19671878 0 23M = 19671877 -1 AGAGAGAGAGAGAGAGAGAGTCT 77543:<55#"4!&=964518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_35_263 179 chr16.nib:1-88827254 19671877 0 23M = 19671878 1 GAGAGAGAGAGAGAGAGAGAGTC LE7402DD34FL:27AKE>;432 XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:265 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_43_186 69 * 0 0 * * 0 0 ATACTAGTTGGGACGCGTTGTGCT #$(4%$########$#$###$$$# +1378_43_186 133 * 0 0 * * 0 0 GCTAGGGTTTGGGTTTGCGGTGGG $%#$########%##%#$###'#' +1378_51_1671 117 chr2.nib:1-242951149 190342418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$## +1378_51_1671 153 chr2.nib:1-242951149 190342418 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_56_324 117 chr2.nib:1-242951149 80324999 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/ +1378_56_324 153 chr2.nib:1-242951149 80324999 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_56_773 69 * 0 0 * * 0 0 TGTCGTGAGGTCACTTATCCCCAT &%#%##%%#####$%##$%##$ +1378_56_773 133 * 0 0 * * 0 0 TCTGGTCGGTTTCGGGGAGTGGAA ##%%#&$###$#$##%$####%%$ +1378_62_2027 69 * 0 0 * * 0 0 CTTCCACGATCTGCTCGCTGTGGT (#&&$##$$#$%#%$$$#$###'# +1378_62_2027 133 * 0 0 * * 0 0 GTTGGCCTGGCCTGCCGTGCTGCG *##),/%##$)#%##1$#'%.# +1378_62_2029 69 * 0 0 * * 0 0 TCTGGGCTGTCTTCGGGTCGGTGT $%$$####$##$$#)##%%#$### +1378_62_2029 133 * 0 0 * * 0 0 GGCGGTGTGTGGTGCGGCTGTGCG /$$$=(####%####)$$%$-&%# +1378_67_1795 81 chr16.nib:1-88827254 26739130 0 23M chrY.nib:1-57772954 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_67_1795 161 chrY.nib:1-57772954 57401793 37 23M chr16.nib:1-88827254 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_68_466 69 * 0 0 * * 0 0 GTGATCGTCGGTGCCAGTCCCTGT #(%)+##$#$#%#+$%##$##### +1378_68_466 133 * 0 0 * * 0 0 GTGTCATCTGAGGTAAAGCATTGT /##$09#$#.=$#$76+$%1'### +1378_68_1692 117 chr13.nib:1-114142980 36365609 0 24M = 36365609 0 TTGAACCGGGCACGGGTCTTCTGG #$#######%###$##%&'%)### +1378_68_1692 153 chr13.nib:1-114142980 36365609 37 10M1D13M = 36365609 0 CTGCACATACAGAATATTCATAG 0010/!"0/!!021/132231// XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:10^T13 +1378_80_664 69 * 0 0 * * 0 0 CTGCTTTGATCCCCGGTGGAGCAC 7#%###$$6#######$##$$$## +1378_80_664 133 * 0 0 * * 0 0 TGTCTGCGTTGTATCTCTGGTGTA %##%,%$$#&$$###$#$%##'%# +1378_85_1786 69 * 0 0 * * 0 0 ATACTATGTCGATCTGTAAAAAAA )&.)#3%@$&%-,2#&+.-%0&./ +1378_85_1786 133 * 0 0 * * 0 0 CCCTAGGAGCGTATACCGGACGAG ,'&/%/@,&1,&'/)&,6&&1)(( +1378_86_1011 69 * 0 0 * * 0 0 CTACGTTATTGCTCTGTTTGTCCT ######$%##$$$%###%#$#### +1378_86_1011 133 * 0 0 * * 0 0 AGGCGATGGGATATTATTTTACTT :$###)%##$9$###1$$#$2### +1378_86_1789 89 chr12.nib:1-132349534 39007065 37 23M = 39007065 0 GCTTTCCATAGATGTGTAATTTC J2K]]Z5!GN?@U]]]VX]UYYP XT:A:U CM:i:1 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:23 +1378_86_1789 181 chr12.nib:1-132349534 39007065 0 24M = 39007065 0 ACAACTTAAATAATCATGGACCGG 02,5$$0&6#%?*,$'#%&/15.1 +1378_91_1596 69 * 0 0 * * 0 0 TTAGCGGTTGACTATCTGCTGACA *&+'#9'(%*'#//,&<),/)'*# +1378_91_1596 133 * 0 0 * * 0 0 GCTTTTTCATTCGGTGCCTTTGGA '>%/3%=()8'#.%?50$&5>%)% +1378_94_1595 69 chr7.nib:1-158821424 127518258 0 24M = 127518258 0 CGTGCGACAGCCCATGTTTTCAGA -=..5,3826&*+.+#+#%%6;%# +1378_94_1595 137 chr7.nib:1-158821424 127518258 37 23M = 127518258 0 TGAGATAAACACCTAACATGCTC M]]FN]]\V]]]Q>T]KIG:LVN XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_95_1039 69 * 0 0 * * 0 0 CGGCGTCCATCTTCGCCTTGAGAT $##.#$##$$#%$#$%%$###$)$ +1378_95_1039 133 * 0 0 * * 0 0 GTTCTGTGCCAGGTGAGGTACGGA #,./#$&)6##+,'#$$0(##$ +1378_95_1767 65 chr11.nib:1-134452384 65333552 25 23M chr3.nib:1-199501827 123725482 0 CAACTGGTGGCATCTGGACAAAC W[[TZYY]]RO<BI7!!:!!>@2 XT:A:U CM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_95_1767 129 chr3.nib:1-199501827 123725482 37 6M1I16M chr11.nib:1-134452384 65333552 0 ATTTATCTGTCTCATTCATTATT <AGB8B"!V]]UO/&JB4DE88E XT:A:U CM:i:2 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_96_1037 69 * 0 0 * * 0 0 ATCCCCCAAGATGCCTGTTGATTG $#$'##$$$#%$$#%###+##$#$ +1378_96_1037 133 * 0 0 * * 0 0 CTGCTGGGCCATTTGACTTACTCA '$#+#(##-%5##+*##-.$$$ +1378_96_1764 81 chr15.nib:1-100338915 89251272 25 23M chr7.nib:1-158821424 19412615 0 AGAAATGGTCGCACCCTCTGGTT E*2ZEHX\SN]O>SYRL):LIOL XT:A:U CM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_96_1764 161 chr7.nib:1-158821424 19412615 37 23M chr15.nib:1-100338915 89251272 0 GTATAGCCCACAACGCCTAATAT ZMBS]UW]UYR\]QPZ[SMYL7C XT:A:U CM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_98_1574 69 * 0 0 * * 0 0 GTTCTGCCGGTGTCTGTGGCGGGC $$#+&$$####%$$$###$%#%%# +1378_98_1574 133 * 0 0 * * 0 0 AGGCGAGTGTGGGGGTTGTTTGAG +%%$#)##%##$####%###$%$# +1378_107_1647 69 * 0 0 * * 0 0 AGGCCTACTACGCGTCATTGATAG $$#$(.#%#$$####&$%##($ +1378_107_1647 133 * 0 0 * * 0 0 GGTCTGGTTCTATGTTGGTCGACT ###'$$#$$$(#%###(#$##$%# +1378_111_829 69 chr9.nib:1-140273252 82506894 0 24M = 82506894 0 TGCGGCACTTGCTTCTTCGTATTT %#%##%#$%#$#%###$$##$$ +1378_111_829 137 chr9.nib:1-140273252 82506894 37 4M1I18M = 82506894 0 GATGCGTAATCTAGTAAAATAAG 0/362//00/5516500210451 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_111_1900 69 * 0 0 * * 0 0 TCCCCTCGCTCGGCTCTGTGCTGT $&%*$#(#)##$#'##%(##$#$% +1378_111_1900 133 * 0 0 * * 0 0 GCACGCCTTTGGGCTAAGCCGTAA )$)'#%$########$'#&%$#(# +1378_112_1483 69 * 0 0 * * 0 0 TGTCCAGCTATGCGGCTTCCTCCT %#$+#%#$#####%####%$## +1378_112_1483 133 * 0 0 * * 0 0 TGGAGTGGTGTGTTTGCTGAGCCA #$#)#############$#%#%'% +1378_125_1287 69 * 0 0 * * 0 0 TGTCTCTGGGGGGCCTGGTTAGGT $##13$'%#$###$$###$$$# +1378_125_1287 133 * 0 0 * * 0 0 TGACGTGGGTTGTCCCGTGAGATT ##$%%#$###$##$$#&%##$(%% +1378_126_468 117 chr11.nib:1-134452384 72541052 0 24M = 72541052 0 TGCCTCTATACAGATTAGTCCTCT )7,7..?97594@8=,=?813@>7 +1378_126_468 153 chr11.nib:1-134452384 72541052 0 23M = 72541052 0 AGGCAAGACTCTGTCTCAAAAAA PK5G]]PDT\]SEXY[]]]]]]] XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:4 X1:i:15713 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_127_664 69 * 0 0 * * 0 0 AGAGGTTGGTGTCTTGTCGCAGCT ##'#$######$$%######$$$# +1378_127_664 133 * 0 0 * * 0 0 TCGCTTTGCCTATGTTTGTTCGGA #%$%##$%#%%###$$###)-' +1378_129_463 97 chr8.nib:1-146274826 29931771 37 23M chr19.nib:1-63811651 5702213 0 GTAGCTCTGTTTCACATTAGGGG J>AQ[G>C?NM:GD=)*PLORIF XT:A:U CM:i:1 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:23 +1378_129_463 145 chr19.nib:1-63811651 5702213 0 23M chr8.nib:1-146274826 29931771 0 AAAAAAAAAAAAAAAAAAAAAAA JOI:AHGD==@KQB78HF>KA8> XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:583698 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_129_875 69 * 0 0 * * 0 0 TTTCTATGGCTTACGCTGTCTGCC #$($##%####%$#$#####$### +1378_129_875 133 * 0 0 * * 0 0 GACCTTTACGTATTGGGGGTTGGC ###)###+###$##$#&%##$,#$ +1378_140_1251 69 * 0 0 * * 0 0 ATCCTAGCGCGGTGTCTTGGGGAC #$%1#$$$##$##$#$#$##$%$$ +1378_140_1251 133 * 0 0 * * 0 0 TTTCCTTCGTGTGCGTGCGGAGTG #%#%$##$$$######.$$$%#%( +1378_141_809 69 * 0 0 * * 0 0 TGTCCTCCAGTGTCTGTTGGGTGT %&,-##$$#(%###$#$$'###'# +1378_141_809 133 * 0 0 * * 0 0 TCTCGTGGTTTCTTTTTTATGTGT ##%)##$$#####%$#$#%%#'## +1378_144_983 69 * 0 0 * * 0 0 AGCGCCCGGTTGGTGCGGCTCGTC -$(&%*$#*#))#$$$#%%$#$## +1378_144_983 133 * 0 0 * * 0 0 GTTCGTTCGTGGTGTACGAGGGTG #(#%#####($#%##$$#%##%#) +1378_153_270 69 * 0 0 * * 0 0 AGTCCTTGTCCCCTGGGTTTTCCC +''$#&%$%#$##&$$($#$$# +1378_153_270 133 * 0 0 * * 0 0 GGCCGTGTGCGGGTGTAGATTGGA %$##($#######$&$$$$%## +1378_155_1689 65 chrX.nib:1-154913754 106941539 37 23M = 106940385 -1154 ATCTCCTCTTCCTTCCATTCCAC \]]]Y]]]]]UV]]]ZYZZ]]RV XT:A:U CM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_155_1689 129 chrX.nib:1-154913754 106940385 37 23M = 106941539 1154 GACTATGAGGTTTTCATTCAACA ]]]]\\]]]YW]]]WRZ]]WIOK XT:A:U CM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_157_1580 69 * 0 0 * * 0 0 TGGGCCTCGGTGCCCTTGGTCTGT #%)$##'#$$$####%#$#$## +1378_157_1580 133 * 0 0 * * 0 0 GGGATTGAAGGGATGTATGCTAGG #%$&%#$$'%$%#$##*#%$$$$# +1378_161_317 69 * 0 0 * * 0 0 TTGGCCGGCAACCCCGGTACCTAA 7<,<'@)@>.)2@/')'&(?/-<( +1378_161_317 133 * 0 0 * * 0 0 AATCCATACCCACAAAAGCAGGCC .&%','(@''?7//+&)+2.+)0) +1378_177_735 113 chr2.nib:1-242951149 222173182 25 23M = 222173882 700 TTGTTCAGCGCCGATTGTCAATC KPNICFMS]]]Z]]]]Y]]]]]] XT:A:U CM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:1G21 +1378_177_735 177 chr2.nib:1-242951149 222173882 37 23M = 222173182 -700 AGAATTCCTAACAAAATGTGAAG ES6-]]]]]]]]]]]]]]]]]]] XT:A:U CM:i:1 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:23 +1378_181_1684 69 * 0 0 * * 0 0 CGACTCCCGCATTCACGGTCAAGT &*#,##$#&$*$$#$#$$$#%$## +1378_181_1684 133 * 0 0 * * 0 0 TTTCTGTTGTGGTTTTGTTGGGGT $##'$%'##%##$%$#$$####$* +1378_187_1407 69 * 0 0 * * 0 0 TGGCGTCCACTCGTGGGTCTATCG $#$'%#$%$%&$%#####$#$#%# +1378_187_1407 133 * 0 0 * * 0 0 TTGGGTGAAATCTTGTCGAGTGGA #####$$###$#####%##%%) +1378_203_721 97 chr1.nib:1-247249719 245680524 25 23M chr2.nib:1-242951149 213173999 0 GTAAAATTTGTGGAGATTTAAGT ]VEFFEZ]XPW]TOVINQ,;T!! XT:A:U CM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_203_721 145 chr2.nib:1-242951149 213173999 37 4M1I18M chr1.nib:1-247249719 245680524 0 ACCTAACAAAATTGTTCAATATG F>8AWT<AV]Q9B"+]O@IF=K] XT:A:U CM:i:2 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_206_2039 113 chr4.nib:1-191273063 103793427 0 23M chr18.nib:1-76117153 57165542 0 ACACACACACACACACACACACA NKWZVWZ]]XV[]]]]]]]]]]] XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:1292040 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_206_2039 177 chr18.nib:1-76117153 57165542 0 23M chr4.nib:1-191273063 103793427 0 CACACACACACACACACACACAC NAJ[SPT[]]]W[]]]]]]]]]] XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:1292040 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 diff -r b56291fad13d -r 670f8800a2bf test-data/1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.sam Wed Aug 19 09:13:21 2009 -0400 @@ -0,0 +1,29 @@ +@QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT +1378_11_329 69 * 0 0 * * 0 0 AGACCGGGCGGGGTGGCGTTCGGT %##+'#######%###$#$##$(# +1378_11_329 133 * 0 0 * * 0 0 GTTCGTGGCCGGTGGGTGTTTGGG ###$$#$#$####$'$#$###$ +1378_17_1788 69 * 0 0 * * 0 0 TGCCGTGTCTTGCTAACGCCGATT #'#$$#$###%%##$$$$###### +1378_17_1788 133 * 0 0 * * 0 0 TGGGTGGATGTGTTGTCGTTCATG #$#$###$#$#######$#$#### +1378_25_2035 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'# +1378_25_2035 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&% +1378_28_770 89 chr11.nib:1-134452384 72131356 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210!!44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_28_770 181 chr11.nib:1-134452384 72131356 0 24M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$### +1378_33_1945 113 chr2.nib:1-242951149 181247988 0 23M chr12.nib:1-132349534 41710908 0 GAGAGAGAGAGAGAGAGAGAGAG PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_33_1945 177 chr12.nib:1-132349534 41710908 0 23M chr2.nib:1-242951149 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_34_789 69 * 0 0 * * 0 0 ATGGTGGCTGACGCGTTTGACTGT #$##%#$##$&$#%##$##$###$ +1378_34_789 133 * 0 0 * * 0 0 GGGCTTGCGTTAGTGAGAGGTTGT ###%$%$%%###$####$###$#& +1378_35_263 115 chr16.nib:1-88827254 19671878 0 23M = 19671877 -1 AGAGAGAGAGAGAGAGAGAGTCT 77543:<55#"4!&=964518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_35_263 179 chr16.nib:1-88827254 19671877 0 23M = 19671878 1 GAGAGAGAGAGAGAGAGAGAGTC LE7402DD34FL:27AKE>;432 XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:265 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 +1378_43_186 69 * 0 0 * * 0 0 ATACTAGTTGGGACGCGTTGTGCT #$(4%$########$#$###$$$# +1378_43_186 133 * 0 0 * * 0 0 GCTAGGGTTTGGGTTTGCGGTGGG $%#$########%##%#$###'#' +1378_51_1671 117 chr2.nib:1-242951149 190342418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$## +1378_51_1671 153 chr2.nib:1-242951149 190342418 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_56_324 117 chr2.nib:1-242951149 80324999 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/ +1378_56_324 153 chr2.nib:1-242951149 80324999 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22 +1378_56_773 69 * 0 0 * * 0 0 TGTCGTGAGGTCACTTATCCCCAT &%#%##%%#####$%##$%##$ +1378_56_773 133 * 0 0 * * 0 0 TCTGGTCGGTTTCGGGGAGTGGAA ##%%#&$###$#$##%$####%%$ +1378_62_2027 69 * 0 0 * * 0 0 CTTCCACGATCTGCTCGCTGTGGT (#&&$##$$#$%#%$$$#$###'# +1378_62_2027 133 * 0 0 * * 0 0 GTTGGCCTGGCCTGCCGTGCTGCG *##),/%##$)#%##1$#'%.# +1378_62_2029 69 * 0 0 * * 0 0 TCTGGGCTGTCTTCGGGTCGGTGT $%$$####$##$$#)##%%#$### +1378_62_2029 133 * 0 0 * * 0 0 GGCGGTGTGTGGTGCGGCTGTGCG /$$$=(####%####)$$%$-&%# +1378_67_1795 81 chr16.nib:1-88827254 26739130 0 23M chrY.nib:1-57772954 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23 +1378_67_1795 161 chrY.nib:1-57772954 57401793 37 23M chr16.nib:1-88827254 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23 \ No newline at end of file diff -r b56291fad13d -r 670f8800a2bf test/functional/test_sniffing_and_metadata_settings.py --- a/test/functional/test_sniffing_and_metadata_settings.py Mon Aug 17 09:54:57 2009 -0400 +++ b/test/functional/test_sniffing_and_metadata_settings.py Wed Aug 19 09:13:21 2009 -0400 @@ -226,6 +226,16 @@ assert latest_hda is not None, "Problem retrieving fastqsanger hda from the database" if not latest_hda.name == '1.fastqsanger' and not latest_hda.extension == 'fastqsanger': raise AssertionError, "fastqsanger data type was not correctly sniffed." + def test_090_sam_datatype( self ): + """Testing correctly sniffing sam format upon upload""" + self.upload_file( '1.sam' ) + self.verify_dataset_correctness( '1.sam' ) + self.check_history_for_string( '1.sam format: <span class="sam">sam</span>, database: \? Info: uploaded sam file' ) + latest_hda = galaxy.model.HistoryDatasetAssociation.query() \ + .order_by( desc( galaxy.model.HistoryDatasetAssociation.table.c.create_time ) ).first() + assert latest_hda is not None, "Problem retrieving sam hda from the database" + if not latest_hda.name == '1.sam' and not latest_hda.extension == 'sam': + raise AssertionError, "sam data type was not correctly sniffed." def test_9999_clean_up( self ): self.delete_history( id=self.security.encode_id( history1.id ) ) self.logout()