4 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/2eb6dddb3866/ changeset: 2eb6dddb3866 user: fangly date: 2011-10-05 10:04:58 summary: Paired-end code that properly ignores description part of FASTQ headers affected #: 1 file diff -r 087a766b3eca312d49caffa6b821d304658825ae -r 2eb6dddb3866adef30b72e92e747d9ece4e11da9 lib/galaxy_utils/sequence/fastq.py --- a/lib/galaxy_utils/sequence/fastq.py +++ b/lib/galaxy_utils/sequence/fastq.py @@ -514,9 +514,13 @@ self.apply_galaxy_conventions = apply_galaxy_conventions def close( self ): return self.file.close() - def get( self, sequence_id ): - if not isinstance( sequence_id, basestring ): - sequence_id = sequence_id.identifier + def get( self, sequence_identifier ): + # Input is either a sequence ID or a sequence object + if not isinstance( sequence_identifier, basestring ): + # Input was a sequence object (not a sequence ID). Get the sequence ID + sequence_identifier = sequence_identifier.identifier + # Get only the ID part of the sequence header + sequence_id, sequence_sep, sequence_desc = sequence_identifier.partition(' ') rval = None if sequence_id in self.offset_dict: initial_offset = self.file.tell() @@ -525,7 +529,7 @@ del self.offset_dict[ sequence_id ] self.file.seek( seq_offset ) rval = self.reader.next() - #assert rval.identifier == sequence_id, 'seq id mismatch' #should be able to remove this + #assert rval.id == sequence_id, 'seq id mismatch' #should be able to remove this self.file.seek( initial_offset ) else: while True: @@ -535,13 +539,14 @@ except StopIteration: self.eof = True break #eof, id not found, will return None - if fastq_read.identifier == sequence_id: + fastq_read_id, fastq_read_sep, fastq_read_desc = fastq_read.identifier.partition(' ') + if fastq_read_id == sequence_id: rval = fastq_read break else: - if fastq_read.identifier not in self.offset_dict: - self.offset_dict[ fastq_read.identifier ] = [] - self.offset_dict[ fastq_read.identifier ].append( offset ) + if fastq_read_id not in self.offset_dict: + self.offset_dict[ fastq_read_id ] = [] + self.offset_dict[ fastq_read_id ].append( offset ) if rval is not None and self.apply_galaxy_conventions: rval.apply_galaxy_conventions() return rval @@ -582,16 +587,18 @@ self.format = format self.force_quality_encoding = force_quality_encoding def join( self, read1, read2 ): - if read1.identifier.endswith( '/2' ) and read2.identifier.endswith( '/1' ): + read1_id, read1_sep, read1_desc = read1.identifier.partition(' ') + read2_id, read2_sep, read2_desc = read2.identifier.partition(' ') + if read1_id.endswith( '/2' ) and read2_id.endswith( '/1' ): #swap 1 and 2 tmp = read1 read1 = read2 read2 = tmp del tmp - if read1.identifier.endswith( '/1' ) and read2.identifier.endswith( '/2' ): - identifier = read1.identifier[:-2] - else: - identifier = read1.identifier + if read1_id.endswith( '/1' ) and read2_id.endswith( '/2' ): + read1_id = read1_id[:-2] + + identifier = read1_id + ' ' + read1_desc #use force quality encoding, if not present force to encoding of first read force_quality_encoding = self.force_quality_encoding @@ -621,17 +628,18 @@ rval.quality = "%s %s" % ( new_read1.quality.strip(), new_read2.quality.strip() ) return rval def get_paired_identifier( self, fastq_read ): - identifier = fastq_read.identifier - if identifier[-2] == '/': - if identifier[-1] == "1": - identifier = "%s2" % identifier[:-1] - elif identifier[-1] == "2": - identifier = "%s1" % identifier[:-1] - return identifier + read_id, read_sep, read_desc = fastq_read.identifier.partition(' ') + if read_id[-2] == '/': + if read_id[-1] == "1": + read_id = "%s2" % read_id[:-1] + elif read_id[-1] == "2": + read_id = "%s1" % read_id[:-1] + return read_id def is_first_mate( self, sequence_id ): is_first = None if not isinstance( sequence_id, basestring ): sequence_id = sequence_id.identifier + sequence_id, sequence_sep, sequence_desc = sequence_id.partition(' ') if sequence_id[-2] == '/': if sequence_id[-1] == "1": is_first = True https://bitbucket.org/galaxy/galaxy-central/commits/34e7cf3bcef0/ changeset: 34e7cf3bcef0 user: fangly date: 2011-11-30 02:38:52 summary: Avoid trailing whitespace affected #: 1 file diff -r 2eb6dddb3866adef30b72e92e747d9ece4e11da9 -r 34e7cf3bcef0eb7bf7d0684e8ac5d91e03750d8c lib/galaxy_utils/sequence/fastq.py --- a/lib/galaxy_utils/sequence/fastq.py +++ b/lib/galaxy_utils/sequence/fastq.py @@ -597,8 +597,10 @@ del tmp if read1_id.endswith( '/1' ) and read2_id.endswith( '/2' ): read1_id = read1_id[:-2] - - identifier = read1_id + ' ' + read1_desc + + identifier = read1_id + if read1_desc: + identifier = identifier + ' ' + read1_desc #use force quality encoding, if not present force to encoding of first read force_quality_encoding = self.force_quality_encoding https://bitbucket.org/galaxy/galaxy-central/commits/7d4a431f7188/ changeset: 7d4a431f7188 user: fangly date: 2011-11-30 03:01:07 summary: Updated tests for FASTQ interlacer/deinterlacer tool affected #: 2 files diff -r 34e7cf3bcef0eb7bf7d0684e8ac5d91e03750d8c -r 7d4a431f7188d71d5e0ba2655a10145ecbdb4468 test-data/paired_end_2.fastqsanger --- a/test-data/paired_end_2.fastqsanger +++ b/test-data/paired_end_2.fastqsanger @@ -1,6 +1,6 @@ -@1539:931/2 +@1539:931/2 this read has a description GCGCGTAACGTTTCACCTCGAGATCGTTGTCGGCCGCAATCTCCTGGGGGCGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT -+1539:931/2 ++1539:931/2 this read has a description aee_dcadeeWcaaadJbdaff[fffc]dcfe[dRc^\[^QVOZXXZSPFWNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @2971:937/2 CTCGCACGGCCGCCTCGACCACTTGGTCTGGCGTCATGCGCAATTTTTTCTCCATGTGGAACGGGCTGGTGGCGATGAACGTATGAATATGCCCCCGCGCT diff -r 34e7cf3bcef0eb7bf7d0684e8ac5d91e03750d8c -r 7d4a431f7188d71d5e0ba2655a10145ecbdb4468 test-data/paired_end_merged.fastqsanger --- a/test-data/paired_end_merged.fastqsanger +++ b/test-data/paired_end_merged.fastqsanger @@ -2,9 +2,9 @@ NACATCAACACTCAGTAACGGCTGGCGCAAAATGGCATTGATTAACGAAGACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG +1539:931/1 BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB -@1539:931/2 +@1539:931/2 this read has a description GCGCGTAACGTTTCACCTCGAGATCGTTGTCGGCCGCAATCTCCTGGGGGCGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT -+1539:931/2 ++1539:931/2 this read has a description aee_dcadeeWcaaadJbdaff[fffc]dcfe[dRc^\[^QVOZXXZSPFWNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @2971:937/1 NCGGAGACTTCGAGGCCATCCAGTCGATTGCCAAAGTCATCAAGGGGTCGACGATCTGCTCCCTTGCCCGTTCCAACGAGAATGAAATCCGCCGCGCGTGG https://bitbucket.org/galaxy/galaxy-central/commits/7848d6fd1b7a/ changeset: 7848d6fd1b7a user: jgoecks date: 2013-01-17 20:12:57 summary: Merged in fangly/galaxy-central (pull request #8: Paired-end code mishandles description of FASTQ headers) affected #: 3 files diff -r 1b95e5b076fee018402e5c94534a2e65ea6c5315 -r 7848d6fd1b7a3ef8330ad1b31f5a3521094ad706 lib/galaxy_utils/sequence/fastq.py --- a/lib/galaxy_utils/sequence/fastq.py +++ b/lib/galaxy_utils/sequence/fastq.py @@ -514,9 +514,13 @@ self.apply_galaxy_conventions = apply_galaxy_conventions def close( self ): return self.file.close() - def get( self, sequence_id ): - if not isinstance( sequence_id, basestring ): - sequence_id = sequence_id.identifier + def get( self, sequence_identifier ): + # Input is either a sequence ID or a sequence object + if not isinstance( sequence_identifier, basestring ): + # Input was a sequence object (not a sequence ID). Get the sequence ID + sequence_identifier = sequence_identifier.identifier + # Get only the ID part of the sequence header + sequence_id, sequence_sep, sequence_desc = sequence_identifier.partition(' ') rval = None if sequence_id in self.offset_dict: initial_offset = self.file.tell() @@ -525,7 +529,7 @@ del self.offset_dict[ sequence_id ] self.file.seek( seq_offset ) rval = self.reader.next() - #assert rval.identifier == sequence_id, 'seq id mismatch' #should be able to remove this + #assert rval.id == sequence_id, 'seq id mismatch' #should be able to remove this self.file.seek( initial_offset ) else: while True: @@ -535,13 +539,14 @@ except StopIteration: self.eof = True break #eof, id not found, will return None - if fastq_read.identifier == sequence_id: + fastq_read_id, fastq_read_sep, fastq_read_desc = fastq_read.identifier.partition(' ') + if fastq_read_id == sequence_id: rval = fastq_read break else: - if fastq_read.identifier not in self.offset_dict: - self.offset_dict[ fastq_read.identifier ] = [] - self.offset_dict[ fastq_read.identifier ].append( offset ) + if fastq_read_id not in self.offset_dict: + self.offset_dict[ fastq_read_id ] = [] + self.offset_dict[ fastq_read_id ].append( offset ) if rval is not None and self.apply_galaxy_conventions: rval.apply_galaxy_conventions() return rval @@ -582,16 +587,20 @@ self.format = format self.force_quality_encoding = force_quality_encoding def join( self, read1, read2 ): - if read1.identifier.endswith( '/2' ) and read2.identifier.endswith( '/1' ): + read1_id, read1_sep, read1_desc = read1.identifier.partition(' ') + read2_id, read2_sep, read2_desc = read2.identifier.partition(' ') + if read1_id.endswith( '/2' ) and read2_id.endswith( '/1' ): #swap 1 and 2 tmp = read1 read1 = read2 read2 = tmp del tmp - if read1.identifier.endswith( '/1' ) and read2.identifier.endswith( '/2' ): - identifier = read1.identifier[:-2] - else: - identifier = read1.identifier + if read1_id.endswith( '/1' ) and read2_id.endswith( '/2' ): + read1_id = read1_id[:-2] + + identifier = read1_id + if read1_desc: + identifier = identifier + ' ' + read1_desc #use force quality encoding, if not present force to encoding of first read force_quality_encoding = self.force_quality_encoding @@ -621,17 +630,18 @@ rval.quality = "%s %s" % ( new_read1.quality.strip(), new_read2.quality.strip() ) return rval def get_paired_identifier( self, fastq_read ): - identifier = fastq_read.identifier - if identifier[-2] == '/': - if identifier[-1] == "1": - identifier = "%s2" % identifier[:-1] - elif identifier[-1] == "2": - identifier = "%s1" % identifier[:-1] - return identifier + read_id, read_sep, read_desc = fastq_read.identifier.partition(' ') + if read_id[-2] == '/': + if read_id[-1] == "1": + read_id = "%s2" % read_id[:-1] + elif read_id[-1] == "2": + read_id = "%s1" % read_id[:-1] + return read_id def is_first_mate( self, sequence_id ): is_first = None if not isinstance( sequence_id, basestring ): sequence_id = sequence_id.identifier + sequence_id, sequence_sep, sequence_desc = sequence_id.partition(' ') if sequence_id[-2] == '/': if sequence_id[-1] == "1": is_first = True diff -r 1b95e5b076fee018402e5c94534a2e65ea6c5315 -r 7848d6fd1b7a3ef8330ad1b31f5a3521094ad706 test-data/paired_end_2.fastqsanger --- a/test-data/paired_end_2.fastqsanger +++ b/test-data/paired_end_2.fastqsanger @@ -1,6 +1,6 @@ -@1539:931/2 +@1539:931/2 this read has a description GCGCGTAACGTTTCACCTCGAGATCGTTGTCGGCCGCAATCTCCTGGGGGCGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT -+1539:931/2 ++1539:931/2 this read has a description aee_dcadeeWcaaadJbdaff[fffc]dcfe[dRc^\[^QVOZXXZSPFWNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @2971:937/2 CTCGCACGGCCGCCTCGACCACTTGGTCTGGCGTCATGCGCAATTTTTTCTCCATGTGGAACGGGCTGGTGGCGATGAACGTATGAATATGCCCCCGCGCT diff -r 1b95e5b076fee018402e5c94534a2e65ea6c5315 -r 7848d6fd1b7a3ef8330ad1b31f5a3521094ad706 test-data/paired_end_merged.fastqsanger --- a/test-data/paired_end_merged.fastqsanger +++ b/test-data/paired_end_merged.fastqsanger @@ -2,9 +2,9 @@ NACATCAACACTCAGTAACGGCTGGCGCAAAATGGCATTGATTAACGAAGACTTCCCGCGCGTGAAGGCGCCGGCAAACGAGGCTCGGGAAGGGGCTCCCG +1539:931/1 BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB -@1539:931/2 +@1539:931/2 this read has a description GCGCGTAACGTTTCACCTCGAGATCGTTGTCGGCCGCAATCTCCTGGGGGCGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT -+1539:931/2 ++1539:931/2 this read has a description aee_dcadeeWcaaadJbdaff[fffc]dcfe[dRc^\[^QVOZXXZSPFWNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB @2971:937/1 NCGGAGACTTCGAGGCCATCCAGTCGATTGCCAAAGTCATCAAGGGGTCGACGATCTGCTCCCTTGCCCGTTCCAACGAGAATGAAATCCGCCGCGCGTGG Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.