commit/galaxy-central: jgoecks: Fix problems with computing indices for GFF/GTF datasets.
1 new changeset in galaxy-central:

http://bitbucket.org/galaxy/galaxy-central/changeset/c265358e1062/
changeset:   c265358e1062
user:        jgoecks
date:        2011-10-01 00:38:30
summary:     Fix problems with computing indices for GFF/GTF datasets.
affected #:  1 file (-1 bytes)

--- a/lib/galaxy/datatypes/util/gff_util.py	Fri Sep 30 15:07:11 2011 -0400
+++ b/lib/galaxy/datatypes/util/gff_util.py	Fri Sep 30 18:38:30 2011 -0400
@@ -133,6 +133,7 @@
         self.last_line = None
         self.cur_offset = 0
         self.seed_interval = None
+        self.seed_interval_line_len = 0
 
     def parse_row( self, line ):
         interval = GFFInterval( self, line.split( "\t" ), self.chrom_col, self.feature_col, \
@@ -160,12 +161,12 @@
             # For debugging, uncomment this to propogate parsing exceptions up.
             # I.e. the underlying reason for an unexpected StopIteration exception
             # can be found by uncommenting this.
-            #raise e
+            # raise e
 
         #
         # Get next GFFFeature
         #
-        raw_size = 0
+        raw_size = self.seed_interval_line_len
 
         # If there is no seed interval, set one. Also, if there are no more
         # intervals to read, this is where iterator dies.
@@ -184,8 +185,9 @@
             return_val = self.seed_interval
             return_val.raw_size = len( self.current_line )
             self.seed_interval = None
+            self.seed_interval_line_len = 0
             return return_val
-        
+
         # Initialize feature identifier from seed.
         feature_group = self.seed_interval.attributes.get( 'group', None ) # For GFF
         # For GFF3
@@ -220,21 +222,28 @@
             if isinstance( interval, Comment ):
                 continue
 
-            # If interval not associated with feature, break.
+            # Determine if interval is part of feature.
+            part_of = True
             group = interval.attributes.get( 'group', None )
             # GFF test:
             if group and feature_group != group:
-                break
+                part_of = False
             # GFF3 test:
             parent_id = interval.attributes.get( 'Parent', None )
             cur_id = interval.attributes.get( 'ID', None )
             if ( cur_id and cur_id != feature_id ) or ( parent_id and parent_id != feature_id ):
-                break
+                part_of = False
             # GTF test:
             gene_id = interval.attributes.get( 'gene_id', None )
             transcript_id = interval.attributes.get( 'transcript_id', None )
             if ( transcript_id and transcript_id != feature_transcript_id ) or \
                 ( gene_id and gene_id != feature_gene_id ):
+                part_of = False
+
+            # If interval is not part of feature, clean up and break.
+            if not part_of:
+                # Adjust raw size because current line is not part of feature.
+                raw_size -= len( self.current_line )
                 break
 
             # Interval associated with feature.
@@ -242,6 +251,7 @@
 
         # Last interval read is the seed for the next interval.
         self.seed_interval = interval
+        self.seed_interval_line_len = len( self.current_line )
 
         # Return feature.
         feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col, \
@@ -255,7 +265,6 @@
         return feature
 
 
-
 def convert_bed_coords_to_gff( interval ):
     """
     Converts an interval object's coordinates from BED format to GFF format.

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
participants (1)
-
Bitbucket