1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/760d34e6c692/ changeset: r5464:760d34e6c692 user: kanwei date: 2011-04-28 01:08:31 summary: trackster: Restore interval_index as Gff indexer for now due to sorting issues affected #: 3 files (2.1 KB) --- a/datatypes_conf.xml.sample Wed Apr 27 17:33:08 2011 -0400 +++ b/datatypes_conf.xml.sample Wed Apr 27 19:08:31 2011 -0400 @@ -66,8 +66,7 @@ </datatype><datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"><converter file="gff_to_bed_converter.xml" target_datatype="bed"/> - <converter file="gff_to_bgzip_converter.xml" target_datatype="bgzip"/> - <converter file="gff_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> + <converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/><converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/><display file="ensembl/ensembl_gff.xml" inherit="True"/><!-- <display file="gbrowse/gbrowse_gff.xml" inherit="True" /> --> @@ -75,10 +74,7 @@ <datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/><datatype extension="gif" type="galaxy.datatypes.images:Image" mimetype="image/gif"/><datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/> - <datatype extension="gtf" type="galaxy.datatypes.interval:Gtf" display_in_upload="true"> - <converter file="gff_to_bgzip_converter.xml" target_datatype="bgzip"/> - <converter file="gff_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - </datatype> + <datatype extension="gtf" type="galaxy.datatypes.interval:Gtf" display_in_upload="true"/><datatype extension="html" type="galaxy.datatypes.images:Html" mimetype="text/html"/><datatype extension="interval" type="galaxy.datatypes.interval:Interval" display_in_upload="true"><converter file="interval_to_bed_converter.xml" target_datatype="bed"/> --- 
a/lib/galaxy/datatypes/interval.py Wed Apr 27 17:33:08 2011 -0400 +++ b/lib/galaxy/datatypes/interval.py Wed Apr 27 19:08:31 2011 -0400 @@ -772,7 +772,7 @@ return False def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree"} + return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"} class Gff3( Gff ): --- a/lib/galaxy/visualization/tracks/data_providers.py Wed Apr 27 17:33:08 2011 -0400 +++ b/lib/galaxy/visualization/tracks/data_providers.py Wed Apr 27 19:08:31 2011 -0400 @@ -360,13 +360,7 @@ f = open( self.original_dataset.file_name ) return f, BigWigFile(file=f) -class TabixDataProvider( TracksDataProvider ): - """ - Tabix index data provider for the Galaxy track browser. - """ - - col_name_data_attr_mapping = { 4 : { 'index': 4 , 'name' : 'Score' } } - +class FilterableMixin: def get_filters( self ): """ Returns a dataset's filters. """ @@ -422,6 +416,13 @@ filters = [ { 'name': 'Score', 'type': 'int', 'index': filter_col } ] return filters + +class TabixDataProvider( TracksDataProvider, FilterableMixin ): + """ + Tabix index data provider for the Galaxy track browser. 
+ """ + + col_name_data_attr_mapping = { 4 : { 'index': 4 , 'name' : 'Score' } } def get_iterator( self, chrom, start, end ): start, end = int(start), int(end) @@ -447,24 +448,64 @@ iterator = self.get_iterator( chrom, start, end ) return self.process_data(iterator, **kwargs) -class GffDataProvider( TabixDataProvider ): - # FIXME: Doesn't implement write_data_to_file - def process_data( self, iterator, **kwargs ): +class GffDataProvider( TracksDataProvider, FilterableMixin ): + col_name_data_attr_mapping = { 4 : { 'index': 4 , 'name' : 'Score' } } + + def write_data_to_file( self, chrom, start, end, filename ): + source = open( self.original_dataset.file_name ) + index = Indexes( self.converted_dataset.file_name ) + out = open( filename, 'w' ) + for start, end, offset in index.find(chrom, start, end): + source.seek( offset ) + + reader = GFFReaderWrapper( source, fix_strand=True ) + feature = reader.next() + for interval in feature.intervals: + out.write(interval.raw_line + '\n') + out.close() + + def get_data( self, chrom, start, end, **kwargs ): + start, end = int(start), int(end) + source = open( self.original_dataset.file_name ) + index = Indexes( self.converted_dataset.file_name ) + results = [] count = 0 - rval = [] message = None - for feature in GFFReaderWrapper( iterator, fix_strand=True ): + + # If chrom is not found in indexes, try removing the first three + # characters (e.g. 'chr') and see if that works. This enables the + # provider to handle chrome names defined as chrXXX and as XXX. + chrom = str(chrom) + if chrom not in index.indexes and chrom[3:] in index.indexes: + chrom = chrom[3:] + + # + # Build data to return. Payload format is: + # [ <guid/offset>, <start>, <end>, <name>, <score>, <strand>, <thick_start>, + # <thick_end>, <blocks> ] + # + # First three entries are mandatory, others are optional. 
+ # + filter_cols = from_json_string( kwargs.get( "filter_cols", "[]" ) ) + no_detail = ( "no_detail" in kwargs ) + for start, end, offset in index.find(chrom, start, end): if count >= MAX_VALS: message = ERROR_MAX_VALS % "features" break count += 1 + source.seek( offset ) + # TODO: can we use column metadata to fill out payload? + # TODO: use function to set payload data - payload = package_gff_feature( feature ) - hashed = hash("\t".join(feature.fields)) - payload.insert( 0, hashed ) - rval.append( payload ) - - return { 'data': rval, 'message': message } + # GFF dataset. + reader = GFFReaderWrapper( source, fix_strand=True ) + feature = reader.next() + payload = package_gff_feature( feature, no_detail, filter_cols ) + payload.insert( 0, offset ) + + results.append( payload ) + + return { 'data': results, 'message': message } class BedDataProvider( TabixDataProvider ): """ @@ -611,7 +652,8 @@ # type. First key is converted dataset type; if result is another dict, second key # is original dataset type. TODO: This needs to be more flexible. dataset_type_name_to_data_provider = { - "tabix": { Vcf: VcfDataProvider, Bed: BedDataProvider, Gff: GffDataProvider, Gtf: GffDataProvider, "default" : TabixDataProvider }, + "tabix": { Vcf: VcfDataProvider, Bed: BedDataProvider, "default" : TabixDataProvider }, + "interval_index": { Gff: GffDataProvider, Gtf: GffDataProvider }, "bai": BamDataProvider, "summary_tree": SummaryTreeDataProvider, "bigwig": BigWigDataProvider, Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled and this email is addressed to you.