# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks@emory.edu> # Date 1279113986 14400 # Node ID 3a6b81352293854ff58970c19590ce14a218fb57 # Parent e2ba0e9c6852f2acba9d7119c09b703d3bc954be New feature: GFF files can be viewed in trackster. Specific additions: (a) generalized bed-to-summary-tree converter and bed-to-interval-index converter to handle both BED and GFF files and renamed accordingly; (b) augmented trackster to provide payload data from both BED and GFF files. --- a/lib/galaxy/datatypes/converters/bed_to_interval_index_converter.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division - -import sys -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.interval_index_file import Indexes - -def main(): - - input_fname = sys.argv[1] - out_fname = sys.argv[2] - index = Indexes() - offset = 0 - - for line in open(input_fname, "r"): - feature = line.strip().split() - if not feature or feature[0].startswith("track") or feature[0].startswith("#"): - offset += len(line) - continue - chrom = feature[0] - chrom_start = int(feature[1]) - chrom_end = int(feature[2]) - index.add( chrom, chrom_start, chrom_end, offset ) - offset += len(line) - - index.write( open(out_fname, "w") ) - -if __name__ == "__main__": - main() - --- /dev/null +++ b/lib/galaxy/datatypes/converters/gff_to_summary_tree_converter.xml @@ -0,0 +1,14 @@ +<tool id="CONVERTER_gff_to_summary_tree_0" name="Convert GFF to Summary Tree" version="1.0.0"> +<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1 --gff</command> + <inputs> + <page> + <param format="gff" name="input1" type="data" label="Choose GFF file"/> + </page> + </inputs> + <outputs> + <data format="summary_tree" name="output1"/> + </outputs> + <help> + </help> 
+</tool> --- a/lib/galaxy/datatypes/converters/bed_to_interval_index_converter.xml +++ b/lib/galaxy/datatypes/converters/bed_to_interval_index_converter.xml @@ -1,6 +1,6 @@ <tool id="CONVERTER_bed_to_interval_index_0" name="Convert BED to Interval Index" version="1.0.0"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">bed_to_interval_index_converter.py $input1 $output1</command> + <command interpreter="python">interval_to_interval_index_converter.py $input1 $output1</command><inputs><page><param format="bed" name="input1" type="data" label="Choose BED file"/> --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -52,6 +52,8 @@ </datatype><datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"><converter file="gff_to_bed_converter.xml" target_datatype="bed"/> + <converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/> + <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/><display file="ensembl/ensembl_gff.xml" inherit="True"/></datatype><datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/> --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +""" +Convert from interval file to interval index file. Default input file format is BED (0-based, half-open intervals). + +usage: %prog in_file out_file + -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval +""" + +from __future__ import division + +import sys, fileinput +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from galaxy.visualization.tracks.summary import * +from bx.cookbook import doc_optparse +from galaxy.tools.util.gff_util import convert_gff_coords_to_bed +from bx.interval_index_file import Indexes + +def main(): + + # Read options, args. 
+ options, args = doc_optparse.parse( __doc__ ) + try: + gff_format = bool( options.gff ) + input_fname, out_fname = args + except: + doc_optparse.exception() + + # Do conversion. + # TODO: take column numbers from command line. + if gff_format: + chr_col, start_col, end_col = ( 0, 3, 4 ) + else: + chr_col, start_col, end_col = ( 0, 1, 2 ) + index = Indexes() + offset = 0 + for line in open(input_fname, "r"): + feature = line.strip().split() + if not feature or feature[0].startswith("track") or feature[0].startswith("#"): + offset += len(line) + continue + chrom = feature[ chr_col ] + chrom_start = int( feature[ start_col ] ) + chrom_end = int( feature[ end_col ] ) + if gff_format: + chrom_start, chrom_end = convert_gff_coords_to_bed( [chrom_start, chrom_end ] ) + index.add( chrom, chrom_start, chrom_end, offset ) + offset += len(line) + + index.write( open(out_fname, "w") ) + +if __name__ == "__main__": + main() + --- a/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml +++ b/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml @@ -1,6 +1,6 @@ <tool id="CONVERTER_bed_to_summary_tree_0" name="Convert BED to Summary Tree" version="1.0.0"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">bed_to_summary_tree_converter.py $input1 $output1</command> + <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1</command><inputs><page><param format="bed" name="input1" type="data" label="Choose BED file"/> --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +""" +Convert from interval file to summary tree file. Default input file format is BED (0-based, half-open intervals). 
+ +usage: %prog in_file out_file + -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval +""" +from __future__ import division + +import sys, fileinput +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from galaxy.visualization.tracks.summary import * +from bx.intervals.io import * +from bx.cookbook import doc_optparse +from galaxy.tools.util.gff_util import GFFReaderWrapper + +def main(): + # Read options, args. + options, args = doc_optparse.parse( __doc__ ) + try: + gff_format = bool( options.gff ) + input_fname, out_fname = args + except: + doc_optparse.exception() + + # Do conversion. + # TODO: take column numbers from command line. + if gff_format: + reader_wrapper_class = GFFReaderWrapper + chr_col, start_col, end_col, strand_col = ( 0, 3, 4, 6 ) + else: + reader_wrapper_class = NiceReaderWrapper + chr_col, start_col, end_col, strand_col = ( 0, 1, 2, 5 ) + reader_wrapper = reader_wrapper_class( fileinput.FileInput( input_fname ), + chrom_col=chr_col, + start_col=start_col, + end_col=end_col, + strand_col=strand_col, + fix_strand=True ) + st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30) + for line in list( reader_wrapper ): + if type( line ) is GenomicInterval: + st.insert_range( line[ chr_col ], long( line[ start_col ] ), long( line[ end_col ] ) ) + + st.write(out_fname) + +if __name__ == "__main__": + main() --- a/lib/galaxy/datatypes/interval.py +++ b/lib/galaxy/datatypes/interval.py @@ -772,6 +772,10 @@ class Gff( Tabular, _RemoteCallMixin ): return True except: return False + + def get_track_type( self ): + return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"} + class Gff3( Gff ): """Tab delimited data in Gff3 format""" --- a/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division - -import sys -from galaxy import eggs -import 
pkg_resources; pkg_resources.require( "bx-python" ) -from galaxy.visualization.tracks.summary import * -from bx.arrays.bed import BedReader - -def main(): - - input_fname = sys.argv[1] - out_fname = sys.argv[2] - - reader = BedReader( open( input_fname ) ) - - st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30) - for chrom, chrom_start, chrom_end, name, score in reader: - st.insert_range(chrom, chrom_start, chrom_end) - - st.write(out_fname) - -if __name__ == "__main__": - main() --- /dev/null +++ b/lib/galaxy/datatypes/converters/gff_to_interval_index_converter.xml @@ -0,0 +1,14 @@ +<tool id="CONVERTER_gff_to_interval_index_0" name="Convert GFF to Interval Index" version="1.0.0"> +<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command interpreter="python">interval_to_interval_index_converter.py $input1 $output1 --gff</command> + <inputs> + <page> + <param format="gff" name="input1" type="data" label="Choose GFF file"/> + </page> + </inputs> + <outputs> + <data format="interval_index" name="output1"/> + </outputs> + <help> + </help> +</tool> --- a/lib/galaxy/visualization/tracks/data/interval_index.py +++ b/lib/galaxy/visualization/tracks/data/interval_index.py @@ -7,6 +7,7 @@ Payload format: [ uid (offset), start, e import pkg_resources; pkg_resources.require( "bx-python" ) from bx.interval_index_file import Indexes +from galaxy.datatypes.interval import Bed, Gff class IntervalIndexDataProvider( object ): def __init__( self, converted_dataset, original_dataset ): @@ -24,22 +25,31 @@ class IntervalIndexDataProvider( object source.seek(offset) feature = source.readline().split() payload = [ offset, start, end ] + # TODO: can we use column metadata to fill out payload? 
if "no_detail" not in kwargs: length = len(feature) - if length >= 4: - payload.append(feature[3]) # name - if length >= 6: # strand - payload.append(feature[5]) - - if length >= 8: - payload.append(int(feature[6])) - payload.append(int(feature[7])) + if isinstance( self.original_dataset.datatype, Gff ): + # GFF dataset. + if length >= 3: + payload.append( feature[2] ) # feature type (GFF column 3), used as name + if length >= 7: + payload.append( feature[6] ) # strand + elif isinstance( self.original_dataset.datatype, Bed ): + # BED dataset. + if length >= 4: + payload.append(feature[3]) # name + if length >= 6: # strand + payload.append(feature[5]) + + if length >= 8: + payload.append(int(feature[6])) + payload.append(int(feature[7])) - if length >= 12: - block_sizes = [ int(n) for n in feature[10].split(',') if n != ''] - block_starts = [ int(n) for n in feature[11].split(',') if n != '' ] - blocks = zip(block_sizes, block_starts) - payload.append( [ (start + block[1], start + block[1] + block[0]) for block in blocks] ) + if length >= 12: + block_sizes = [ int(n) for n in feature[10].split(',') if n != ''] + block_starts = [ int(n) for n in feature[11].split(',') if n != '' ] + blocks = zip(block_sizes, block_starts) + payload.append( [ (start + block[1], start + block[1] + block[0]) for block in blocks] ) results.append(payload)