1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/20e573ed9f82/ changeset: 20e573ed9f82 user: jgoecks date: 2012-02-22 19:57:17 summary: Enable interval files to be converted to interval_index and summary_tree datatypes. affected #: 6 files diff -r d2c3bbbccf664c012b93187b07a04eecf6fdf4c6 -r 20e573ed9f8248f659e3e40b862f475bd548ba47 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -89,6 +89,8 @@ <converter file="interval_to_bedstrict_converter.xml" target_datatype="bedstrict"/><converter file="interval_to_bed6_converter.xml" target_datatype="bed6"/><converter file="interval_to_bed12_converter.xml" target_datatype="bed12"/> + <converter file="interval_to_interval_index_converter.xml" target_datatype="interval_index"/> + <converter file="interval_to_summary_tree_converter.xml" target_datatype="summary_tree"/><!-- <display file="ucsc/interval_as_bed.xml" inherit="True" /> --><display file="genetrack.xml" inherit="True"/><display file="ensembl/ensembl_interval_as_bed.xml" inherit="True"/> diff -r d2c3bbbccf664c012b93187b07a04eecf6fdf4c6 -r 20e573ed9f8248f659e3e40b862f475bd548ba47 lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py +++ b/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py @@ -1,38 +1,39 @@ #!/usr/bin/env python """ -Convert from interval file to interval index file. Default input file format is BED (0-based, half-open intervals). +Convert from interval file to interval index file. -usage: %prog in_file out_file - -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval +usage: %prog <options> in_file out_file + -c, --chr-col: chromosome column, default=1 + -s, --start-col: start column, default=2 + -e, --end-col: end column, default=3 """ from __future__ import division -import sys, fileinput +import sys, fileinput, optparse from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) from galaxy.visualization.tracks.summary import * -from bx.cookbook import doc_optparse from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed from bx.interval_index_file import Indexes def main(): # Read options, args. - options, args = doc_optparse.parse( __doc__ ) - try: - gff_format = bool( options.gff ) - input_fname, out_fname = args - except: - doc_optparse.exception() + parser = optparse.OptionParser() + parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col', default=1 ) + parser.add_option( '-s', '--start-col', type='int', dest='start_col', default=2 ) + parser.add_option( '-e', '--end-col', type='int', dest='end_col', default=3 ) + (options, args) = parser.parse_args() + input_fname, output_fname = args + + # Make column indices 0-based. + options.chrom_col -= 1 + options.start_col -= 1 + options.end_col -= 1 # Do conversion. - # TODO: take column numbers from command line. - if gff_format: - chr_col, start_col, end_col = ( 0, 3, 4 ) - else: - chr_col, start_col, end_col = ( 0, 1, 2 ) index = Indexes() offset = 0 for line in open(input_fname, "r"): @@ -40,15 +41,13 @@ if not feature or feature[0].startswith("track") or feature[0].startswith("#"): offset += len(line) continue - chrom = feature[ chr_col ] - chrom_start = int( feature[ start_col ] ) - chrom_end = int( feature[ end_col ] ) - if gff_format: - chrom_start, chrom_end = convert_gff_coords_to_bed( [chrom_start, chrom_end ] ) + chrom = feature[ options.chrom_col ] + chrom_start = int( feature[ options.start_col ] ) + chrom_end = int( feature[ options.end_col ] ) index.add( chrom, chrom_start, chrom_end, offset ) offset += len(line) - index.write( open(out_fname, "w") ) + index.write( open(output_fname, "w") ) if __name__ == "__main__": main() diff -r d2c3bbbccf664c012b93187b07a04eecf6fdf4c6 -r 20e573ed9f8248f659e3e40b862f475bd548ba47 lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py.orig --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py.orig @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +""" +Convert from interval file to interval index file. Default input file format is BED (0-based, half-open intervals). + +usage: %prog in_file out_file + -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval +""" + +from __future__ import division + +import sys, fileinput +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from galaxy.visualization.tracks.summary import * +from bx.cookbook import doc_optparse +from galaxy.tools.util.gff_util import convert_gff_coords_to_bed +from bx.interval_index_file import Indexes +from galaxy.tools.util.gff_util import parse_gff_attributes + +def main(): + + # Read options, args. + options, args = doc_optparse.parse( __doc__ ) + try: + gff_format = bool( options.gff ) + input_fname, out_fname = args + except: + doc_optparse.exception() + + # Do conversion. + # TODO: take column numbers from command line. + if gff_format: + chr_col, start_col, end_col = ( 0, 3, 4 ) + else: + chr_col, start_col, end_col = ( 0, 1, 2 ) + index = Indexes() + offset = 0 + # Need to keep track of last gene, transcript id for indexing GTF files. + last_gene_id = None + last_transcript_id = None + for line in open(input_fname, "r"): + feature = line.strip().split('\t') + if not feature or feature[0].startswith("track") or feature[0].startswith("#"): + offset += len(line) + continue + chrom = feature[ chr_col ] + chrom_start = int( feature[ start_col ] ) + chrom_end = int( feature[ end_col ] ) + if gff_format: + chrom_start, chrom_end = convert_gff_coords_to_bed( [chrom_start, chrom_end ] ) + + # Only add feature if gene_id, transcript_id are different from last + # values. + if len( feature ) == 9: + attributes = parse_gff_attributes( feature[8] ) + gene_id = attributes.get( 'gene_id', None ) + transcript_id = attributes.get( 'transcript_id', None ) + if gene_id and transcript_id and gene_id == last_gene_id and \ + transcript_id == last_transcript_id: + # Feature has same gene_id, transcript as last feature, so + # do not add. + offset += len(line) + continue + else: + # gene_id, transcript_id set and are different from last + # values. + last_gene_id = gene_id + last_transcript_id = transcript_id + + #print "%s %s %s %s %i %i %i" % (feature[2], last_gene_id, last_transcript_id, chrom, chrom_start, chrom_end, offset) + index.add( chrom, chrom_start, chrom_end, offset ) + offset += len(line) + + index.write( open(out_fname, "w") ) + +if __name__ == "__main__": + main() + \ No newline at end of file diff -r d2c3bbbccf664c012b93187b07a04eecf6fdf4c6 -r 20e573ed9f8248f659e3e40b862f475bd548ba47 lib/galaxy/datatypes/converters/interval_to_interval_index_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.xml @@ -0,0 +1,19 @@ +<tool id="CONVERTER_interval_to_interval_index_0" name="Convert Interval to Interval Index" version="1.0.0" hidden="true"> +<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command interpreter="python">interval_to_interval_index_converter.py + -c ${input1.metadata.chromCol} + -s ${input1.metadata.startCol} + -e ${input1.metadata.endCol} + $input1 $output1 + </command> + <inputs> + <page> + <param format="interval" name="input1" type="data" label="Choose Interval file"/> + </page> + </inputs> + <outputs> + <data format="interval_index" name="output1"/> + </outputs> + <help> + </help> +</tool> diff -r d2c3bbbccf664c012b93187b07a04eecf6fdf4c6 -r 20e573ed9f8248f659e3e40b862f475bd548ba47 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py +++ b/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py @@ -3,36 +3,46 @@ """ Convert from interval file to summary tree file. Default input file format is BED (0-based, half-open intervals). -usage: %prog in_file out_file +usage: %prog <options> in_file out_file + -c, --chr-col: chromosome column, default=1 + -s, --start-col: start column, default=2 + -e, --end-col: end column, default=3 + -t, --strand-col: strand column, default=6 -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval """ from __future__ import division -import sys, fileinput +import sys, fileinput, optparse from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) from galaxy.visualization.tracks.summary import * from bx.intervals.io import * -from bx.cookbook import doc_optparse from galaxy.datatypes.util.gff_util import * def main(): # Read options, args. - options, args = doc_optparse.parse( __doc__ ) - try: - gff_format = bool( options.gff ) - input_fname, out_fname = args - except: - doc_optparse.exception() + parser = optparse.OptionParser() + parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col', default=1 ) + parser.add_option( '-s', '--start-col', type='int', dest='start_col', default=2 ) + parser.add_option( '-e', '--end-col', type='int', dest='end_col', default=3 ) + parser.add_option( '-t', '--strand-col', type='int', dest='strand_col', default=6 ) + parser.add_option( '-G', '--gff', dest="gff_format", action="store_true" ) + (options, args) = parser.parse_args() + input_fname, output_fname = args + + # Convert column indices to 0-based. + options.chrom_col -= 1 + options.start_col -= 1 + options.end_col -= 1 + options.strand_col -= 1 # Do conversion. - # TODO: take column numbers from command line. - if gff_format: + if options.gff_format: reader_wrapper_class = GFFReaderWrapper chr_col, start_col, end_col, strand_col = ( 0, 3, 4, 6 ) else: reader_wrapper_class = NiceReaderWrapper - chr_col, start_col, end_col, strand_col = ( 0, 1, 2, 5 ) + chr_col, start_col, end_col, strand_col = ( options.chrom_col, options.start_col, options.end_col, options.strand_col ) reader_wrapper = reader_wrapper_class( fileinput.FileInput( input_fname ), chrom_col=chr_col, start_col=start_col, @@ -47,7 +57,7 @@ convert_gff_coords_to_bed( feature ) st.insert_range( feature.chrom, long( feature.start ), long( feature.end ) ) - st.write(out_fname) + st.write( output_fname ) if __name__ == "__main__": main() diff -r d2c3bbbccf664c012b93187b07a04eecf6fdf4c6 -r 20e573ed9f8248f659e3e40b862f475bd548ba47 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.xml @@ -0,0 +1,20 @@ +<tool id="CONVERTER_interval_to_summary_tree_0" name="Convert Interval to Summary Tree" version="1.0.0" hidden="true"> +<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command interpreter="python">interval_to_summary_tree_converter.py + -c ${input1.metadata.chromCol} + -s ${input1.metadata.startCol} + -e ${input1.metadata.endCol} + $input1 $output1 + </command> + + <inputs> + <page> + <param format="interval" name="input1" type="data" label="Choose Interval file"/> + </page> + </inputs> + <outputs> + <data format="summary_tree" name="output1"/> + </outputs> + <help> + </help> +</tool> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.