details: http://www.bx.psu.edu/hg/galaxy/rev/eb317734e391 changeset: 3135:eb317734e391 user: guru date: Tue Dec 01 11:49:07 2009 -0500 description: Adding back 'wiggle to interval' converter diffstat: datatypes_conf.xml.sample | 1 + lib/galaxy/datatypes/converters/wiggle_to_simple_converter.py | 44 +++++++ lib/galaxy/datatypes/converters/wiggle_to_simple_converter.xml | 11 + test-data/3.wig | 37 ++++++ test-data/3_wig.bed | 26 ++++ tool_conf.xml.main | 1 + tool_conf.xml.sample | 1 + tools/filters/wiggle_to_simple.py | 43 +++++++ tools/filters/wiggle_to_simple.xml | 88 ++++++++++++++ 9 files changed, 252 insertions(+), 0 deletions(-) diffs (306 lines): diff -r 6e742ffb16a5 -r eb317734e391 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample Tue Dec 01 11:07:20 2009 -0500 +++ b/datatypes_conf.xml.sample Tue Dec 01 11:49:07 2009 -0500 @@ -58,6 +58,7 @@ <datatype extension="txtseq.zip" type="galaxy.datatypes.data:Txtseq" mimetype="application/zip" display_in_upload="true"/> <datatype extension="wig" type="galaxy.datatypes.interval:Wiggle" display_in_upload="true"> <converter file="wiggle_to_array_tree_converter.xml" target_datatype="array_tree"/> + <converter file="wiggle_to_simple_converter.xml" target_datatype="interval"/> </datatype> <datatype extension="array_tree" type="galaxy.datatypes.data:Data" /> <datatype extension="interval_index" type="galaxy.datatypes.data:Data" /> diff -r 6e742ffb16a5 -r eb317734e391 lib/galaxy/datatypes/converters/wiggle_to_simple_converter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/wiggle_to_simple_converter.py Tue Dec 01 11:49:07 2009 -0500 @@ -0,0 +1,44 @@ +#!/usr/bin/env python +#code is same as ~/tools/stats/wiggle_to_simple.py + +""" +Read a wiggle track and print out a series of lines containing +"chrom position score". Ignores track lines, handles bed, variableStep +and fixedStep wiggle lines. +""" +import sys +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +import bx.wiggle +from galaxy.tools.exception_handling import * + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def main(): + if len( sys.argv ) > 1: + in_file = open( sys.argv[1] ) + else: + in_file = open( sys.stdin ) + + if len( sys.argv ) > 2: + out_file = open( sys.argv[2], "w" ) + else: + out_file = sys.stdout + + try: + for fields in bx.wiggle.IntervalReader( UCSCOutWrapper( in_file ) ): + out_file.write( "%s\n" % "\t".join( map( str, fields ) ) ) + except UCSCLimitException: + # Wiggle data was truncated, at the very least need to warn the user. + print 'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.' + except ValueError, e: + in_file.close() + out_file.close() + stop_err( str( e ) ) + + in_file.close() + out_file.close() + +if __name__ == "__main__": main() diff -r 6e742ffb16a5 -r eb317734e391 lib/galaxy/datatypes/converters/wiggle_to_simple_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/converters/wiggle_to_simple_converter.xml Tue Dec 01 11:49:07 2009 -0500 @@ -0,0 +1,11 @@ +<tool id="CONVERTER_wiggle_to_interval_0" name="Wiggle to Interval"> + <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <!-- Used on the metadata edit page. --> + <command interpreter="python">wiggle_to_simple_converter.py $input $out_file1 </command> + <inputs> + <param format="wig" name="input" type="data" label="Convert"/> + </inputs> + <outputs> + <data format="interval" name="out_file1" /> + </outputs> +</tool> diff -r 6e742ffb16a5 -r eb317734e391 test-data/3.wig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.wig Tue Dec 01 11:49:07 2009 -0500 @@ -0,0 +1,37 @@ +track type=wiggle_0 name="LaminB1 (Tig3)" description="NKI LaminB1 DamID Map (log2-ratio scores, Tig3 cells)" +# output date: 2009-07-22 15:55:44 UTC +# chrom specified: chr20 +# position specified: 1-62435964 +# data values >= 4 +# This data has been compressed with a minor loss in resolution. +# (Worst case: 0.0448672) The original source data +# (before querying and compression) is available at +# http://hgdownload.cse.ucsc.edu/downloads.html +variableStep chrom=chr20 span=60 +1953668 4.029 +9749454 4.033 +12219969 4.053 +12571904 4.548 +12663417 4.207 +15198014 4.041 +16613945 4.526 +16994767 4.08 +19456653 4.014 +37706076 4.293 +38010695 4.14 +38418792 4.133 +38595669 4.032 +40038834 4.144 +50693133 4.206 +51294426 4 +52890600 4.12 +57414708 4.026 +57623989 4.031 +58977191 5.678 +59059925 4.325 +variableStep chrom=chr12 span=60 +1953668 -4.029 +9749454 -4.033 +12219969 -4.053 +12571904 -4.548 +12663417 -4.207 diff -r 6e742ffb16a5 -r eb317734e391 test-data/3_wig.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3_wig.bed Tue Dec 01 11:49:07 2009 -0500 @@ -0,0 +1,26 @@ +chr20 1953667 1953727 + 4.029 +chr20 9749453 9749513 + 4.033 +chr20 12219968 12220028 + 4.053 +chr20 12571903 12571963 + 4.548 +chr20 12663416 12663476 + 4.207 +chr20 15198013 15198073 + 4.041 +chr20 16613944 16614004 + 4.526 +chr20 16994766 16994826 + 4.08 +chr20 19456652 19456712 + 4.014 +chr20 37706075 37706135 + 4.293 +chr20 38010694 38010754 + 4.14 +chr20 38418791 38418851 + 4.133 +chr20 38595668 38595728 + 4.032 +chr20 40038833 40038893 + 4.144 +chr20 50693132 50693192 + 4.206 +chr20 51294425 51294485 + 4.0 +chr20 52890599 52890659 + 4.12 +chr20 57414707 57414767 + 4.026 +chr20 57623988 57624048 + 4.031 +chr20 58977190 58977250 + 5.678 +chr20 59059924 59059984 + 4.325 +chr12 1953667 1953727 + -4.029 +chr12 9749453 9749513 + -4.033 +chr12 12219968 12220028 + -4.053 +chr12 12571903 12571963 + -4.548 +chr12 12663416 12663476 + -4.207 diff -r 6e742ffb16a5 -r eb317734e391 tool_conf.xml.main --- a/tool_conf.xml.main Tue Dec 01 11:07:20 2009 -0500 +++ b/tool_conf.xml.main Tue Dec 01 11:49:07 2009 -0500 @@ -47,6 +47,7 @@ <tool file="maf/maf_to_fasta.xml" /> <tool file="fasta_tools/tabular_to_fasta.xml" /> <tool file="fastx_toolkit/fastq_to_fasta.xml" /> + <tool file="filters/wiggle_to_simple.xml" /> </section> <section name="FASTA manipulation" id="fasta_manipulation"> <tool file="fasta_tools/fasta_compute_length.xml" /> diff -r 6e742ffb16a5 -r eb317734e391 tool_conf.xml.sample --- a/tool_conf.xml.sample Tue Dec 01 11:07:20 2009 -0500 +++ b/tool_conf.xml.sample Tue Dec 01 11:49:07 2009 -0500 @@ -71,6 +71,7 @@ <tool file="maf/maf_to_fasta.xml" /> <tool file="fasta_tools/tabular_to_fasta.xml" /> <tool file="fastx_toolkit/fastq_to_fasta.xml" /> + <tool file="filters/wiggle_to_simple.xml" /> </section> <section name="Extract Features" id="features"> <tool file="filters/ucsc_gene_bed_to_exon_bed.xml" /> diff -r 6e742ffb16a5 -r eb317734e391 tools/filters/wiggle_to_simple.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/filters/wiggle_to_simple.py Tue Dec 01 11:49:07 2009 -0500 @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +""" +Read a wiggle track and print out a series of lines containing +"chrom position score". Ignores track lines, handles bed, variableStep +and fixedStep wiggle lines. +""" +import sys +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +import bx.wiggle +from galaxy.tools.exception_handling import * + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def main(): + if len( sys.argv ) > 1: + in_file = open( sys.argv[1] ) + else: + in_file = open( sys.stdin ) + + if len( sys.argv ) > 2: + out_file = open( sys.argv[2], "w" ) + else: + out_file = sys.stdout + + try: + for fields in bx.wiggle.IntervalReader( UCSCOutWrapper( in_file ) ): + out_file.write( "%s\n" % "\t".join( map( str, fields ) ) ) + except UCSCLimitException: + # Wiggle data was truncated, at the very least need to warn the user. + print 'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.' + except ValueError, e: + in_file.close() + out_file.close() + stop_err( str( e ) ) + + in_file.close() + out_file.close() + +if __name__ == "__main__": main() diff -r 6e742ffb16a5 -r eb317734e391 tools/filters/wiggle_to_simple.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/filters/wiggle_to_simple.xml Tue Dec 01 11:49:07 2009 -0500 @@ -0,0 +1,88 @@ +<tool id="wiggle2simple1" name="Wiggle-to-Interval"> + <description>converter</description> + <command interpreter="python">wiggle_to_simple.py $input $out_file1 </command> + <inputs> + <param format="wig" name="input" type="data" label="Convert"/> + </inputs> + <outputs> + <data format="interval" name="out_file1" /> + </outputs> + <tests> + <test> + <param name="input" value="2.wig" /> + <output name="out_file1" file="2.interval"/> + </test> + <test> + <param name="input" value="3.wig" /> + <output name="out_file1" file="3_wig.bed"/> + </test> + </tests> + <help> +**Syntax** + +This tool converts wiggle data into interval type. + +- **Wiggle format**: The .wig format is line-oriented. Wiggle data is preceded by a UCSC track definition line. Following the track definition line is the track data, which can be entered in three different formats described below. + + - **BED format** with no declaration line and four columns of data:: + + chromA chromStartA chromEndA dataValueA + chromB chromStartB chromEndB dataValueB + + - **variableStep** two column data; started by a declaration line and followed with chromosome positions and data values:: + + variableStep chrom=chrN [span=windowSize] + chromStartA dataValueA + chromStartB dataValueB + + - **fixedStep** single column data; started by a declaration line and followed with data values:: + + fixedStep chrom=chrN start=position step=stepInterval [span=windowSize] + dataValue1 + dataValue2 + +----- + +**Example** + +- input wiggle format file:: + + #track type=wiggle_0 name="Bed Format" description="BED format" + chr19 59302000 59302300 -1.0 + chr19 59302300 59302600 -0.75 + chr19 59302600 59302900 -0.50 + chr19 59302900 59303200 -0.25 + chr19 59303200 59303500 0.0 + #track type=wiggle_0 name="variableStep" description="variableStep format" + variableStep chrom=chr19 span=150 + 59304701 10.0 + 59304901 12.5 + 59305401 15.0 + 59305601 17.5 + #track type=wiggle_0 name="fixedStep" description="fixed step" visibility=full + fixedStep chrom=chr19 start=59307401 step=300 span=200 + 1000 + 900 + 800 + 700 + 600 + +- convert the above file to interval file:: + + chr19 59302000 59302300 + -1.0 + chr19 59302300 59302600 + -0.75 + chr19 59302600 59302900 + -0.5 + chr19 59302900 59303200 + -0.25 + chr19 59303200 59303500 + 0.0 + chr19 59304701 59304851 + 10.0 + chr19 59304901 59305051 + 12.5 + chr19 59305401 59305551 + 15.0 + chr19 59305601 59305751 + 17.5 + chr19 59307701 59307901 + 1000.0 + chr19 59308001 59308201 + 900.0 + chr19 59308301 59308501 + 800.0 + chr19 59308601 59308801 + 700.0 + chr19 59308901 59309101 + 600.0 + +</help> +</tool>