6 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/1ce68c239d0e/ Changeset: 1ce68c239d0e Branch: extract_genomic_dna_tool_enhancements User: BjoernGruening Date: 2013-10-18 18:31:44 Summary: Add the value (nameCol) in a given BED file to the FASTA header. Affected #: 2 files diff -r febd7622924885dd0729ce00924289cc1f0eb741 -r 1ce68c239d0ee59bd469c4520bf754eaa80ac04c tools/extract/extract_genomic_dna.py --- a/tools/extract/extract_genomic_dna.py +++ b/tools/extract/extract_genomic_dna.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ usage: %prog $input $out_file1 - -1, --cols=N,N,N,N: Columns for start, end, strand in input file + -1, --cols=N,N,N,N,N: Columns for start, end, strand in input file -d, --dbkey=N: Genome build of input file -o, --output_format=N: the data type of the output file -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc @@ -54,7 +54,13 @@ # options, args = doc_optparse.parse( __doc__ ) try: - chrom_col, start_col, end_col, strand_col = parse_cols_arg( options.cols ) + if len(options.cols.split(',')) == 5: + # BED file + chrom_col, start_col, end_col, strand_col, name_col = parse_cols_arg( options.cols ) + else: + # gff file + chrom_col, start_col, end_col, strand_col = parse_cols_arg( options.cols ) + name_col = False dbkey = options.dbkey output_format = options.output_format gff_format = options.gff @@ -144,6 +150,7 @@ start = feature.start end = feature.end strand = feature.strand + name = "" else: # Processing lines, either interval or GFF format. 
line = feature.rstrip( '\r\n' ) @@ -153,6 +160,8 @@ chrom = fields[chrom_col] start = int( fields[start_col] ) end = int( fields[end_col] ) + if name_col: + name = fields[name_col] if gff_format: start, end = gff_util.convert_gff_coords_to_bed( [start, end] ) if includes_strand_col: @@ -237,13 +246,16 @@ sequence = reverse_complement( sequence ) if output_format == "fasta" : - l = len( sequence ) + l = len( sequence ) c = 0 if gff_format: start, end = gff_util.convert_bed_coords_to_gff( [ start, end ] ) fields = [dbkey, str( chrom ), str( start ), str( end ), strand] meta_data = "_".join( fields ) - fout.write( ">%s\n" % meta_data ) + if name.strip(): + fout.write( ">%s %s\n" % (meta_data, name) ) + else: + fout.write( ">%s\n" % meta_data ) while c < l: b = min( c + 50, l ) fout.write( "%s\n" % str( sequence[c:b] ) ) diff -r febd7622924885dd0729ce00924289cc1f0eb741 -r 1ce68c239d0ee59bd469c4520bf754eaa80ac04c tools/extract/extract_genomic_dna.xml --- a/tools/extract/extract_genomic_dna.xml +++ b/tools/extract/extract_genomic_dna.xml @@ -11,9 +11,9 @@ #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): -1 1,4,5,7 --gff #else: - -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol} + -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol} #end if - + #if $seq_source.index_source == "cached": ## Genomic data from cache. 
-g ${GALAXY_DATA_INDEX_DIR} @@ -52,8 +52,8 @@ </data></outputs><requirements> + <requirement type="package">ucsc_tools</requirement><requirement type="binary">faToTwoBit</requirement> - <requirement type="package">ucsc_tools</requirement></requirements><tests><test> https://bitbucket.org/galaxy/galaxy-central/commits/9c6732931369/ Changeset: 9c6732931369 Branch: extract_genomic_dna_tool_enhancements User: BjoernGruening Date: 2013-10-24 18:13:18 Summary: bugfix and one test fix Affected #: 3 files diff -r 1ce68c239d0ee59bd469c4520bf754eaa80ac04c -r 9c67329313699a02108fe36dc0cee93fee362c70 test-data/extract_genomic_dna_out2.fasta --- a/test-data/extract_genomic_dna_out2.fasta +++ b/test-data/extract_genomic_dna_out2.fasta @@ -1,6 +1,6 @@ ->droPer1_super_1_139823_139913_- +>droPer1_super_1_139823_139913_- AK028861 CGTCGGCTTCTGCTTCTGCTGATGATGGTCGTTCTTCTTCCTTTACTTCT TCCTATTTTTCTTCCTTCCCTTACACTATATCTTCCTTTA ->droPer1_super_1_156750_156844_- +>droPer1_super_1_156750_156844_- BC126698 CCGGGCTGCGGCAAGGGATTCACCTGCTCCAAACAGCTCAAGGTGCACTC CCGCACGCACACGGGCGAGAAGCCCTATCACTGCGACATCTGCT diff -r 1ce68c239d0ee59bd469c4520bf754eaa80ac04c -r 9c67329313699a02108fe36dc0cee93fee362c70 tools/extract/extract_genomic_dna.py --- a/tools/extract/extract_genomic_dna.py +++ b/tools/extract/extract_genomic_dna.py @@ -142,7 +142,8 @@ if isinstance( feature, ( Header, Comment ) ): line_count += 1 continue - + + name = "" if gff_format and interpret_features: # Processing features. gff_util.convert_gff_coords_to_bed( feature ) @@ -150,7 +151,6 @@ start = feature.start end = feature.end strand = feature.strand - name = "" else: # Processing lines, either interval or GFF format. 
line = feature.rstrip( '\r\n' ) diff -r 1ce68c239d0ee59bd469c4520bf754eaa80ac04c -r 9c67329313699a02108fe36dc0cee93fee362c70 tools/extract/extract_genomic_dna.xml --- a/tools/extract/extract_genomic_dna.xml +++ b/tools/extract/extract_genomic_dna.xml @@ -60,7 +60,7 @@ <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /><param name="interpret_features" value="yes"/><param name="index_source" value="cached"/> - <param name="out_format" value="fasta"/> + <param name="out_format" value="fasta"/><output name="out_file1" file="extract_genomic_dna_out1.fasta" /></test><test> https://bitbucket.org/galaxy/galaxy-central/commits/af3fc6046bd6/ Changeset: af3fc6046bd6 Branch: extract_genomic_dna_tool_enhancements User: jmchilton Date: 2013-10-25 06:03:57 Summary: Update hg17 test case for extract_genomic_dna.xml to reflect recent changes. Cannot delete or modify previous output file - it is used as input for another tool. Use assertion testing to weaken test case. As this tool is migrated to the tool shed the hg17 test cases should probably be eliminated completely. Affected #: 1 file diff -r 9c67329313699a02108fe36dc0cee93fee362c70 -r af3fc6046bd693fdbf1ec26d119bf387d7a92bc2 tools/extract/extract_genomic_dna.xml --- a/tools/extract/extract_genomic_dna.xml +++ b/tools/extract/extract_genomic_dna.xml @@ -61,7 +61,21 @@ <param name="interpret_features" value="yes"/><param name="index_source" value="cached"/><param name="out_format" value="fasta"/> - <output name="out_file1" file="extract_genomic_dna_out1.fasta" /> + <output name="out_file1"> + <assert_contents> + <!-- First few lines... --> + <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" /> + <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" /> + <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" /> + <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" /> + <!-- Last few lines... 
--> + <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" /> + <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" /> + <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" /> + <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" /> + <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" /> + </assert_contents> + </output></test><test><param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> https://bitbucket.org/galaxy/galaxy-central/commits/7b014440dda1/ Changeset: 7b014440dda1 Branch: extract_genomic_dna_tool_enhancements User: jmchilton Date: 2013-10-25 06:06:14 Summary: Rev slightly the tool version of extract_genomic_dna.xml since output is slightly different. Update tool help to reflect new output format. Affected #: 1 file diff -r af3fc6046bd693fdbf1ec26d119bf387d7a92bc2 -r 7b014440dda1d9dddb292598fb804510b5274438 tools/extract/extract_genomic_dna.xml --- a/tools/extract/extract_genomic_dna.xml +++ b/tools/extract/extract_genomic_dna.xml @@ -1,4 +1,4 @@ -<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.2"> +<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.3"><description>using coordinates from assembled/unassembled genomes</description><command interpreter="python"> extract_genomic_dna.py $input $out_file1 -o $out_format -d $dbkey @@ -166,14 +166,14 @@ Extracting sequences with **FASTA** output data type returns:: - >hg17_chr7_127475281_127475310_+ + >hg17_chr7_127475281_127475310_+ NM_000230 GTAGGAATCGCAGCGCCAGCGGTTGCAAG - >hg17_chr7_127485994_127486166_+ + >hg17_chr7_127485994_127486166_+ NM_000230 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG GATCAATGACATTTCACACACG - >hg17_chr7_127486011_127486166_+ + >hg17_chr7_127486011_127486166_+ D49487 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG 
CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC https://bitbucket.org/galaxy/galaxy-central/commits/57e9ebe8ea6b/ Changeset: 57e9ebe8ea6b User: jmchilton Date: 2013-10-25 06:08:48 Summary: Merge pull request #239 changes into default. Affected #: 3 files diff -r d4e60067889e2bd8873010fa0fc887e725a4b695 -r 57e9ebe8ea6ba6686a489560cde62b309d2e5271 test-data/extract_genomic_dna_out2.fasta --- a/test-data/extract_genomic_dna_out2.fasta +++ b/test-data/extract_genomic_dna_out2.fasta @@ -1,6 +1,6 @@ ->droPer1_super_1_139823_139913_- +>droPer1_super_1_139823_139913_- AK028861 CGTCGGCTTCTGCTTCTGCTGATGATGGTCGTTCTTCTTCCTTTACTTCT TCCTATTTTTCTTCCTTCCCTTACACTATATCTTCCTTTA ->droPer1_super_1_156750_156844_- +>droPer1_super_1_156750_156844_- BC126698 CCGGGCTGCGGCAAGGGATTCACCTGCTCCAAACAGCTCAAGGTGCACTC CCGCACGCACACGGGCGAGAAGCCCTATCACTGCGACATCTGCT diff -r d4e60067889e2bd8873010fa0fc887e725a4b695 -r 57e9ebe8ea6ba6686a489560cde62b309d2e5271 tools/extract/extract_genomic_dna.py --- a/tools/extract/extract_genomic_dna.py +++ b/tools/extract/extract_genomic_dna.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ usage: %prog $input $out_file1 - -1, --cols=N,N,N,N: Columns for start, end, strand in input file + -1, --cols=N,N,N,N,N: Columns for start, end, strand in input file -d, --dbkey=N: Genome build of input file -o, --output_format=N: the data type of the output file -g, --GALAXY_DATA_INDEX_DIR=N: the directory containing alignseq.loc @@ -54,7 +54,13 @@ # options, args = doc_optparse.parse( __doc__ ) try: - chrom_col, start_col, end_col, strand_col = parse_cols_arg( options.cols ) + if len(options.cols.split(',')) == 5: + # BED file + chrom_col, start_col, end_col, strand_col, name_col = parse_cols_arg( options.cols ) + else: + # gff file + chrom_col, start_col, end_col, strand_col = parse_cols_arg( options.cols ) + name_col = False dbkey = options.dbkey output_format = options.output_format gff_format = options.gff @@ -136,7 +142,8 @@ 
if isinstance( feature, ( Header, Comment ) ): line_count += 1 continue - + + name = "" if gff_format and interpret_features: # Processing features. gff_util.convert_gff_coords_to_bed( feature ) @@ -153,6 +160,8 @@ chrom = fields[chrom_col] start = int( fields[start_col] ) end = int( fields[end_col] ) + if name_col: + name = fields[name_col] if gff_format: start, end = gff_util.convert_gff_coords_to_bed( [start, end] ) if includes_strand_col: @@ -237,13 +246,16 @@ sequence = reverse_complement( sequence ) if output_format == "fasta" : - l = len( sequence ) + l = len( sequence ) c = 0 if gff_format: start, end = gff_util.convert_bed_coords_to_gff( [ start, end ] ) fields = [dbkey, str( chrom ), str( start ), str( end ), strand] meta_data = "_".join( fields ) - fout.write( ">%s\n" % meta_data ) + if name.strip(): + fout.write( ">%s %s\n" % (meta_data, name) ) + else: + fout.write( ">%s\n" % meta_data ) while c < l: b = min( c + 50, l ) fout.write( "%s\n" % str( sequence[c:b] ) ) diff -r d4e60067889e2bd8873010fa0fc887e725a4b695 -r 57e9ebe8ea6ba6686a489560cde62b309d2e5271 tools/extract/extract_genomic_dna.xml --- a/tools/extract/extract_genomic_dna.xml +++ b/tools/extract/extract_genomic_dna.xml @@ -1,4 +1,4 @@ -<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.2"> +<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="2.2.3"><description>using coordinates from assembled/unassembled genomes</description><command interpreter="python"> extract_genomic_dna.py $input $out_file1 -o $out_format -d $dbkey @@ -11,9 +11,9 @@ #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__): -1 1,4,5,7 --gff #else: - -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol} + -1 ${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol} #end if - + #if $seq_source.index_source == 
"cached": ## Genomic data from cache. -g ${GALAXY_DATA_INDEX_DIR} @@ -52,16 +52,30 @@ </data></outputs><requirements> + <requirement type="package">ucsc_tools</requirement><requirement type="binary">faToTwoBit</requirement> - <requirement type="package">ucsc_tools</requirement></requirements><tests><test><param name="input" value="1.bed" dbkey="hg17" ftype="bed" /><param name="interpret_features" value="yes"/><param name="index_source" value="cached"/> - <param name="out_format" value="fasta"/> - <output name="out_file1" file="extract_genomic_dna_out1.fasta" /> + <param name="out_format" value="fasta"/> + <output name="out_file1"> + <assert_contents> + <!-- First few lines... --> + <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" /> + <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" /> + <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" /> + <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" /> + <!-- Last few lines... 
--> + <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" /> + <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" /> + <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" /> + <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" /> + <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" /> + </assert_contents> + </output></test><test><param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> @@ -152,14 +166,14 @@ Extracting sequences with **FASTA** output data type returns:: - >hg17_chr7_127475281_127475310_+ + >hg17_chr7_127475281_127475310_+ NM_000230 GTAGGAATCGCAGCGCCAGCGGTTGCAAG - >hg17_chr7_127485994_127486166_+ + >hg17_chr7_127485994_127486166_+ NM_000230 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG GATCAATGACATTTCACACACG - >hg17_chr7_127486011_127486166_+ + >hg17_chr7_127486011_127486166_+ D49487 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC https://bitbucket.org/galaxy/galaxy-central/commits/7286338fd77e/ Changeset: 7286338fd77e Branch: extract_genomic_dna_tool_enhancements User: jmchilton Date: 2013-10-25 06:09:25 Summary: Close branch extract_genomic_dna_tool_enhancements. Affected #: 0 files Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.