details: http://www.bx.psu.edu/hg/galaxy/rev/bc2e61c2dac1 changeset: 3559:bc2e61c2dac1 user: Greg Von Kuster <greg@bx.psu.edu> date: Tue Mar 23 15:43:58 2010 -0400 description: Add the add_scores tool. diffstat: test-data/add_scores_input1.interval | 20 +++++++++++ test-data/add_scores_input2.bed | 25 ++++++++++++++ test-data/add_scores_output1.interval | 20 +++++++++++ test-data/add_scores_output2.interval | 25 ++++++++++++++ tool-data/add_scores.loc.sample | 21 ++++++++++++ tool_conf.xml.sample | 1 + tools/evolution/add_scores.xml | 60 +++++++++++++++++++++++++++++++++++ 7 files changed, 172 insertions(+), 0 deletions(-) diffs (206 lines): diff -r dfaa18960944 -r bc2e61c2dac1 test-data/add_scores_input1.interval --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/add_scores_input1.interval Tue Mar 23 15:43:58 2010 -0400 @@ -0,0 +1,20 @@ +chr20 74149 74150 G G +chr22 14642921 14642922 C A +chr20 74148 74149 T C +chr22 15452519 15452520 C C +chr22 15472687 15472688 C N +chr22 15508088 15508089 G G +chr22 15534573 15534574 C C +chr20 24770 24771 C C +chr20 24961 24962 T T +chr22 15451993 15451994 C C +chr22 14667850 14667851 G G +chr22 15452482 15452483 A G +chr20 71317 71318 T T +chr20 74223 74224 A A +chr22 15453065 15453066 A G +chr20 74284 74285 T T +chr20 74309 74310 A A +chr22 15472610 15472611 G N +chr20 86193 86194 C C +chr20 87418 87419 C C diff -r dfaa18960944 -r bc2e61c2dac1 test-data/add_scores_input2.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/add_scores_input2.bed Tue Mar 23 15:43:58 2010 -0400 @@ -0,0 +1,25 @@ +chr1 90000 90001 A +chr2 90000 90001 A +chr3 90000 90001 A +chr4 90000 90001 A +chr5 90000 90001 A +chr6 90000 90001 A +chr7 90000 90001 A +chr8 90000 90001 A +chr9 90000 90001 A +chr10 90000 90001 A +chr11 90000 90001 A +chr12 90000 90001 A +chr13 90000 90001 A +chr14 90000 90001 A +chr15 90000 90001 A +chr16 90000 90001 A +chr17 90000 90001 A +chr18 90000 90001 A +chr19 90000 90001 A +chr20 90000 90001 A +chr21 
90000 90001 A +chr22 90000 90001 A +chrX 90000 90001 A +chrY 90000 90001 A +chrM 9000 9001 A diff -r dfaa18960944 -r bc2e61c2dac1 test-data/add_scores_output1.interval --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/add_scores_output1.interval Tue Mar 23 15:43:58 2010 -0400 @@ -0,0 +1,20 @@ +chr20 74149 74150 G G 0.885 +chr22 14642921 14642922 C A -1.844 +chr20 74148 74149 T C -3.161 +chr22 15452519 15452520 C C -1.138 +chr22 15472687 15472688 C N NA +chr22 15508088 15508089 G G -1.398 +chr22 15534573 15534574 C C 0.460 +chr20 24770 24771 C C -1.374 +chr20 24961 24962 T T -1.599 +chr22 15451993 15451994 C C 0.645 +chr22 14667850 14667851 G G 0.469 +chr22 15452482 15452483 A G -1.246 +chr20 71317 71318 T T -0.825 +chr20 74223 74224 A A -1.451 +chr22 15453065 15453066 A G -0.776 +chr20 74284 74285 T T -0.701 +chr20 74309 74310 A A -0.863 +chr22 15472610 15472611 G N NA +chr20 86193 86194 C C 0.887 +chr20 87418 87419 C C -1.703 diff -r dfaa18960944 -r bc2e61c2dac1 test-data/add_scores_output2.interval --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/add_scores_output2.interval Tue Mar 23 15:43:58 2010 -0400 @@ -0,0 +1,25 @@ +chr1 90000 90001 A 0.431 +chr2 90000 90001 A 0.514 +chr3 90000 90001 A 0.808 +chr4 90000 90001 A 0.456 +chr5 90000 90001 A 0.446 +chr6 90000 90001 A 0.397 +chr7 90000 90001 A 0.446 +chr8 90000 90001 A NA +chr9 90000 90001 A 0.470 +chr10 90000 90001 A 0.463 +chr11 90000 90001 A 0.369 +chr12 90000 90001 A 0.557 +chr13 90000 90001 A NA +chr14 90000 90001 A NA +chr15 90000 90001 A NA +chr16 90000 90001 A 0.819 +chr17 90000 90001 A -0.993 +chr18 90000 90001 A 0.657 +chr19 90000 90001 A 0.372 +chr20 90000 90001 A 0.360 +chr21 90000 90001 A NA +chr22 90000 90001 A NA +chrX 90000 90001 A 0.414 +chrY 90000 90001 A 0.414 +chrM 9000 9001 A -0.587 diff -r dfaa18960944 -r bc2e61c2dac1 tool-data/add_scores.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/add_scores.loc.sample Tue Mar 23 15:43:58 2010 -0400 @@ -0,0 
+1,21 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of gzipped genome files for use with add_scores. You will +#need to supply these files and then create an add_scores.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The add_scores.loc +#file has this format (white space characters are TAB characters): +# +#<build> <file_path> +# +#So, for example, if your add_scores.loc began like this: +# +#hg18 /afs/bx.psu.edu/depot/data/genome/hg18/misc/phyloP/ +# +#then your /afs/bx.psu.edu/depot/data/genome/hg18/misc/phyloP/ directory +#would need to contain the following gzipped files, among others: +# +#-rw-r--r-- 1 rico rico 161981190 2010-03-19 12:48 chr10.phyloP44way.primate.wigFix.gz +#-rw-r--r-- 1 rico rico 54091 2010-03-19 12:56 chr10_random.phyloP44way.primate.wigFix.gz +#-rw-r--r-- 1 rico rico 158621990 2010-03-19 12:46 chr11.phyloP44way.primate.wigFix.gz +# +hg18 /galaxy/data/hg18/misc/phyloP diff -r dfaa18960944 -r bc2e61c2dac1 tool_conf.xml.sample --- a/tool_conf.xml.sample Tue Mar 23 13:53:21 2010 -0400 +++ b/tool_conf.xml.sample Tue Mar 23 15:43:58 2010 -0400 @@ -162,6 +162,7 @@ <tool file="hyphy/hyphy_dnds_wrapper.xml" /> <tool file="evolution/mutate_snp_codon.xml" /> <tool file="evolution/codingSnps.xml" /> + <tool file="evolution/add_scores.xml" /> </section> <section name="Metagenomic analyses" id="tax_manipulation"> <tool file="taxonomy/gi2taxonomy.xml" /> diff -r dfaa18960944 -r bc2e61c2dac1 tools/evolution/add_scores.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/evolution/add_scores.xml Tue Mar 23 15:43:58 2010 -0400 @@ -0,0 +1,60 @@ +<tool id="add_scores" name="Add scores"> + <description>for interspecies conservation at each SNP</description> + <command> + add_scores $input1 ${input1.metadata.dbkey} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${GALAXY_DATA_INDEX_DIR}/add_scores.loc $out_file1 + </command>
+ <inputs> + <param format="interval" name="input1" type="data" label="SNPs"/> + </inputs> + <outputs> + <data format="input" name="out_file1" /> + </outputs> + <tests> + <test> + <param name="input1" value="add_scores_input1.interval" dbkey="hg18" /> + <output name="output" file="add_scores_output1.interval" /> + </test> + <test> + <param name="input1" value="add_scores_input2.bed" dbkey="hg18" /> + <output name="output" file="add_scores_output2.interval" /> + </test> + </tests> + + <help> +This tool adds a column that measures interspecies conservation at each SNP position, using conservation scores for primates computed by the phyloP program. It currently works only for hg18. + +**Example** + +- input file, with SNPs:: + + chr22 16440426 14440427 C/T + chr22 15494851 14494852 A/G + chr22 14494911 14494912 A/T + chr22 14550435 14550436 A/G + chr22 14611956 14611957 G/T + chr22 14612076 14612077 A/G + chr22 14668537 14668538 C + chr22 14668703 14668704 A/T + chr22 14668775 14668776 G + chr22 14680074 14680075 A/T + etc. + +- output file, with the phyloP conservation score added as the last column:: + + chr22 16440426 14440427 C/T 0.509 + chr22 15494851 14494852 A/G 0.427 + chr22 14494911 14494912 A/T NA + chr22 14550435 14550436 A/G NA + chr22 14611956 14611957 G/T -2.142 + chr22 14612076 14612077 A/G 0.369 + chr22 14668537 14668538 C 0.419 + chr22 14668703 14668704 A/T -1.462 + chr22 14668775 14668776 G 0.470 + chr22 14680074 14680075 A/T 0.000 + chr22 14680074 14680075 A/T 0.303 + etc. + +"NA" means that the phyloP score was not available. + +</help> +</tool>