commit/galaxy-central: richard_burhans: changes to aaChanges tool
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/fa40b8f3f385/ changeset: fa40b8f3f385 user: richard_burhans date: 2012-03-22 18:19:31 summary: changes to aaChanges tool affected #: 2 files diff -r 1f6c7be22ba9059d8fd8e05148466bb005a7fbd1 -r fa40b8f3f385f67d57294d98a036e483775d873f tools/evolution/codingSnps.pl --- a/tools/evolution/codingSnps.pl +++ b/tools/evolution/codingSnps.pl @@ -7,11 +7,15 @@ # and a gene bed file with cds start and stop. # It then checks for changes in coding regions, reporting # those that cause a frameshift or substitution in the amino acid. +# Output columns: +# chrom, start, end, allele as given (amb code translated) +# Gene ID from genes file, ref amino acid:variant amino acids, +# codon number, (in strand of gene)ref nt, refCodon:variantCodons ######################################################################### my $seqFlag = "2bit"; #flag to set sequence type 2bit|nib if (!@ARGV or scalar @ARGV < 3) { - print "Usage: codingSnps.pl snps.bed genes.bed (/dir/*$seqFlag|Galaxy build= loc=) [chr=# start=# end=# snp=# keepColumns=1] > codingSnps.txt\n"; + print "Usage: codingSnps.pl snps.bed genes.bed (/dir/*$seqFlag|Galaxy build= loc=) [chr=# start=# end=# snp=# strand=#|-|+ keepColumns=1 synon=1 unique=1] > codingSnps.txt\n"; exit; } my $uniq = 0; #flag for whether want uniq positions @@ -25,6 +29,7 @@ my $col1 = 1; my $col2 = 2; my $col3 = 3; +my $strand = -1; #column positions 1 based coming in (for Galaxy) foreach (@ARGV) { if (/chr=(\d+)/) { $col0 = $1 -1; } @@ -32,6 +37,10 @@ elsif (/end=(\d+)/) { $col2 = $1 -1; } elsif (/snp=(\d+)/) { $col3 = $1 -1; } elsif (/keepColumns=1/) { $keep = 1; } + elsif (/synon=1/) { $syn = 1; } + elsif (/unique=1/) { $uniq = 1; } + elsif (/strand=(\d+)/) { $strand = $1 -1; } #0 based column + elsif (/strand=-/) { $strand = -99; } #special case of all minus } if ($col0 < 0 || $col1 < 0 || $col2 < 0 || $col3 < 0) { print STDERR "ERROR column numbers are given with origin 1\n"; @@ -42,6 +51,7 @@ my %codon; #hash of codon amino acid conversions my $ends = 0; #ends vs sizes in bed 11 position, starts relative to chrom my $ignoreN = 1; #skip N +my $origAll; #alleles from input file (before changes for strand) my %amb = ( "R" => "A/G", @@ -90,6 +100,10 @@ print STDERR "ERROR file has fewer columns than requested, requested columns (0 based) $col0 $col1 $col2 $col3, file has $size\n"; exit 1; } + if ($strand >= 0 && $strand > $size) { + print STDERR "ERROR file has fewer columns than requested, requested strand in $strand (0 based), file has $size\n"; + exit 1; + } if ($s[$col1] =~ /\D/) { print STDERR "ERROR the start point must be an integer not $s[$col1]\n"; exit 1; @@ -100,6 +114,11 @@ } if ($s[$col3] eq 'N' && $ignoreN) { next; } if (exists $amb{$s[$col3]}) { $s[$col3] = $amb{$s[$col3]}; } + if (($strand >= 0 && $s[$strand] eq '-') or $strand == -99) { + #reverse complement nts + $origAll = $s[$col3]; + $s[$col3] = reverseCompAlleles($s[$col3]); + }else { undef $origAll } if (!@g && exists $chrSt{$s[$col0]}) { #need to fetch first gene row $i = $chrSt{$s[$col0]}; @g = split(/\t/, $genes[$i]); @@ -287,36 +306,45 @@ } #now compute amino acids my $oldaa = getaa($oldnts); + my $codon = "$oldnts:"; my @newaa; my $change = 0; #flag for if there is a change foreach my $v (@newnts) { my $t = getaa($v); if ($t ne $oldaa) { $change = 1; } - push(@newaa, $t); + push(@newaa, "$t"); + $codon .= "$v/"; } + $codon =~ s/\/$//; if (!$change && $syn) { if (!$keep) { print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$sref->[$col3]"; - print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\n"; + print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\t$codon\n"; }else { my @s = @{$sref}; print join("\t", @s), - "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\n"; + "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\t$codon\n"; } + $done{"$sref->[$col0] $sref->[$col1] $sref->[$col2]"}++; return; }elsif ($syn) { return; } #only want synonymous changes if (!$change) { return; } #no change in amino acids if (!$keep) { - print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$sref->[$col3]"; - if ($gref->[5] eq '-') { $changed = compl($changed); } #use plus for ref + my $a = $sref->[$col3]; + if (($strand >= 0 && $origAll) or $strand == -99) { $a = $origAll; } + print "$sref->[$col0]\t$sref->[$col1]\t$sref->[$col2]\t$a"; + #my $minus = $changed; #in case minus strand and change back + #if ($gref->[5] eq '-') { $changed = compl($changed); } #use plus for ref if (!$changed) { return; } #skip this one - print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\n"; + print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\t$codon\n"; }else { my @s = @{$sref}; + if (($strand >= 0 && $origAll) or $strand == -99) { $s[$col3] = $origAll; } print join("\t", @s); - if ($gref->[5] eq '-') { $changed = compl($changed); } #use plus for ref + #my $minus = $changed; #in case minus strand and change back + #if ($gref->[5] eq '-') { $changed = compl($changed); } #use plus for ref if (!$changed) { return; } #skip this one - print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\n"; + print "\t$gref->[3]\t$oldaa:", join("/", @newaa), "\t$cdNum\t$changed\t$codon\n"; } $done{"$sref->[$col0] $sref->[$col1] $sref->[$col2]"}++; } @@ -418,6 +446,19 @@ return $comp; } +sub reverseCompAlleles { + my $all = shift; + my @nt = split(/\//, $all); + my $rv = ''; + foreach my $n (@nt) { + $n = reverse(split(/ */, $n)); #needed for indels + $n = compl($n); + $rv .= "$n/"; + } + $rv =~ s/\/$//; + return $rv; +} + sub getaa { my $nts = shift; #in multiples of 3 my $aa = ''; diff -r 1f6c7be22ba9059d8fd8e05148466bb005a7fbd1 -r fa40b8f3f385f67d57294d98a036e483775d873f tools/evolution/codingSnps.xml --- a/tools/evolution/codingSnps.xml +++ b/tools/evolution/codingSnps.xml @@ -2,7 +2,7 @@ <description>amino-acid changes caused by a set of SNPs</description><command interpreter="perl"> - codingSnps.pl $input1 $input2 Galaxy build=${input1.metadata.dbkey} loc=${GALAXY_DATA_INDEX_DIR}/codingSnps.loc chr=${input1.metadata.chromCol} start=${input1.metadata.startCol} end=${input1.metadata.endCol} snp=$col1 keepColumns=$keep > $out_file1 + codingSnps.pl $input1 $input2 Galaxy build=${input1.metadata.dbkey} loc=${GALAXY_DATA_INDEX_DIR}/codingSnps.loc chr=${input1.metadata.chromCol} start=${input1.metadata.startCol} end=${input1.metadata.endCol} snp=$col1 keepColumns=$keep strand=${strand_source.strand_col} > $out_file1 </command><inputs> @@ -17,6 +17,22 @@ <option value="0" selected="true">No</option><option value="1">Yes</option></param> + <conditional name="strand_source"> + <param name="strand_choice" type="select" label="Strand info"> + <option value="data_column">a column in the dataset</option> + <option value="all_pos" selected="true">all on sense/forward/+ strand</option> + <option value="all_neg">all on antisense/reverse/- strand</option> + </param> + <when value="data_column"> + <param name="strand_col" type="data_column" data_ref="input1" label="Column with strand"/> + </when> + <when value="all_pos"> + <param name="strand_col" type="hidden" value="+"/> + </when> + <when value="all_neg"> + <param name="strand_col" type="hidden" value="-"/> + </when> + </conditional></inputs><outputs> @@ -36,14 +52,26 @@ <param name="input1" ftype="interval" value="codingSnps_input1.interval" dbkey="hg18" /><param name="col1" value="6" /><param name="input2" ftype="interval" value="codingSnps_inputGenes1.bed" dbkey="hg18" /> + <param name="strand_choice" value="all_pos" /> + <param name="strand_col" value="+" /><output name="output" file="codingSnps_output1.interval" /></test><test><param name="input1" ftype="interval" value="codingSnps_input2.interval" dbkey="hg18" /><param name="input2" ftype="interval" value="codingSnps_inputGenes2.bed" dbkey="hg18" /><param name="col1" value="4" /> + <param name="strand_choice" value="all_pos" /> + <param name="strand_col" value="+" /><output name="output" file="codingSnps_output2.interval" /></test> + <test> + <param name="input1" ftype="interval" value="codingSnps_input2.interval" dbkey="hg18" /> + <param name="input2" ftype="interval" value="codingSnps_inputGenes2.bed" dbkey="hg18" /> + <param name="col1" value="4" /> + <param name="strand_choice" value="all_neg" /> + <param name="strand_col" value="-" /> + <output name="output" file="codingSnps_output3.interval" /> + </test></tests><help> @@ -79,8 +107,9 @@ the first 12 columns standard BED columns. The output is the same as the first input file with several columns added: the name field from the line of the gene input file -used, the amino acids, the codon number, and the reference nucleotide that -changed in the amino acid. +used, the amino acids, the codon number, the reference nucleotide that +changed in the amino acid (in the same strand as the gene), and the codons +that go with the amino acids. The amino acids are listed with the reference amino acid first, then a colon, and then the amino acids for the alleles. If a SNP is not in a coding region or is synonymous then it is not included in the output file. @@ -128,15 +157,15 @@ - output file, showing non-synonymous substitutions in coding regions:: - chr22 15825725 15825726 G/T uc002zlw.1 Gln:Pro/Gln 469 T - chr22 15827035 15827036 G uc002zlw.1 Glu:Asp 414 C - chr22 15827135 15827136 C/G uc002zlw.1 Gly:Gly/Ala 381 C - chr22 15830928 15830929 A/G uc002zlw.1 Ala:Ser/Pro 281 C - chr22 15830951 15830952 G uc002zlw.1 Leu:Pro 273 A - chr22 15830955 15830956 C/T uc002zlw.1 Ser:Gly/Ser 272 T - chr22 15848885 15848886 C/T uc002zlw.1 Ser:Trp/Stop 217 G - chr22 15848885 15848886 C/T uc010gqs.1 Ser:Trp/Stop 200 G - chr22 15849048 15849049 A/C uc002zlw.1 Gly:Stop/Gly 163 C + chr22 15825725 15825726 G/T uc002zlw.1 Gln:Pro/Gln 469 A CAA:CCA/CAA + chr22 15827035 15827036 G uc002zlw.1 Glu:Asp 414 G GAG:GAC + chr22 15827135 15827136 C/G uc002zlw.1 Gly:Gly/Ala 381 G GGT:GGT/GCT + chr22 15830928 15830929 A/G uc002zlw.1 Ala:Ser/Pro 281 G GCA:TCA/CCA + chr22 15830951 15830952 G uc002zlw.1 Leu:Pro 273 T CTT:CCT + chr22 15830955 15830956 C/T uc002zlw.1 Ser:Gly/Ser 272 A AGC:GGC/AGC + chr22 15848885 15848886 C/T uc002zlw.1 Ser:Trp/Stop 217 C TCG:TGG/TAG + chr22 15848885 15848886 C/T uc010gqs.1 Ser:Trp/Stop 200 C TCG:TGG/TAG + chr22 15849048 15849049 A/C uc002zlw.1 Gly:Stop/Gly 163 G GGA:TGA/GGA etc. </help> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket