[hg] galaxy 2880: Modified Solid_to_Fastq's underlying perl script so that it would not produce the None file, and also uncommented tests and updated example
details: http://www.bx.psu.edu/hg/galaxy/rev/738085dcc542 changeset: 2880:738085dcc542 user: Kelly Vincent <kpvincent@bx.psu.edu> date: Tue Oct 13 16:49:38 2009 -0400 description: Modified Solid_to_Fastq's underlying perl script so that it would not produce the None file, and also uncommented tests and updated example 3 file(s) affected in this change: tools/next_gen_conversion/bwa_solid2fastq_modified.pl tools/next_gen_conversion/solid_to_fastq.py tools/next_gen_conversion/solid_to_fastq.xml diffs (169 lines): diff -r 39c1d12c2e7d -r 738085dcc542 tools/next_gen_conversion/bwa_solid2fastq_modified.pl --- a/tools/next_gen_conversion/bwa_solid2fastq_modified.pl Tue Oct 13 16:37:20 2009 -0400 +++ b/tools/next_gen_conversion/bwa_solid2fastq_modified.pl Tue Oct 13 16:49:38 2009 -0400 @@ -8,9 +8,9 @@ use Getopt::Std; my %opts; -my $version = '0.1.2'; +my $version = '0.1.3'; my $usage = qq{ -Usage: solid2fastq.pl <paired> <outfile1> <outfile2> <outfile3> <F3.csfasta> <F3.qual> <R3.csfasta> <R3.qual> +Usage: solid2fastq.pl <paired> <outfile1> <outfile2> <F3.csfasta> <F3.qual> <R3.csfasta> <R3.qual> Note: <in.title> is the string showed in the `# Title:' line of a ".csfasta" read file. Then <in.title>F3.csfasta is read sequence @@ -25,13 +25,11 @@ }; getopts('', \%opts); -die($usage) if (@ARGV != 8); -my ($is_paired,$outfile1,$outfile2,$outfile3,$f3reads,$f3qual,$r3reads,$r3qual) = @ARGV; +die($usage) if (@ARGV != 7); +my ($is_paired,$outfile1,$outfile2,$f3reads,$f3qual,$r3reads,$r3qual) = @ARGV; my (@fhr, @fhw); my $fn = ''; my @fn_suff = ($f3reads,$f3qual,$r3reads,$r3qual); -#my @fn_suff = ('F3.csfasta', 'F3_QV.qual', 'R3.csfasta', 'R3_QV.qual'); -#my $is_paired = (-f "$title$fn_suff[2]" || -f "$title$fn_suff[2].gz")? 1 : 0; if ($is_paired eq "yes") { # paired end for (0 .. 
3) { $fn = $fn_suff[$_]; @@ -40,33 +38,12 @@ } open($fhw[0], "|gzip >$outfile2") || die; open($fhw[1], "|gzip >$outfile1") || die; - open($fhw[2], "|gzip >$outfile3") || die; my (@df, @dr); @df = &read1(1); @dr = &read1(2); while (@df && @dr) { if ($df[0] eq $dr[0]) { # mate pair print {$fhw[0]} $df[1]; print {$fhw[1]} $dr[1]; @df = &read1(1); @dr = &read1(2); - } else { - if ($df[0] le $dr[0]) { - print {$fhw[2]} $df[1]; - @df = &read1(1); - } else { - print {$fhw[2]} $dr[1]; - @dr = &read1(2); - } - } - } - if (@df) { - print {$fhw[2]} $df[1]; - while (@df = &read1(1, $fhr[0], $fhr[1])) { - print {$fhw[2]} $df[1]; - } - } - if (@dr) { - print {$fhw[2]} $dr[1]; - while (@dr = &read1(2, $fhr[2], $fhr[3])) { - print {$fhw[2]} $dr[1]; } } close($fhr[$_]) for (0 .. $#fhr); @@ -95,7 +72,7 @@ my $t = <$fhq>; if (/^>(\d+)_(\d+)_(\d+)_[FR]3/) { $key = sprintf("%.4d_%.4d_%.4d", $1, $2, $3); # this line could be improved on 64-bit machines - #print $key; + #print $key; die(qq/** unmatched read name: '$_' != '$_'\n/) unless ($_ eq $t); my $name = "$1_$2_$3/$i"; $_ = substr(<$fhs>, 2); @@ -106,7 +83,7 @@ s/(\d+)\s*/chr($1+33)/eg; $seq = qq/\@$name\n$s+\n$_\n/; last; - } + } } return defined($seq)? 
($key, $seq) : (); } diff -r 39c1d12c2e7d -r 738085dcc542 tools/next_gen_conversion/solid_to_fastq.py --- a/tools/next_gen_conversion/solid_to_fastq.py Tue Oct 13 16:37:20 2009 -0400 +++ b/tools/next_gen_conversion/solid_to_fastq.py Tue Oct 13 16:49:38 2009 -0400 @@ -30,7 +30,7 @@ tmpf = tempfile.NamedTemporaryFile() #forward reads if options.input3 != "None" and options.input4 != "None": tmpr = tempfile.NamedTemporaryFile() #reverse reads - cmd1 = "%s/bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s %s 2>&1" %(os.path.split(sys.argv[0])[0], tmpf.name,tmpr.name,None,options.input1,options.input2,options.input3,options.input4) + cmd1 = "%s/bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s 2>&1" %(os.path.split(sys.argv[0])[0], tmpf.name,tmpr.name,options.input1,options.input2,options.input3,options.input4) try: os.system(cmd1) os.system('gunzip -c %s >> %s' %(tmpf.name,options.output1)) @@ -40,7 +40,7 @@ tmpr.close() # if single-end data else: - cmd1 = "%s/bwa_solid2fastq_modified.pl 'no' %s %s %s %s %s %s %s 2>&1" % (os.path.split(sys.argv[0])[0], tmpf.name, None, None, options.input1, options.input2, None, None) + cmd1 = "%s/bwa_solid2fastq_modified.pl 'no' %s %s %s %s %s %s 2>&1" % (os.path.split(sys.argv[0])[0], tmpf.name, None, options.input1, options.input2, None, None) try: os.system(cmd1) os.system('gunzip -c %s >> %s' % (tmpf.name, options.output1)) diff -r 39c1d12c2e7d -r 738085dcc542 tools/next_gen_conversion/solid_to_fastq.xml --- a/tools/next_gen_conversion/solid_to_fastq.xml Tue Oct 13 16:37:20 2009 -0400 +++ b/tools/next_gen_conversion/solid_to_fastq.xml Tue Oct 13 16:49:38 2009 -0400 @@ -44,15 +44,12 @@ </data> </outputs> <tests> -<!-- <test> <param name="pairedSingle" value="single" /> <param name="input1" value="s2fq_phiX.csfasta" ftype="csfasta" /> <param name="input2" value="s2fq_phiX.qualsolid" ftype="qualsolid" /> <output name="output1" file="s2fq_out1.fastqsanger" /> </test> ---> - <!-- testing framework does not deal with multiple 
outputs yet <test> <param name="pairedSingle" value="paired" /> <param name="input1" value="s2fq_paired_F3.csfasta" ftype="csfasta" /> @@ -60,9 +57,10 @@ <param name="input3" value="s2fq_paired_R3.csfasta" ftype="csfasta" /> <param name="input4" value="s2fq_paired_R3_QV.qualsolid" ftype="qualsolid" /> <output name="output1" file="s2fq_out2.fastqsanger" /> + <!-- testing framework does not deal with multiple outputs yet <output name="output2" file="s2fq_out3.fastqsanger" /> + --> </test> - --> </tests> <help> @@ -76,25 +74,25 @@ - Converting the following sequences:: - >seq1 + >1831_573_1004_F3 T00030133312212111300011021310132222 - >seq2 + >1831_573_1567_F3 T03330322230322112131010221102122113 - and quality scores:: - >seq1 - 4 29 34 34 32 32 24 24 20 17 10 34 29 20 34 13 30 34 22 24 11 28 19 17 34 17 24 17 25 34 7 24 14 12 22 - >seq2 - 8 26 31 31 16 22 30 31 28 29 22 30 30 31 32 23 30 28 28 31 19 32 30 32 19 8 32 10 13 6 32 10 6 16 11 + >1831_573_1004_F3 + 4 29 34 34 32 32 24 24 20 17 10 34 29 20 34 13 30 34 22 24 11 28 19 17 34 17 24 17 25 34 7 24 14 12 22 + >1831_573_1567_F3 + 8 26 31 31 16 22 30 31 28 29 22 30 30 31 32 23 30 28 28 31 19 32 30 32 19 8 32 10 13 6 32 10 6 16 11 - will produce the following Sanger FASTQ data:: - @seq1 + @1831_573_1004/1 AATACTTTCGGCGCCCTAAACCAGCTCACTGGGG + >CCAA9952+C>5C.?C79,=42C292:C(9/-7 - @seq2 + @1831_573_1567/1 TTTATGGGTATGGCCGCTCACAGGCCAGCGGCCT + ;@@17?@=>7??@A8?==@4A?A4)A+.'A+'1,
participants (1)
-
Greg Von Kuster