1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/changeset/724bbcc69c92/
changeset:   724bbcc69c92
user:        jgoecks
date:        2012-02-02 04:36:17
summary:     Add cuffmerge tool wrapper and functional test.
affected #:  5 files
diff -r 0f7670a67901587a939a93047b6b58bdb93f3516 -r 724bbcc69c921ebe53b780d04f2699178201941e test-data/cuffmerge_out1.gtf --- /dev/null +++ b/test-data/cuffmerge_out1.gtf @@ -0,0 +1,74 @@ +chr1 Cufflinks exon 4797974 4798063 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "1"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4798536 4798567 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "2"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4818665 4818730 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "3"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4820349 4820396 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "4"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4822392 4822462 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "5"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4827082 4827155 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "6"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4829468 4829569 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "7"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4831037 4831213 . + . gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "8"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4835044 4836816 . + . 
gene_id "XLOC_000001"; transcript_id "TCONS_00000001"; exon_number "9"; gene_name "Lypla1"; oId "Lypla1"; nearest_ref "Lypla1"; class_code "="; tss_id "TSS1"; +chr1 Cufflinks exon 4847775 4848057 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000002"; exon_number "1"; gene_name "Tcea1"; oId "Tcea1_dup1"; contained_in "TCONS_00000003"; nearest_ref "Tcea1_dup1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4857551 4857613 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000002"; exon_number "2"; gene_name "Tcea1"; oId "Tcea1_dup1"; contained_in "TCONS_00000003"; nearest_ref "Tcea1_dup1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4847775 4848057 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "1"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4857551 4857613 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "2"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4868108 4868213 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "3"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4876825 4876912 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "4"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4879538 4879683 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "5"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4880821 4880877 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "6"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4881996 4882150 . + . 
gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "7"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4883498 4883644 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "8"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4885015 4885086 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "9"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 4886437 4887987 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000003"; exon_number "10"; gene_name "Tcea1"; oId "Tcea1"; nearest_ref "Tcea1"; class_code "="; tss_id "TSS2"; +chr1 Cufflinks exon 3204563 3207049 . - . gene_id "XLOC_000003"; transcript_id "TCONS_00000004"; exon_number "1"; gene_name "Xkr4"; oId "Xkr4"; nearest_ref "Xkr4"; class_code "="; tss_id "TSS3"; +chr1 Cufflinks exon 3411783 3411982 . - . gene_id "XLOC_000003"; transcript_id "TCONS_00000004"; exon_number "2"; gene_name "Xkr4"; oId "Xkr4"; nearest_ref "Xkr4"; class_code "="; tss_id "TSS3"; +chr1 Cufflinks exon 3660633 3661579 . - . gene_id "XLOC_000003"; transcript_id "TCONS_00000004"; exon_number "3"; gene_name "Xkr4"; oId "Xkr4"; nearest_ref "Xkr4"; class_code "="; tss_id "TSS3"; +chr1 Cufflinks exon 4334224 4340172 . - . gene_id "XLOC_000004"; transcript_id "TCONS_00000005"; exon_number "1"; gene_name "Rp1"; oId "Rp1"; nearest_ref "Rp1"; class_code "="; tss_id "TSS4"; +chr1 Cufflinks exon 4341991 4342162 . - . gene_id "XLOC_000004"; transcript_id "TCONS_00000005"; exon_number "2"; gene_name "Rp1"; oId "Rp1"; nearest_ref "Rp1"; class_code "="; tss_id "TSS4"; +chr1 Cufflinks exon 4342283 4342918 . - . gene_id "XLOC_000004"; transcript_id "TCONS_00000005"; exon_number "3"; gene_name "Rp1"; oId "Rp1"; nearest_ref "Rp1"; class_code "="; tss_id "TSS4"; +chr1 Cufflinks exon 4350281 4350473 . - . 
gene_id "XLOC_000004"; transcript_id "TCONS_00000005"; exon_number "4"; gene_name "Rp1"; oId "Rp1"; nearest_ref "Rp1"; class_code "="; tss_id "TSS4"; +chr1 Cufflinks exon 4481009 4482749 . - . gene_id "XLOC_000005"; transcript_id "TCONS_00000006"; exon_number "1"; gene_name "Sox17"; oId "Sox17"; nearest_ref "Sox17"; class_code "="; tss_id "TSS5"; +chr1 Cufflinks exon 4483181 4483547 . - . gene_id "XLOC_000005"; transcript_id "TCONS_00000006"; exon_number "2"; gene_name "Sox17"; oId "Sox17"; nearest_ref "Sox17"; class_code "="; tss_id "TSS5"; +chr1 Cufflinks exon 4483853 4483944 . - . gene_id "XLOC_000005"; transcript_id "TCONS_00000006"; exon_number "3"; gene_name "Sox17"; oId "Sox17"; nearest_ref "Sox17"; class_code "="; tss_id "TSS5"; +chr1 Cufflinks exon 4485217 4486023 . - . gene_id "XLOC_000005"; transcript_id "TCONS_00000006"; exon_number "4"; gene_name "Sox17"; oId "Sox17"; nearest_ref "Sox17"; class_code "="; tss_id "TSS5"; +chr1 Cufflinks exon 4486372 4486494 . - . gene_id "XLOC_000005"; transcript_id "TCONS_00000006"; exon_number "5"; gene_name "Sox17"; oId "Sox17"; nearest_ref "Sox17"; class_code "="; tss_id "TSS5"; +chr1 Cufflinks exon 4763279 4764597 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000009"; exon_number "1"; gene_name "Mrpl15"; oId "Mrpl15_dup2"; nearest_ref "Mrpl15_dup2"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4767606 4767729 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000009"; exon_number "2"; gene_name "Mrpl15"; oId "Mrpl15_dup2"; nearest_ref "Mrpl15_dup2"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4772649 4772814 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000009"; exon_number "3"; gene_name "Mrpl15"; oId "Mrpl15_dup2"; nearest_ref "Mrpl15_dup2"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4775654 4775807 . - . 
gene_id "XLOC_000006"; transcript_id "TCONS_00000009"; exon_number "4"; gene_name "Mrpl15"; oId "Mrpl15_dup2"; nearest_ref "Mrpl15_dup2"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4763279 4764597 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000008"; exon_number "1"; gene_name "Mrpl15"; oId "Mrpl15_dup1"; nearest_ref "Mrpl15_dup1"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4767606 4767729 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000008"; exon_number "2"; gene_name "Mrpl15"; oId "Mrpl15_dup1"; nearest_ref "Mrpl15_dup1"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4772649 4772814 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000008"; exon_number "3"; gene_name "Mrpl15"; oId "Mrpl15_dup1"; nearest_ref "Mrpl15_dup1"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4774032 4774186 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000008"; exon_number "4"; gene_name "Mrpl15"; oId "Mrpl15_dup1"; nearest_ref "Mrpl15_dup1"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4775654 4775807 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000008"; exon_number "5"; gene_name "Mrpl15"; oId "Mrpl15_dup1"; nearest_ref "Mrpl15_dup1"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4763279 4766882 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000007"; exon_number "1"; gene_name "Mrpl15"; oId "Mrpl15"; nearest_ref "Mrpl15"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4767606 4767729 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000007"; exon_number "2"; gene_name "Mrpl15"; oId "Mrpl15"; nearest_ref "Mrpl15"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4772649 4772814 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000007"; exon_number "3"; gene_name "Mrpl15"; oId "Mrpl15"; nearest_ref "Mrpl15"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4774032 4774186 . - . 
gene_id "XLOC_000006"; transcript_id "TCONS_00000007"; exon_number "4"; gene_name "Mrpl15"; oId "Mrpl15"; nearest_ref "Mrpl15"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 4775654 4775807 . - . gene_id "XLOC_000006"; transcript_id "TCONS_00000007"; exon_number "5"; gene_name "Mrpl15"; oId "Mrpl15"; nearest_ref "Mrpl15"; class_code "="; tss_id "TSS6"; +chr1 Cufflinks exon 3111450 3111490 . . . gene_id "XLOC_000007"; transcript_id "TCONS_00000010"; exon_number "1"; oId "CUFF.1.1"; class_code "u"; tss_id "TSS7"; +chr1 Cufflinks exon 3111546 3111576 . . . gene_id "XLOC_000008"; transcript_id "TCONS_00000011"; exon_number "1"; oId "CUFF.2.1"; class_code "u"; tss_id "TSS8"; +chr1 Cufflinks exon 3174766 3174792 . . . gene_id "XLOC_000009"; transcript_id "TCONS_00000012"; exon_number "1"; oId "CUFF.3.1"; class_code "u"; tss_id "TSS9"; +chr1 Cufflinks exon 3187402 3187428 . . . gene_id "XLOC_000010"; transcript_id "TCONS_00000013"; exon_number "1"; oId "CUFF.4.1"; class_code "u"; tss_id "TSS10"; +chr1 Cufflinks exon 3188522 3188548 . . . gene_id "XLOC_000011"; transcript_id "TCONS_00000014"; exon_number "1"; oId "CUFF.5.1"; class_code "u"; tss_id "TSS11"; +chr1 Cufflinks exon 3189811 3190789 . . . gene_id "XLOC_000012"; transcript_id "TCONS_00000015"; exon_number "1"; oId "CUFF.6.1"; class_code "u"; tss_id "TSS12"; +chr1 Cufflinks exon 3190859 3191434 . . . gene_id "XLOC_000013"; transcript_id "TCONS_00000016"; exon_number "1"; oId "CUFF.7.1"; class_code "u"; tss_id "TSS13"; +chr1 Cufflinks exon 3191513 3192077 . . . gene_id "XLOC_000014"; transcript_id "TCONS_00000017"; exon_number "1"; oId "CUFF.8.1"; class_code "u"; tss_id "TSS14"; +chr1 Cufflinks exon 3192251 3192336 . . . gene_id "XLOC_000015"; transcript_id "TCONS_00000018"; exon_number "1"; oId "CUFF.9.1"; class_code "u"; tss_id "TSS15"; +chr1 Cufflinks exon 3192442 3192494 . . . 
gene_id "XLOC_000016"; transcript_id "TCONS_00000019"; exon_number "1"; oId "CUFF.10.1"; class_code "u"; tss_id "TSS16"; +chr1 Cufflinks exon 3192551 3192676 . . . gene_id "XLOC_000017"; transcript_id "TCONS_00000020"; exon_number "1"; oId "CUFF.11.1"; class_code "u"; tss_id "TSS17"; +chr1 Cufflinks exon 3192732 3192811 . . . gene_id "XLOC_000018"; transcript_id "TCONS_00000021"; exon_number "1"; oId "CUFF.12.1"; class_code "u"; tss_id "TSS18"; +chr1 Cufflinks exon 3192941 3193042 . . . gene_id "XLOC_000019"; transcript_id "TCONS_00000022"; exon_number "1"; oId "CUFF.13.1"; class_code "u"; tss_id "TSS19"; +chr1 Cufflinks exon 3194186 3194226 . . . gene_id "XLOC_000020"; transcript_id "TCONS_00000023"; exon_number "1"; oId "CUFF.14.1"; class_code "u"; tss_id "TSS20"; +chr1 Cufflinks exon 3194303 3194329 . . . gene_id "XLOC_000021"; transcript_id "TCONS_00000024"; exon_number "1"; oId "CUFF.15.1"; class_code "u"; tss_id "TSS21"; +chr1 Cufflinks exon 3194707 3194733 . . . gene_id "XLOC_000022"; transcript_id "TCONS_00000025"; exon_number "1"; oId "CUFF.16.1"; class_code "u"; tss_id "TSS22"; +chr1 Cufflinks exon 3195084 3195110 . . . gene_id "XLOC_000023"; transcript_id "TCONS_00000026"; exon_number "1"; oId "CUFF.17.1"; class_code "u"; tss_id "TSS23"; +chr1 Cufflinks exon 3195451 3195477 . . . gene_id "XLOC_000024"; transcript_id "TCONS_00000027"; exon_number "1"; oId "CUFF.18.1"; class_code "u"; tss_id "TSS24"; +chr1 Cufflinks exon 3197090 3197116 . . . gene_id "XLOC_000025"; transcript_id "TCONS_00000028"; exon_number "1"; oId "CUFF.19.1"; class_code "u"; tss_id "TSS25"; +chr1 Cufflinks exon 3197247 3197273 . . . gene_id "XLOC_000026"; transcript_id "TCONS_00000029"; exon_number "1"; oId "CUFF.20.1"; class_code "u"; tss_id "TSS26"; +chr1 Cufflinks exon 3197347 3197373 . . . gene_id "XLOC_000027"; transcript_id "TCONS_00000030"; exon_number "1"; oId "CUFF.21.1"; class_code "u"; tss_id "TSS27"; +chr1 Cufflinks exon 3197426 3197452 . . . 
gene_id "XLOC_000028"; transcript_id "TCONS_00000031"; exon_number "1"; oId "CUFF.22.1"; class_code "u"; tss_id "TSS28"; +chr1 Cufflinks exon 3200023 3200191 . . . gene_id "XLOC_000029"; transcript_id "TCONS_00000032"; exon_number "1"; oId "CUFF.23.1"; class_code "u"; tss_id "TSS29"; +chr1 Cufflinks exon 3200326 3200352 . . . gene_id "XLOC_000030"; transcript_id "TCONS_00000033"; exon_number "1"; oId "CUFF.24.1"; class_code "u"; tss_id "TSS30"; +chr1 Cufflinks exon 3200431 3200457 . . . gene_id "XLOC_000031"; transcript_id "TCONS_00000034"; exon_number "1"; oId "CUFF.25.1"; class_code "u"; tss_id "TSS31"; +chr1 Cufflinks exon 3201008 3201481 . . . gene_id "XLOC_000032"; transcript_id "TCONS_00000035"; exon_number "1"; oId "CUFF.26.1"; class_code "u"; tss_id "TSS32"; +chr1 Cufflinks exon 3201597 3201809 . . . gene_id "XLOC_000033"; transcript_id "TCONS_00000036"; exon_number "1"; oId "CUFF.27.1"; class_code "u"; tss_id "TSS33";
diff -r 0f7670a67901587a939a93047b6b58bdb93f3516 -r 724bbcc69c921ebe53b780d04f2699178201941e tool_conf.xml.main --- a/tool_conf.xml.main +++ b/tool_conf.xml.main @@ -406,6 +406,7 @@ <tool file="ngs_rna/tophat_wrapper.xml" /><tool file="ngs_rna/cufflinks_wrapper.xml" /><tool file="ngs_rna/cuffcompare_wrapper.xml" /> + <tool file="ngs_rna/cuffmerge_wrapper.xml" /><tool file="ngs_rna/cuffdiff_wrapper.xml" /><label text="Filtering" id="filtering" /><tool file="ngs_rna/filter_transcripts_via_tracking.xml" />
diff -r 0f7670a67901587a939a93047b6b58bdb93f3516 -r 724bbcc69c921ebe53b780d04f2699178201941e tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -359,6 +359,7 @@ <tool file="ngs_rna/tophat_color_wrapper.xml" /><tool file="ngs_rna/cufflinks_wrapper.xml" /><tool file="ngs_rna/cuffcompare_wrapper.xml" /> + <tool file="ngs_rna/cuffmerge_wrapper.xml" /><tool file="ngs_rna/cuffdiff_wrapper.xml" /><!-- Trinity is very memory-intensive and should only be enabled/run on instances with sufficient resources.
diff -r 0f7670a67901587a939a93047b6b58bdb93f3516 -r 724bbcc69c921ebe53b780d04f2699178201941e tools/ngs_rna/cuffmerge_wrapper.py --- /dev/null +++ b/tools/ngs_rna/cuffmerge_wrapper.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python + +import optparse, os, shutil, subprocess, sys, tempfile + +def stop_err( msg ): + sys.stderr.write( '%s\n' % msg ) + sys.exit() + +# Copied from sam_to_bam.py: +def check_seq_file( dbkey, cached_seqs_pointer_file ): + seq_path = '' + for line in open( cached_seqs_pointer_file ): + line = line.rstrip( '\r\n' ) + if line and not line.startswith( '#' ) and line.startswith( 'index' ): + fields = line.split( '\t' ) + if len( fields ) < 3: + continue + if fields[1] == dbkey: + seq_path = fields[2].strip() + break + return seq_path + +def __main__(): + #Parse Command Line + parser = optparse.OptionParser() + parser.add_option( '-g', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' ) + parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffmerge to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.') + + # Wrapper / Galaxy options. + parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) + parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) + + # Outputs. 
+ parser.add_option( '', '--merged-transcripts', dest='merged_transcripts' ) + + (options, args) = parser.parse_args() + + # output version # of tool + try: + tmp = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp, 'wb' ) + proc = subprocess.Popen( args='cuffmerge -v 2>&1', shell=True, stdout=tmp_stdout ) + tmp_stdout.close() + returncode = proc.wait() + stdout = None + for line in open( tmp_stdout.name, 'rb' ): + if line.lower().find( 'merge_cuff_asms v' ) >= 0: + stdout = line.strip() + break + if stdout: + sys.stdout.write( '%s\n' % stdout ) + else: + raise Exception + except: + sys.stdout.write( 'Could not determine Cuffmerge version\n' ) + + # Set/link to sequence file. + if options.use_seq_data: + cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) + if not os.path.exists( cached_seqs_pointer_file ): + stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) + # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, + # and the equCab2.fa file will contain fasta sequences. + seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) + if options.ref_file != 'None': + # Create symbolic link to ref_file so that index will be created in working directory. + seq_path = "ref.fa" + os.symlink( options.ref_file, seq_path ) + + # Build command. + + # Base. + cmd = "cuffmerge -o cm_output " + + # Add options. + if options.ref_annotation: + cmd += " -g %s " % options.ref_annotation + if options.use_seq_data: + cmd += " -s %s " % seq_path + + # Add input files to a file. + inputs_file_name = tempfile.NamedTemporaryFile( dir="." ).name + inputs_file = open( inputs_file_name, 'w' ) + for arg in args: + inputs_file.write( arg + "\n" ) + inputs_file.close() + cmd += inputs_file_name + + # Debugging. + print cmd + + # Run command. + try: + tmp_name = tempfile.NamedTemporaryFile( dir="." 
).name + tmp_stderr = open( tmp_name, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() ) + returncode = proc.wait() + tmp_stderr.close() + + # Get stderr, allowing for case where it's very large. + tmp_stderr = open( tmp_name, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + + # Error checking. + if returncode != 0: + raise Exception, stderr + + if len( open( "cm_output/merged.gtf", 'rb' ).read().strip() ) == 0: + raise Exception, 'The output file is empty, there may be an error with your input file or settings.' + + # Copy outputs. + shutil.copyfile( "cm_output/merged.gtf" , options.merged_transcripts ) + + except Exception, e: + stop_err( 'Error running cuffmerge. ' + str( e ) ) + +if __name__=="__main__": __main__()
diff -r 0f7670a67901587a939a93047b6b58bdb93f3516 -r 724bbcc69c921ebe53b780d04f2699178201941e tools/ngs_rna/cuffmerge_wrapper.xml --- /dev/null +++ b/tools/ngs_rna/cuffmerge_wrapper.xml @@ -0,0 +1,126 @@ +<tool id="cuffmerge" name="Cuffmerge" version="0.0.5"> + <!-- Wrapper supports Cuffmerge versions v1.0.0-v1.0.3 --> + <description>merge together several Cufflinks assemblies</description> + <requirements> + <requirement type="package">cufflinks</requirement> + </requirements> + <command interpreter="python"> + cuffmerge_wrapper.py + + ## Use annotation reference? + #if $annotation.use_ref_annotation == "Yes": + -g $annotation.reference_annotation + #end if + + ## Use sequence data? + #if $seq_data.use_seq_data == "Yes": + -s + #if $seq_data.seq_source.index_source == "history": + --ref_file=$seq_data.seq_source.ref_file + #else: + --ref_file="None" + #end if + --dbkey=${first_input.metadata.dbkey} + --index_dir=${GALAXY_DATA_INDEX_DIR} + #end if + + ## Outputs. + --merged-transcripts=${merged_transcripts} + + ## Inputs. 
+ ${first_input} + #for $input_file in $input_files: + ${input_file.additional_input} + #end for + + </command> + <inputs> + <param format="gtf" name="first_input" type="data" label="GTF file produced by Cufflinks" help=""/> + <repeat name="input_files" title="Additional GTF Input Files"> + <param format="gtf" name="additional_input" type="data" label="GTF file produced by Cufflinks" help=""/> + </repeat> + <conditional name="annotation"> + <param name="use_ref_annotation" type="select" label="Use Reference Annotation"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <param format="gtf" name="reference_annotation" type="data" label="Reference Annotation" help="Make sure your annotation file is in GTF format and that Galaxy knows that your file is GTF--not GFF."/> + </when> + <when value="No"> + </when> + </conditional> + <conditional name="seq_data"> + <param name="use_seq_data" type="select" label="Use Sequence Data" help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No"></when> + <when value="Yes"> + <conditional name="seq_source"> + <param name="index_source" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"></when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + </when> + </conditional> + </when> + </conditional> + </inputs> + + <outputs> + <data format="gtf" name="merged_transcripts" label="${tool.name} on ${on_string}: merged transcripts"/> + </outputs> + + <tests> + <!-- + cuffmerge -g cuffcompare_in3.gtf cuffcompare_in1.gtf cuffcompare_in2.gtf + --> + <test> + <param name="first_input" value="cuffcompare_in1.gtf" ftype="gtf"/> + 
<param name="additional_input" value="cuffcompare_in2.gtf" ftype="gtf"/> + <param name="use_ref_annotation" value="Yes"/> + <param name="reference_annotation" value="cuffcompare_in3.gtf" ftype="gtf"/> + <param name="use_seq_data" value="No"/> + <output name="merged_transcripts" file="cuffmerge_out1.gtf"/> + </test> + </tests> + + <help> +**Cuffmerge Overview** + +Cuffmerge is part of Cufflinks_. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621 + +.. _Cufflinks: http://cufflinks.cbcb.umd.edu/ + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + +.. __: http://cufflinks.cbcb.umd.edu/manual.html#cuffmerge + +------ + +**Input format** + +Cuffmerge takes Cufflinks' GTF output as input, and optionally can take a "reference" annotation (such as from Ensembl_) + +.. _Ensembl: http://www.ensembl.org + +------ + +**Outputs** + +Cuffmerge produces the following output files: + +Merged transcripts file: + +Cuffmerge produces a GTF file that contains an assembly that merges together the input assemblies. </help> +</tool>
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving this because the commit notification service is enabled and this email address is listed as a recipient.
galaxy-commits@lists.galaxyproject.org