details: http://www.bx.psu.edu/hg/galaxy/rev/36f438ce1f82 changeset: 2639:36f438ce1f82 user: Kelly Vincent <kpvincent@bx.psu.edu> date: Fri Aug 28 15:59:16 2009 -0400 description: Added samtools-based tools (sam_to_bam, sam_merge, sam_pileup) with their supporting files and modified Bam datatype so temp files are properly cleaned up 15 file(s) affected in this change: lib/galaxy/datatypes/images.py test-data/chrM.fa test-data/sam_to_bam_in1.sam test-data/sam_to_bam_in2.sam test-data/sam_to_bam_out1.bam test-data/sam_to_bam_out2.bam tool-data/sam_fa_indices.loc.sample tool_conf.xml.sample tools/samtools/sam_merge.py tools/samtools/sam_merge.xml tools/samtools/sam_merge_code.py tools/samtools/sam_pileup.py tools/samtools/sam_pileup.xml tools/samtools/sam_to_bam.py tools/samtools/sam_to_bam.xml diffs (843 lines): diff -r d261f41a2a03 -r 36f438ce1f82 lib/galaxy/datatypes/images.py --- a/lib/galaxy/datatypes/images.py Fri Aug 28 15:29:53 2009 -0400 +++ b/lib/galaxy/datatypes/images.py Fri Aug 28 15:59:16 2009 -0400 @@ -252,14 +252,17 @@ index_file = dataset.metadata.spec['bam_index'].param.new_file( dataset = dataset ) tmp_dir = tempfile.gettempdir() tmpf1 = tempfile.NamedTemporaryFile(dir=tmp_dir) + tmpf1bai = '%s.bai' % tmpf1.name try: subprocess.check_call(['cd', tmp_dir], shell=True) subprocess.check_call('cp %s %s' % (dataset.file_name, tmpf1.name), shell=True) subprocess.check_call('samtools index %s' % tmpf1.name, shell=True) - subprocess.check_call('cp %s.bai %s' % (tmpf1.name, index_file.file_name), shell=True) + subprocess.check_call('cp %s %s' % (tmpf1bai, index_file.file_name), shell=True) except subprocess.CalledProcessError: sys.stderr.write('There was a problem creating the index for the BAM file\n') tmpf1.close() + if os.path.exists(tmpf1bai): + os.remove(tmpf1bai) dataset.metadata.bam_index = index_file def set_peek( self, dataset ): if not dataset.dataset.purged: diff -r d261f41a2a03 -r 36f438ce1f82 test-data/chrM.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/chrM.fa Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,335 @@ +>chrM +GTTAATGTAGCTTAATAATATAAAGCAAGGCACTGAAAATGCCTAGATGA +GTATTCTTACTCCATAAACACATAGGCTTGGTCCTAGCCTTTTTATTAGT +TATTAATAGAATTACACATGCAAGTATCCGCACCCCAGTGAGAATGCCCT +CTAAATCACGTCTCTACGATTAAAAGGAGCAGGTATCAAGCACACTAGAA +AGTAGCTCATAACACCTTGCTCAGCCACACCCCCACGGGACACAGCAGTG +ATAAAAATTAAGCTATGAACGAAAGTTCGACTAAGTCATATTAAATAAGG +GTTGGTAAATTTCGTGCCAGCCACCGCGGTCATACGATTAACCCAAATTA +ATAAATCTCCGGCGTAAAGCGTGTCAAAGACTAATACCAAAATAAAGTTA +AAACCCAGTTAAGCCGTAAAAAGCTACAACCAAAGTAAAATAGACTACGA +AAGTGACTTTAATACCTCTGACTACACGATAGCTAAGACCCAAACTGGGA +TTAGATACCCCACTATGCTTAGCCCTAAACTAAAATAGCTTACCACAACA +AAGCTATTCGCCAGAGTACTACTAGCAACAGCCTAAAACTCAAAGGACTT +GGCGGTGCTTTACATCCCTCTAGAGGAGCCTGTTCCATAATCGATAAACC +CCGATAAACCCCACCATCCCTTGCTAATTCAGCCTATATACCGCCATCTT +CAGCAAACCCTAAACAAGGTACCGAAGTAAGCACAAATATCCAACATAAA +AACGTTAGGTCAAGGTGTAGCCCATGGGATGGAGAGAAATGGGCTACATT +TTCTACCCTAAGAACAAGAACTTTAACCCGGACGAAAGTCTCCATGAAAC +TGGAGACTAAAGGAGGATTTAGCAGTAAATTAAGAATAGAGAGCTTAATT +GAATCAGGCCATGAAGCGCGCACACACCGCCCGTCACCCTCCTTAAATAT +CACAAATCATAACATAACATAAAACCGTGACCCAAACATATGAAAGGAGA +CAAGTCGTAACAAGGTAAGTATACCGGAAGGTGTACTTGGATAACCAAAG +TGTAGCTTAAACAAAGCATCCAGCTTACACCTAGAAGATTTCACTCAAAA +TGAACACTTTGAACTAAAGCTAGCCCAAACAATACCTAATTCAATTACCC +TTAGTCACTTAACTAAAACATTCACCAAACCATTAAAGTATAGGAGATAG +AAATTTTAACTTGGCGCTATAGAGAAAGTACCGTAAGGGAACGATGAAAG +ATGCATTAAAAGTACTAAACAGCAAAGCTTACCCCTTTTACCTTTTGCAT +AATGATTTAACTAGAATAAACTTAGCAAAGAGAACTTAAGCTAAGCACCC +CGAAACCAGACGAGCTACCTATGAACAGTTACAAATGAACCAACTCATCT +ATGTCGCAAAATAGTGAGAAGATTCGTAGGTAGAGGTGAAAAGCCCAACG +AGCCTGGTGATAGCTGGTTGTCCAGAAACAGAATTTCAGTTCAAATTTAA +ATTTACCTAAAAACTACTCAATTCTAATGTAAATTTAAATTATAGTCTAA +AAAGGTACAGCTTTTTAGATACAGGTTACAACCTTCATTAGAGAGTAAGA +ACAAGATAAACCCATAGTTGGCTTAAAAGCAGCCATCAATTAAGAAAGCG +TTCAAGCTCAACGACACATCTATCTTAATCCCAACAATCAACCCAAACTA +ACTCCTAATCTCATACTGGACTATTCTATCAACACATAGAAGCAATAATG +TTAATATGAGTAACAAGAATTATTTCTCCTTGCATAAGCTTATATCAGAA +CGAATACTCACTGATAGTTAACAACAAGATAGGGATAATCCAAAAACTAA +TCATCTATTTAAACCATTGTTAACCCAACACAGGCATGCATCTATAAGGA +AAGATTAAAAGAAGTAAAAGGAACTCGGCAAACACAAACCCCGCCTGTTT +ACCAAAAACATCACCTCTAGCATTTCCAGTATTAGAGGCACTGCCTGCCC +AGTGACATCTGTTtaaacggccgcggtatcctaaccgtgcaaaggtagca +taatcacttgttccctaaatagggacttgtatgaatggccacacgagggt +tttactgtctcttacttccaatcagtgaaattgaccttcccgtgaagagg +cgggaatgactaaataagacgagaagaccctatggagcttTAATTAACTG +ATTCACAAAAAACAACACACAAACCTTAACCTTCAGGGACAACAAAACTT +TTGATTGAATCAGCAATTTCGGTTGGGGTGACCTCGGAGAACAAAACAAC +CTCCGAGTGATTTAAATCCAGACTAACCAGTCAAAATATATAATCACTTA +TTGATCCAAACCATTGATCAACGGAACAAGTTACCCTAGGGATAACAGCG +CAATCCTATTCCAGAGTCCATATCGACAATTAGGGTTTACGACCTCGATG +TTGGATCAAGACATCCTAATGGTGCAACCGCTATTAAGGGTTCGTTTGTT +CAACGATTAAAGTCTTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGT +CGGTTTCTATCTATTCTATACTTTTCCCAGTACGAAAGGACAAGAAAAGT +AGGGCCCACTTTACAAGAAGCGCCCTCAAACTAATAGATGACATAATCTA +AATCTAACTAATTTATAACTTCTACCGCCCTAGAACAGGGCTCgttaggg +tggcagagcccggaaattgcataaaacttaaacctttacactcagaggtt +caactcctctccctaacaacaTGTTCATAATTAACGTCCTCCTCCTAATT +GTCCCAATCTTGCTCGCCGTAGCATTCCTCACACTAGTTGAACGAAAAGT +CTTAGGCTATATGCAACTTCGCAAAGGACCCAACATCGTAGGCCCCTATG +GCCTACTACAACCTATTGCCGATGCCCTCAAACTATTTATCAAAGAGCCA +CTACAACCACTAACATCATCGACATCCATATTCATCATCGCACCAATCCT +AGCCCTAACCCTGGCCTTAACCATATGAATCCCTCTGCCCATACCATACC +CACTAATCAACATAAACCTAGGAATTCTATTCATACTAGCCATGTCCAGC +CTAGCTGTCTACTCAATCCTTTGATCAGGATGGGCCTCAAACTCAAAATA +CGCCCTAATTGGAGCTCTACGAGCAGTAGCACAAACCATCTCATACGAAG +TAACTCTAGCAATCATCCTACTCTCAGTCCTCCTAATAAGCGGATCATTC +ACATTATCAACACTTATTATTACCCAAGAATACCTCTGATTAATCTTCCC +ATCATGACCCTTAGCCATAATGTGATTCATCTCAACATTAGCCGAAACCA +ACCGAGCTCCATTTGACCTAACAGAAGGAGAATCAGAACTCGTCTCTGGA +TTCAACGTTGAATACGCAGCCGGCCCATTTGCTCTATTCTTCCTAGCAGA +ATACGCAAACATCATCATGATAAACATCTTCACAACAACCCTATTTCTAG +GAGCATTTCACAACCCCTACCTGCCAGAACTCTACTCAATTAATTTCACC +ATTAAAGCTCTCCTTCTAACATGTTCCTTCCTATGAATCCGAGCATCCTA +CCCACGATTCCGATATGACCAACTTATACACCTCCTATGAAAGAACTTCC +TACCACTCACACTAGCCCTCTGCATATGACACGTCTCACTTCCAATCATA +CTATCCAGCATCCCACCACAAACATAGGAAATATGTCTGACAAAAGAGTT +ACTTTGATAGAGTAAAACATAGAGGCTCAAACCCTCTTATTTctagaact +acaggaattgaacctgctcctgagaattcaaaatcctccgtgctaccgaa +ttacaccatgtcctaCAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCAT +ACCCCGAAAATGTTGGATTACACCCTTCCCGTACTAATAAATCCCCTTAT +CTTCACAACTATTCTAATAACAGTTCTTCTAGGAACTATAATCGTTATAA +TAAGCTCACACTGACTAATAATCTGAATCGGATTTGAAATAAATCTACTA +GCCATTATCCCTATCCTAATAAAAAAGTACAATCCCCGAACCATAGAAGC +CTCCACCAAATATTTTCTAACCCAAGCCACCGCATCAATACTCCTCATAA +TAGCGATCATCATTAACCTCATACACTCAGGCCAATGAACAATCACAAAA +GTCTTCAACCCCACAGCGTCCATCATTATAACTTCAGCTCTCGCCATAAA +ACTTGGACTCACACCATTCCACTTCTGAGTACCCGAAGTCACACAGGGCA +TCTCATTAACATCAGGTCTCATCCTACTTACATGACAAAAACTAGCCCCA +ATATCAATCCTATATCAAATCTCACCCTCAATTAACCTAAATATCTTATT +AACTATAGCCGTACTGTCAATCCTAGTAGGAGGCTGAGGCGGTCTCAACC +AAACCCAACTACGAAAAATCATAGCATACTCGTCAATCGCGCATATAGGA +TGAATAACAGCTGTCCTAGTATATAACCCAACACTAACAATACTAAACAT +ATTAATTTACATTATAATAACACTCACAATATTCATACTATTTATCCACA +GCTCCTCTACTACAACACTATCACTCTCCCACACATGAAACAAAATACCT +CTAACCACTACACTAATCTTAATTACCTTACTATCCATAGGAGGCCTCCC +CCCACTATCAGGATTCATACCCAAATGAATAATCATTCAAGAGCTCACCA +AAAATAGCAGCATCATCCTCCCCACACTAATAGCCATTATAGCACTACTC +AACCTCTACTTCTACATACGACTAACCTATTCCACCTCACTGACCATATT +CCCATCCACAAACAACATAAAAATAAAATGACAATTCGAAACCAAACGAA +TTACTCTCTTACCCCCGTTAATTGTTATATCCTCCCTACTCCTCCCCCTA +ACCCCCATACTATCAATTTTGGACTAGGAATTTAGGTTAACATCCCAGAC +CAAGAGCCTTCAAAGCTCTAAGCAAGTGAATCCACTTAATTCCTGCATAC +TAAGGACTGCGAGACTCTATCTCACATCAATTGAACGCAAATCAAACTCT +TTTATTAAGCTAAGCCCTTACTAGATTGGTGGGCTACCATCCCACGAAAT +TTTAGTTAACAGCTAAATACCCTAATCAACTGGCTTCAATCTACTTCTCC +CGCCGCCTAGAAAAAAAGGCGGGAGAAGCCCCGGCAGAAATTGAAGCTGC +TCCTTTGAATTTGCAATTCAATGTGAAAATTCACCACGGGACTTGATAAG +AAGAGGATTCCAACCCCTGTCTTTAGATTTACAGTCTAATGCTTACTCAG +CCATCTTACCTATGTTCATCAACCGCTGACTATTTTCAACTAACCACAAA +GACATCGGCACTCTGTACCTCCTATTCGGCGCTTGAGCTGGAATAGTAGG +AACTGCCCTAAGCCTCCTAATCCGTGCTGAATTAGGCCAACCTGGGACCC +TACTAGGAGATGATCAGATCTACAATGTCATTGTAACCGCCCATGCATTC +GTAATAATTTTCTTTATGGTCATACCCATTATAATCGGAGGATTCGGAAA +CTGATTAGTCCCCCTGATAATTGGAGCACCTGATATAGCTTTCCCCCGAA +TAAACAACATAAGCTTCTGATTACTTCCCCCATCATTCCTACTTCTTCTC +GCTTCCTCAATAATTGAAGCAGGTGCCGGAACAGGCTGAACCGTATATCC +TCCTCTAGCTGGAAATCTGGCGCATGCAGGAGCCTCTGTTGACTTAACCA +TTTTCTCTCTCCACCTAGCTGGGGTGTCCTCGATTTTAGGTGCCATCAAC +TTTATTACCACAATCATTAACATAAAACCACCAGCCCTATCCCAATATCA +AACCCCCCTATTCGTTTGATCTGTCCTTATTACGGCAGTACTCCTTCTCC +TAGCCCTCCCGGTCCTAGCAGCAGGCATTACCATGCTTCTCACAGACCGT +AACCTGAACACTACTTTCTTCGACCCCGCAGGAGGAGGGGATCCAATCCT +TTATCAACACCTATTCTGATTCTTCGGACACCCCGAAGTCTATATTCTTA +TCCTACCAGGCTTCGGTATAATCTCACACATCGTCACATACTACTCAGGT +AAAAAGGAACCTTTTGGCTACATGGGTATAGTGTGAGCTATAATATCCAT +TGGCTTTCTAGGCTTCATCGTATGGGCTCACCACATGTTTACAGTAGGGA +TAGACGTTGACACACGAGCATACTTCACATCAGCTACCATAATCATCGCT +ATCCCTACTGGTGTAAAAGTATTCAGCTGACTAGCCACCCTGCACGGAGG +AAATATCAAATGATCTCCAGCTATACTCTGAGCTCTAGGCTTCATCTTCT +TATTCACAGTAGGAGGTCTAACAGGAATCGTCCTAGCTAACTCATCCCTA +GATATTGTTCTCCACGATACTTATTATGTAGTAGCACATTTCCATTATGT +CCTGTCTATAGGAGCAGTCTTCGCCATTATGGGGGGATTTGTACACTGAT +TCCCTCTATTCTCAGGATACACACTCAACCAAACCTGAGCAAAAATCCAC +TTTACAATTATATTCGTAGGGGTAAATATAACCTTCTTCCCACAACATTT +CCTTGGCCTCTCAGGAATGCCACGACGCTATTCTGATTATCCAGACGCAT +ATACAACATGAAATACCATCTCATCCATAGGATCTTTTATCTCACTTACA +GCAGTGATACTAATAATTTTCATAATTTGAGAAGCGTTCGCATCCAAACG +AGAAGTGTCTACAGTAGAATTAACCTCAACTAATCTGGAATGACTACACG +GATGCCCCCCACCATACCACACATTTGAAGAACCCACCTACGTAAACCTA +AAAtaagaaaggaaggaatcgaaccccctctaactggtttcaagccaata +tcataaccactatgtctttctcCATCAATTGAGGTATTAGTAAAAATTAC +ATGACTTTGTCAAAGTTAAATTATAGGTTAAACCCCTATATACCTCTATG +GCCTACCCCTTCCAACTAGGATTCCAAGACGCAACATCCCCTATTATAGA +AGAACTCCTACACTTCCACGACCACACACTAATAATCGTATTCCTAATTA +GCTCTCTAGTATTATATATTATCTCATCAATACTAACAACTAAATTAACC +CATACCAGCACCATAGATGCTCAAGAAGTAGAGACAATTTGAACGATTTT +ACCAGCCATCATCCTTATTCTAATCGCCCTCCCATCCCTACGAATTCTAT +ATATAATAGATGAAATCAATAATCCGTCCCTCACAGTCAAAACAATAGGC +CACCAATGATACTGAAGCTACGAGTATACCGATTACGAAGACTTGACCTT +TGACTCCTACATGATCCCCACATCAGACCTAAAACCAGGAGAATTACGTC +TTCTAGAAGTCGACAATCGAGTGGTTCTCCCCATAGAAATAACCATCCGA +ATGCTAATTTCATCCGAAGACGTCCTACACTCATGAGCTGTGCCCTCCCT +AGGCCTAAAAACAGACGCTATCCCTGGGCGCCTAAATCAGACAACTCTCG +TGGCCTCTCGACCAGGACTTTACTACGGTCAATGCTCAGAGATCTGCGGA +TCAAACCACAGCTTTATACCAATTGTCCTTGAACTAGTTCCACTGAAACA +CTTCGAAGAATGATCTGCATCAATATTATAAAGTCACTAAGAAGCTATTA +TAGCATTAACCTTTTAAGTTAAAGATTGAGGGTTCAACCCCCTCCCTAGT +GATATGCCACAGTTGGATACATCAACATGATTTATTAATATCGTCTCAAT +AATCCTAACTCTATTTATTGTATTTCAACTAAAAATCTCAAAGCACTCCT +ATCCGACACACCCAGAAGTAAAGACAACCAAAATAACAAAACACTCTGCC +CCTTGAGAATCAAAATGAACGAAAATCTATTCGCCTCTTTCGCTACCCCA +ACAATAGTAGGCCTCCCTATTGTAATTCTGATCATCATATTTCCCAGCAT +CCTATTCCCCTCACCCAACCGACTAATCAACAATCGCCTAATCTCAATTC +AACAATGGCTAGTCCAACTTACATCAAAACAAATAATAGCTATCCATAAC +AGCAAAGGACAAACCTGAACTCTTATACTCATATCACTGATCCTATTCAT +TGGCTCAACAAACTTATTAGGCCTACTACCTCACTCATTTACACCAACAA +CACAACTATCAATAAACCTAGGCATAGCTATTCCCCTATGGGCAGGGACA +GTATTCATAGGCTTTCGTCACAAAACAAAAGCAGCCCTAGCCCACTTTCT +ACCTCAAGGGACGCCCATTTTCCTCATCCCCATACTAGTAATTATCGAGA +CTATCAGCCTATTTATTCAACCTGTAGCCCTAGCCGTGCGGCTAACCGCT +AACATTACCGCCGGACACCTCCTAATACACCTCATCGGAGGGGCAACACT +AGCCCTCATAAGCATCAGCCCCTCAACAGCCCTTATTACGTTTATCATCC +TAATTCTACTAACTATCCTCGAATTCGCAGTAGCTATAATCCAAGCCTAC +GTATTCACTCTCCTGGTAAGCCTTTACTTACACGACAACACCTAATGACC +CACCAAACCCACGCTTACCACATAGTAAACCCCAGCCCATGACCACTTAC +AGGAGCCCTATCAGCCCTCCTGATAACATCAGGACTAGCCATGTGATTTC +ACTTTAACTCAACCTTACTTCTAGCTATAGGGCTATTAACTAACATCCTT +ACCATATATCAATGATGACGAGACATCATCCGAGAAAGCACATTCCAAGG +CCATCACACATCAATCGTTCAAAAGGGACTCCGATATGGCATAATCCTTT +TTATTATCTCAGAAGTCTTCTTCTTCTCTGGCTTCTTCTGAGCCTTTTAC +CACTCAAGCCTAGCCCCCACACCCGAACTAGGCGGCTGCTGACCACCCAC +AGGTATCCACCCCTTAAACCCCCTAGAAGTCCCCTTACTCAACACCTCAG +TGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCACCATAGCCTAATA +GAAGGAAACCGTAAAAATATGCTCCAAGGCCTATTCATCACAATTTCACT +AGGCGTATACTTCACCCTTCTCCAAGCCTCAGAATACTATGAAGCCTCAT +TTACTATTTCAGATGGAGTATACGGATCAACATTTTTCGTAGCAACAGGG +TTCCACGGACTACACGTAATTATCGGATCTACCTTCCTCATTGTATGTTT +CCTACGCCAACTAAAATTCCACTTTACATCCAGCCACCACTTCGGATTCG +AAGCAGCCGCTTGATACTGACACTTCGTCGACGTAGTCTGACTATTCTTG +TACGTCTCTATTTATTGATGAGGATCCTATTCTTTTAGTATTGACCAGTA +CAATTGACTTCCAATCAATCAGCTTCGGTATAACCCGAAAAAGAATAATA +AACCTCATACTGACACTCCTCACTAACACATTACTAGCCTCGCTACTCGT +ACTCATCGCATTCTGACTACCACAACTAAACATCTATGCAGAAAAAACCA +GCCCATATGAATGCGGATTTGACCCTATAGGGTCAGCACGCCTCCCCTTC +TCAATAAAATTTTTCTTAGTGGCCATTACATTTCTGCTATTCGACTTAGA +AATTGCCCTCCTATTACCCCTTCCATGAGCATCCCAAACAACTAACCTAA +ACACTATACTTATCATAGCACTAGTCCTAATCTCTCTTCTAGCCATCAGC +CTAGCCTACGAATGAACCCAAAAAGGACTAGAATGAACTGAGTATGGTAA +TTAGTTTAAACCAAAACAAATGATTTCGACTCATTAAACTATGATTAACT +TCATAATTACCAACATGTCACTAGTCCATATTAATATCTTCCTAGCATTC +ACAGTATCCCTCGTAGGCCTACTAATGTACCGATCCCACCTAATATCCTC +ACTCCTATGCCTAGAAGGAATAATACTATCACTATTCGTCATAGCAACCA +TAATAGTCCTAAACACCCACTTCACACTAGCTAGTATAATACCTATCATC +TTACTAGTATTTGCTGCCTGCGAACGAGCTCTAGGATTATCCCTACTAGT +CATAGTCTCCAATACTTATGGAGTAGACCACGTACAAAACCTTAACCTCC +TCCAATGCTAAAAATTATCATTCCCACAATCATACTTATGCCCCTTACAT +GACTATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATAGTCTA +TTAATCAGCCTTATCAGCCTATCCCTCCTAAACCAACCTAGCAACAATAG +CCTAAACTTCTCACTAATATTCTTCTCCGATCCCCTATCAGCCCCACTTC +TGGTGTTGACAACATGACTACTGCCACTAATACTCATAGCCAGCCAACAC +CATCTATCTAAGGAACCACTAATCCGAAAAAAACTCTACATCACCATGCT +AACCATACTTCAAACTTTCCTAATCATGACTTTTACCGCCACAGAACTAA +TCTCCTTCTACATCCTATTTGAAGCCACATTAGTTCCAACACTAATTATC +ATCACCCGCTGAGGCAACCAAACAGAACGCCTGAACGCAGGCCTCTACTT +CCTATTCTACACACTAATAGGTTCCCTCCCACTCTTAGTTGCACTAATCT +CTATCCAAAACCTAACAGGCTCACTAAACTTCCTATTAATTCAATACTGA +AACCAAGCACTACCCGACTCTTGATCCAATATTTTCCTATGACTAGCATG +TATAATAGCATTCATAGTCAAAATACCGGTATATGGTCTTCACCTCTGAC +TCCCAAAAGCCCATGTAGAAGCCCCAATTGCCGGATCCATAGTGCTAGCA +GCCATTCTACTAAAACTAGGAGGCTACGGAATACTACGAATTACAACAAT +ACTAAACCCCCAAACTAGCTTTATAGCCTACCCCTTCCTCATACTATCCC +TGTGAGGAATAATCATAACTAGTTCCATCTGCTTGCGACAAACCGATCTA +AAATCACTTATTGCATACTCCTCTGTCAGCCACATAGCCCTAGTAATCGT +AGCCGTCCTCATCCAAACACCATGAAGTTATATAGGAGCTACAGCCCTAA +TAATCGCTCACGGCCTTACATCATCAATACTATTCTGCCTGGCAAACTCA +AATTACGAACGTACCCATAGCCGAACTATAATCCTAGCCCGCGGGCTTCA +AACACTTCTTCCCCTTATAGCAGCCTGATGACTATTAGCCAGCCTAACCA +ACCTGGCCCTCCCTCCCAGCATTAACCTAATTGGAGAGCTATTCGTAGTA +ATATCATCATTCTCATGATCAAATATTACCATTATCCTAATAGGAGCCAA +TATCACCATCACCGCCCTCTACTCCCTATACATACTAATCACAACACAAC +GAGGGAAATACACACACCATATCAACAGCATTAAACCTTCATTTACACGA +GAAAACGCACTCATGGCCCTCCACATGACTCCCCTACTACTCCTATCACT +TAACCCTAAAATTATCCTAGGCTTTACGTACTGTAAATATAGTTTAACAA +AAACACTAGATTGTGGATCTAGAAACAGAAACTTAATATTTCTTATTTAC +CGAGAAAGTATGCAAGAACTGCTAATTCATGCCCCCATGTCCAACAAACA +TGGCTCTCTCAAACTTTTAAAGGATAGGAGCTATCCGTTGGTCTTAGGAA +CCAAAAAATTGGTGCAACTCCAAATAAAAGTAATCAACATGTTCTCCTCC +CTCATACTAGTTTCACTATTAGTACTAACCCTCCCAATCATATTATCAAT +CTTCAATACCTACAAAAACAGCACGTTCCCGCATCATGTAAAAAACACTA +TCTCATATGCCTTCATTACTAGCCTAATTCCCACTATAATATTTATTCAC +TCTGGACAAGAAACAATTATCTCAAACTGACACTGAATAACCATACAAAC +CCTCAAACTATCCCTAAGCTTCAAACTAGATTACTTCTCAATAATTTTCG +TACCAGTAGCCCTATTCGTAACATGATCTATTATGGAATTCTCCCTATGA +TACATGCACTCAGATCCTTACATTACTCGATTTTTTAAATACTTACTTAC +ATTCCTCATCACTATAATAATTCTAGTCACAGCTAACAACCTTTTCCAAC +TGTTCATCGGATGGGAGGGAGTAGGCATCATGTCATTCTTACTAATCGGA +TGATGATACGGCCGAACAGATGCCAACACCGCGGCCCTTCAAGCAATCCT +TTATAACCGCATCGGGGATATCGGCTTCATCATGGCCATAGCCTGATTCC +TATTCAACACCAACACATGAGACCTCCAACAAATCTTCATACTCGACCCC +AACCTTACCAACCTCCCGCTCCTAGGCCTCCTCCTAGCCGCAACTGGCAA +ATCCGCTCAATTTGGACTCCACCCATGACTTCCTTCAGCCATAGAGGGCC +CTACACCAGTCTCAGCCCTACTCCACTCCAGCACAATAGTTGTAGCAGGC +GTCTTCCTGCTAATCCGCTTCCATCCACTAATAGAAAACAACAAAACAAT +CCAGTCACTTACCCTATGCCTAGGAGCCATCACCACACTATTCACAGCAA +TCTGCGCACTCACTCAAAACGATATCAAAAAAATCATTGCTTTCTCCACC +TCCAGCCAACTAGGCCTGATAATCGTAACCATCGGTATCAATCAACCCTA +CCTAGCATTCCTCCACATTTGCACTCACGCATTCTTCAAAGCTATACTAT +TTATATGTTCCGGATCCATTATCCACAGCCTAAATGACGAGCAAGATATC +CGAAAAATAGGCGGACTATTTAATGCAATACCCTTCACCACCACATCTCT +AATTATTGGCAGCCTTGCACTCACCGGAATTCCTTTCCTCACAGGCTTCT +ACTCCAAAGACCTCATCATCGAAACCGCCAACACATCGTACACCAACGCC +TGAGCCCTACTAATAACTCTCATTGCCACATCCCTCACAGCTGTCTACAG +TACCCGAATCATCTTCTTTGCACTCCTAGGGCAACCCCGCTTCCTCCCTC +TGACCTCAATCAACGAAAATAACCCCTTTCTAATTAACTCCATCAAACGC +CTCTTAATTGGCAGCATTTTTGCCGGATTCTTCATCTCCAACAATATCTA +CCCCACAACCGTCCCAGAAATAACCATACCTACTTACATAAAACTCACCG +CCCTCGCAGTAACCATCCTAGGATTTACACTAGCCCTAGAACTAAGCTTG +ATAACCCATAACTTAAAACTAGAACACTCCACCAACGTATTCAAATTCTC +CAACCTCCTAGGATACTACCCAACAATTATACACCGACTCCCACCGCTCG +CTAACCTATCAATAAGCCAAAAATCAGCATCACTTCTACTAGACTCAATC +TGACTAGAAAACATCCTGCCAAAATCTATCTCCCAGTTCCAAATAAAAAC +CTCGATCCTAATTTCCACCCAAAAAGGACAAATCAAATTATATTTCCTCT +CATTCCTCATCACCCTTACCCTAAGCATACTACTTTTTAATCTCCACGAG +TAACCTCTAAAATTACCAAGACCCCAACAAGCAACGATCAACCAGTCACA +ATCACAACCCAAGCCCCATAACTATACAATGCAGCAGCCCCTATAATTTC +CTCACTAAACGCCCCAGAATCTCCAGTATCATAAATAGCTCAAGCCCCCA +CACCACTAAACTTAAACACTACCCCCACTTCCTCACTCTTCAGAACATAT +AAAACCAACATAACCTCCATCAACAACCCTAAAAGAAATACCCCCATAAC +AGTCGTATTAGACACCCATACCTCAGGATACTGCTCAGTAGCCATAGCCG +TTGTATAACCAAAAACAACCAACATTCCTCCCAAATAAATCAAAAACACC +ATCAACCCCAAAAAGGACCCTCCAAAATTCATAATAATACCACAACCTAC +CCCTCCACTTACAATCAGCACTAAACCCCCATAAATAGGTGAAGGTTTTG +AAGAAAACCCCACAAAACTAACAACAAAAATAACACTCAAAATAAACACA +ATATATGTCATCATTATTCCCACGTGGAATCTAACCACGACCAATGACAT +GAAAAATCATCGTTGTATTTCAACTATAAGAACACCAATGACAAACATCC +GGAAATCTCACCCACTAATTAAAATCATCAATCACTCTTTTATTGACCTA +CCAGCCCCCTCAAACATTTCATCATGATGAAACTTCGGCTCCCTCCTAGG +AATCTGCCTAATCCTCCAAATCTTAACAGGCCTATTCCTAGCCATACACT +ACACATCAGACACGACAACTGCCTTCTCATCCGTCACTCACATCTGCCGA +GACGTTAACTACGGATGAATTATTCGCTACCTCCATGCCAACGGAGCATC +AATATTTTTTATCTGCCTCTTCATTCACGTAGGACGCGGCCTCTACTACG +GCTCTTACACATTCCTAGAGACATGAAACATTGGAATCATCCTACTTTTC +ACAGTTATAGCTACAGCATTCATGGGCTATGTCCTACCATGAGGCCAAAT +ATCCTTTTGAGGAGCAACAGTCATCACGAACCTCCTATCAGCAATTCCCT +ACATCGGTACTACCCTCGTCGAGTGAATCTGAGGTGGATTCTCAGTAGAC +AAAGCCACCCTTACCCGATTTTTTGCTTTCCACTTCATCCTACCCTTCAT +CATCACAGCCCTGGTAGTCGTACATTTACTATTTCTTCACGAAACAGGAT +CTAATAACCCCTCAGGAATCCCATCCGATATGGACAAAATCCCATTCCAC +CCATATTATACAATTAAAGACATCCTAGGACTCCTCCTCCTGATCTTGCT +CCTACTAACTCTAGTATTATTCTCCCCCGACCTCCTAGGAGACCCAGACA +ACTACACCCCAGCTAACCCTCTCAGCACTCCCCCTCATATTAAACCAGAA +TGGTACTTCCTGTTTGCCTACGCCATCCTACGCTCCATTCCCAACAAACT +AGGCGGCGTATTAGCCCTAATCCTCTCCATCCTGATCCTAGCACTCATCC +CCACCCTCCACATATCAAAACAACGAAGCATAATATTCCGGCCTCTCAGC +CAATGCGTATTCTGACTCTTAGTGGCAGACTTACTGACACTAACATGAAT +CGGCGGACAGCCAGTGGAACACCCATACGTAATTATCGGCCAACTGGCCT +CAATCCTCTACTTCTCCCTAATTCTCATTTTTATACCACTCGCAAGCACC +ATCGAAAACAATCTTCTAAAATGAAGAGTCCCTGTAGTATATCGCACATT +ACCCTGGTCTTGTAAACCAGAAAAGGGGGAAAACGTTTCCTCCCAAGGAC +TATCAAGGAAGAAGCTCTAGCTCCACCATCAACACCCAAAGCTGAAATTC +TACTTAAACTATTCCTTGATTTCTTCCCCTAAACGACAACAATTTACCCT +CATGTGCTATGTCAGTATCAGATTATACCCCCACATAACACCATACCCAC +CTGACATGCAATATCTTATGAATGGCCTATGTACGTCGTGCATTAAATTG +TCTGCCCCATGAATAATAAGCATGTACATAATATCATTTATCTTACATAA +GTACATTATATTATTGATCGTGCATACCCCATCCAAGTCAAATCATTTCC +AGTCAACACGCATATCACAGCCCATGTTCCACGAGCTTAATCACCAAGCC +GCGGGAAATCAGCAACCCTCCCAACTACGTGTCCCAATCCTCGCTCCGGG +CCCATCCAAACGTGGGGGTTTCTACAATGAAACTATACCTGGCATCTGGT +TCTTTCTTCAGGGCCATTCCCACCCAACCTCGCCCATTCTTTCCCCTTAA +ATAAGACATCTCGATGGACTAATGACTAATCAGCCCATGCTCACACATAA +CTGTGATTTCATGCATTTGGTATCTTTTTATATTTGGGGATGCTATGACT +CAGCTATGGCCGTCAAAGGCCTCGACGCAGTCAATTAAATTGAAGCTGGA +CTTAAATTGAACGTTATTCCTCCGCATCAGCAACCATAAGGTGTTATTCA +GTCCATGGTAGCGGGACATAGGAAACAAgtgcacctgtgcacctgtgcac +ctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacct +gtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgt +gcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgc +acctgtgcacctgtgcacctgtgcacctgtgcacctgtgcacctgtgcac +ctgtgcacctACCCGCGCAGTAAGCAAGTAATATAGCTTTCTTAATCAAA +CCCCCCCTACCCCCCATTAAACTCCACATATGTACATTCAACACAATCTT +GCCAAACCCCAAAAACAAGACTAAACAATGCACAATACTTCATGAAGCTT +AACCCTCGCATGCCAACCATAATAACTCAACACACCTAACAATCTTAACA +GAACTTTCCCCCCGCCATTAATACCAACATGCTACTTTAATCAATAAAAT +TTCCATAGACAGGCATCCCCCTAGATCTAATTTTCTAAATCTGTCAACCC +TTCTTCCCCC diff -r d261f41a2a03 -r 36f438ce1f82 test-data/sam_to_bam_in1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sam_to_bam_in1.sam Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,10 @@ +HWI-EAS91_1_30788AAXX:1:1:1513:715 16 chrM 9563 25 36M * 0 0 CTGACTACCACAACTAAACATCTATGCNNAAAAAAC I+-II?IDIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1698:516 16 chrM 2735 25 36M * 0 0 TTTACACTCAGAGGTTCAACTCCTCTCNNTAACAAC I9IIIII5IIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1491:637 16 chrM 10864 25 36M * 0 0 TGTAGAAGCCCCAATTGCCGGATCCATNNTGCTAGC DBAIIIIIIIIIIIFIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1711:249 16 chrM 10617 25 36M * 0 0 ACCAAACAGAACGCCTGAACGCAGGCCNNTACTTCC IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1634:211 0 chrM 9350 25 36M * 0 0 GAAGCAGNNGCTTGATACTGACACTTCGTCGACGTA IIIIIII""IIIIIIIIIIIIIIIIIIIIII9IIDF NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1218:141 16 chrM 14062 25 36M * 0 0 ACAAAACTAACAACAAAAATAACACTCNNAATAAAC I+IIII1IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1398:854 16 chrM 3921 25 36M * 0 0 CACCCTTCCCGTACTAATAAATCCCCTNNTCTTCAC IIIII=AIIIIIIIIIIIIIIBIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1310:991 16 chrM 10002 25 36M * 0 0 CTCCTATGCCTAGAAGGAATAATACTANNACTATTC I:2IEI:IIDIIIIII4IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1716:413 0 chrM 6040 25 36M * 0 0 GATCCAANNCTTTATCAACACCTATTCTGATTCTTC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1630:59 16 chrM 12387 25 36M * 0 0 TCATACTCGACCCCAACCTTACCAACCNNCCGCTCC FIIHII;IIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 diff -r d261f41a2a03 -r 36f438ce1f82 test-data/sam_to_bam_in2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sam_to_bam_in2.sam Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,10 @@ +HWI-EAS91_1_30788AAXX:1:1:1095:605 0 chrM 23 25 36M * 0 0 AAGCAAGNNACTGAAAATGCCTAGATGAGTATTCTT IIIIIII""IIIIIIIIIIIIIIIEIIIIIIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1650:1185 0 chrM 14956 25 36M * 0 0 ACCCCAGNNAACCCTCTCAGCACTCCCCCTCATATT IIIIIII""IIIIIIIIIIII6IIIIIIIII5I-II NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:799:192 16 chrM 8421 25 36M * 0 0 CCTGTAGCCCTAGCCGTGCGGCTAACCNNTAACATT II%::I<IIIIIEIII8IIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1082:719 16 chrM 7191 25 36M * 0 0 TAAATTAACCCATACCAGCACCATAGANNCTCAAGA <III0EII3+3I29I>III8AIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1746:1180 16 chrM 12013 25 36M * 0 0 CCTAAGCTTCAAACTAGATTACTTCTCNNTAATTTT IIIIIIIIFIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:606:460 0 chrM 4552 25 36M * 0 0 TTAATTTNNATTATAATAACACTCACAATATTCATA IIIIIII""IIIIIIIIIIIIIIIIII?I6IIIII6 NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1059:362 16 chrM 7348 25 36M * 0 0 GGCCACCAATGATACTGAAGCTACGAGNNTACCGAT II/<)2IIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1483:1161 16 chrM 15080 25 36M * 0 0 TCCTGATCCTAGCACTCATCCCCACCCNNCACATAT HIIIIIFIIAIHIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1273:600 16 chrM 13855 25 36M * 0 0 GTATTAGACACCCATACCTCAGGATACNNCTCAGTA IIIIIIIIIIIIIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 +HWI-EAS91_1_30788AAXX:1:1:1190:1283 16 chrM 15338 25 36M * 0 0 TATATCGCACATTACCCTGGTCTTGTANNCCAGAAA EIII?-IIIIIAIIIIIIIIIIIIIII""IIIIIII NM:i:1 X1:i:1 MD:Z:7N0N27 diff -r d261f41a2a03 -r 36f438ce1f82 test-data/sam_to_bam_out1.bam Binary file test-data/sam_to_bam_out1.bam has changed diff -r d261f41a2a03 -r 36f438ce1f82 test-data/sam_to_bam_out2.bam Binary file test-data/sam_to_bam_out2.bam has changed diff -r d261f41a2a03 -r 36f438ce1f82 tool-data/sam_fa_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/sam_fa_indices.loc.sample Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,27 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a sam_fa_indices.loc file +#similar to this one (store it in this directory ) that points to +#the directories in which those files are stored. The sam_fa_indices.loc +#file has this format (white space characters are TAB characters): +# +#<index> <seq> <location> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/sam/, +#then the sam_fa_indices.loc entry would look like this: +# +#hg18 /depot/data2/galaxy/sam/hg18.fa +# +#and your /depot/data2/galaxy/sam/ directory +#would contain hg18.fa and hg18.fa.fai files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai +# +#Your sam_fa_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18 /depot/data2/galaxy/sam/hg18.fa diff -r d261f41a2a03 -r 36f438ce1f82 tool_conf.xml.sample --- a/tool_conf.xml.sample Fri Aug 28 15:29:53 2009 -0400 +++ b/tool_conf.xml.sample Fri Aug 28 15:59:16 2009 -0400 @@ -331,9 +331,13 @@ <tool file="metag_tools/megablast_xml_parser.xml" /> <tool file="metag_tools/blat_wrapper.xml" /> <tool file="metag_tools/mapping_to_ucsc.xml" /> - <tool file="sr_mapping/bwa_wrapper.xml" /> </section> <section name="Tracks" id="tracks"> <tool file="visualization/genetrack.xml" /> </section> + <section name="SAM Tools" id="samtools"> + <tool file="samtools/sam_to_bam.xml" /> + <tool file="samtools/sam_merge.xml" /> + <tool file="samtools/sam_pileup.xml" /> + </section> </toolbox> diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_merge.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_merge.py Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,21 @@ +#! /usr/bin/python + +import os, sys + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def __main__(): + infile = sys.argv[1] + outfile = sys.argv[2] + if len( sys.argv ) < 3: + stop_err( 'No files to merge' ) + filenames = sys.argv[3:] + cmd1 = 'samtools merge %s %s %s' % (outfile, infile, ' '.join(filenames)) + try: + os.system(cmd1) + except Exception, eq: + stop_err('Error running SAMtools merge tool\n' + str(eq)) + +if __name__ == "__main__" : __main__() \ No newline at end of file diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_merge.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_merge.xml Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,32 @@ +<tool id="sam_merge" name="Merge BAM Files" version="1.0.0"> + <description>merges BAM files together</description> + <command interpreter="python"> + sam_merge.py + $input1 + $output1 + $input2 + #for $i in $inputs + ${i.input} + #end for + </command> + <inputs> + <param name="input1" label="First file" type="data" format="bam" /> + <param name="input2" label="with file" type="data" format="bam" help="Need to add more files? Use controls below." /> + <repeat name="inputs" title="Input Files"> + <param name="input" label="Add file" type="data" format="bam" /> + </repeat> + </inputs> + <outputs> + <data name="output1" format="bam" /> + </outputs> + <!-- bam files are binary and not sniffable so can't be uploaded without being corrupted, so no tests --> + <help> + +**What it does** + +This tool uses SAMTools_' merge command to merge any number of BAM files together into one BAM file. + +.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml + + </help> +</tool> diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_merge_code.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_merge_code.py Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,35 @@ +import sets +from galaxy.tools.parameters import DataToolParameter + +def validate_input( trans, error_map, param_values, page_param_map ): + dbkeys = sets.Set() + data_param_names = sets.Set() + data_params = 0 + for name, param in page_param_map.iteritems(): + if isinstance( param, DataToolParameter ): + # for each dataset parameter + if param_values.get(name, None) != None: + dbkeys.add( param_values[name].dbkey ) + data_params += 1 + # check meta data +# try: +# param = param_values[name] +# startCol = int( param.metadata.startCol ) +# endCol = int( param.metadata.endCol ) +# chromCol = int( param.metadata.chromCol ) +# if param.metadata.strandCol is not None: +# strandCol = int ( param.metadata.strandCol ) +# else: +# strandCol = 0 +# except: +# error_msg = "The attributes of this dataset are not properly set. " + \ +# "Click the pencil icon in the history item to set the chrom, start, end and strand columns." +# error_map[name] = error_msg + data_param_names.add( name ) + if len( dbkeys ) > 1: + for name in data_param_names: + error_map[name] = "All datasets must belong to same genomic build, " \ + "this dataset is linked to build '%s'" % param_values[name].dbkey + if data_params != len(data_param_names): + for name in data_param_names: + error_map[name] = "A dataset of the appropriate type is required" \ No newline at end of file diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_pileup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_pileup.py Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,89 @@ +#! /usr/bin/python + +""" +Creates a pileup file from a bam file and a reference. + +usage: %prog [options] + -i, --input1=i: bam file + -o, --output1=o: Output pileup + -r, --ref=r: Reference file type + -n, --ownFile=n: User-supplied fasta reference file + -d, --dbkey=d: dbkey of user-supplied file + -x, --indexDir=x: Index directory + -b, --bamIndex=b: BAM index file + +usage: %prog input1 output1 ref_type refFile ownFile dbkey index_dir bam_index +""" + +import os, sys, tempfile +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from bx.cookbook import doc_optparse + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): + seq_file = "%s/sam_fa_indices.loc" % GALAXY_DATA_INDEX_DIR + seq_path = '' + for line in open( seq_file ): + line = line.rstrip( '\r\n' ) + if line and not line.startswith( "#" ) and line.startswith( 'index' ): + fields = line.split( '\t' ) + if len( fields ) < 3: + continue + if fields[1] == dbkey: + seq_path = fields[2].strip() + break + return seq_path + +def __main__(): + #Parse Command Line + options, args = doc_optparse.parse( __doc__ ) + seq_path = check_seq_file( options.dbkey, options.indexDir ) + tmp_dir = tempfile.gettempdir() + os.chdir(tmp_dir) + tmpf0 = tempfile.NamedTemporaryFile(dir=tmp_dir) + tmpf0bam = '%s.bam' % tmpf0.name + tmpf0bambai = '%s.bam.bai' % tmpf0.name + tmpf1 = tempfile.NamedTemporaryFile(dir=tmp_dir) + tmpf1fai = '%s.fai' % tmpf1.name + cmd1 = None + cmd2 = 'cp %s %s; cp %s %s' % (options.input1, tmpf0bam, options.bamIndex, tmpf0bambai) + cmd3 = 'samtools pileup -f %s %s > %s 2> /dev/null' + if options.ref =='indexed': + full_path = "%s.fai" % seq_path + if not os.path.exists( full_path ): + stop_err( "No sequences are available for '%s', request them by reporting this error." % options.dbkey ) + cmd3 = cmd3 % (seq_path, tmpf0bam, options.output1) + elif options.ref == 'history': + cmd1 = 'cp %s %s; cp %s.fai %s' % (options.ownFile, tmpf1.name, options.ownFile, tmpf1fai) + cmd3 = cmd3 % (tmpf1.name, tmpf0bam, options.output1) + # index reference if necessary + if cmd1: + try: + os.system(cmd1) + except Exception, eq: + stop_err('Error moving reference sequence\n' + str(eq)) + # copy bam index to working directory + try: + os.system(cmd2) + except Exception, eq: + stop_err('Error moving files to temp directory\n' + str(eq)) + # perform pileup command + try: + os.system(cmd3) + except Exception, eq: + stop_err('Error running SAMtools merge tool\n' + str(eq)) + # clean up temp files + tmpf1.close() + tmpf0.close() + if os.path.exists(tmpf0bam): + os.remove(tmpf0bam) + if os.path.exists(tmpf0bambai): + os.remove(tmpf0bambai) + if os.path.exists(tmpf1fai): + os.remove(tmpf0bambai) + +if __name__ == "__main__" : __main__() diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_pileup.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_pileup.xml Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,50 @@ +<tool id="sam_pileup" name="SAM Pileup Format" version="1.0.0"> + <description>generates the pileup format for a provided BAM file</description> + <command interpreter="python"> + sam_pileup.py + --input1=$input1 + --output=$output1 + --ref=$refOrHistory.reference + #if $refOrHistory.reference == "history": + --ownFile=$refOrHistory.ownFile + #else: + --ownFile="None" + #end if + --dbkey=${input1.metadata.dbkey} + --indexDir=${GALAXY_DATA_INDEX_DIR} + --bamIndex=${input1.metadata.bam_index} + </command> + <inputs> + <conditional name="refOrHistory"> + <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + </param> + </when> + <when value="history"> + <param name="input1" type="data" format="sam, bam" label="Select the BAM file to generate the pileup file for" /> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="output1" /> + </outputs> + <!-- tests are not possible because bam is a non-sniffable binary format and cannot be uploaded without being corrupted --> + <help> + +**What it does** + +Uses SAMTools_' pileup command to produce a file in the pileup format based on the provided BAM file. + +.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml + + </help> +</tool> + + diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_to_bam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_to_bam.py Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,85 @@ +#! /usr/bin/python + +""" +Converts SAM data to BAM format. + +usage: %prog [options] + -i, --input1=i: SAM file to be converted + -d, --dbkey=d: dbkey value + -r, --ref_file=r: Reference file if choosing from history + -o, --output1=o: BAM output + -x, --index_dir=x: Index directory + +usage: %prog input_file dbkey ref_list output_file +""" + +import os, sys, tempfile +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from bx.cookbook import doc_optparse + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): + seq_file = "%s/sam_fa_indices.loc" % GALAXY_DATA_INDEX_DIR + seq_path = '' + for line in open( seq_file ): + line = line.rstrip( '\r\n' ) + if line and not line.startswith( "#" ) and line.startswith( 'index' ): + fields = line.split( '\t' ) + if len( fields ) < 3: + continue + if fields[1] == dbkey: + seq_path = fields[2].strip() + break + return seq_path + +def __main__(): + #Parse Command Line + options, args = doc_optparse.parse( __doc__ ) + seq_path = check_seq_file( options.dbkey, options.index_dir ) + tmp_dir = tempfile.gettempdir() + os.chdir(tmp_dir) + tmpf1 = tempfile.NamedTemporaryFile(dir=tmp_dir) + tmpf1fai = '%s.fai' % tmpf1.name + tmpf2 = tempfile.NamedTemporaryFile(dir=tmp_dir) + tmpf3 = tempfile.NamedTemporaryFile(dir=tmp_dir) + tmpf3bam = '%s.bam' % tmpf3.name + if options.ref_file == "None": + full_path = "%s.fai" % seq_path + if not os.path.exists( full_path ): + stop_err( "No sequences are available for '%s', request them by reporting this error." % options.dbkey ) + cmd1 = "cp %s %s; cp %s %s" % (seq_path, tmpf1.name, full_path, tmpf1fai) + else: + cmd1 = "cp %s %s; samtools faidx %s 2>/dev/null" % (options.ref_file, tmpf1.name, tmpf1.name) + cmd2 = "samtools view -bt %s -o %s %s 2>/dev/null" % (tmpf1fai, tmpf2.name, options.input1) + cmd3 = "samtools sort %s %s 2>/dev/null" % (tmpf2.name, tmpf3.name) + cmd4 = "cp %s %s" % (tmpf3bam, options.output1) + # either create index based on fa file or copy provided index to temp directory + try: + os.system(cmd1) + except Exception, eq: + stop_err("Error creating the reference list index.\n" + str(eq)) + # create original bam file + try: + os.system(cmd2) + except Exception, eq: + stop_err("Error running view command.\n" + str(eq)) + # sort original bam file to produce sorted output bam file + try: + os.system(cmd3) + os.system(cmd4) + except Exception, eq: + stop_err("Error sorting data and creating output file.\n" + str(eq)) + # cleanup temp files + tmpf1.close() + tmpf2.close() + tmpf3.close() + if os.path.exists(tmpf1fai): + os.remove(tmpf1fai) + if os.path.exists(tmpf3bam): + os.remove(tmpf3bam) + +if __name__=="__main__": __main__() diff -r d261f41a2a03 -r 36f438ce1f82 tools/samtools/sam_to_bam.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/samtools/sam_to_bam.xml Fri Aug 28 15:59:16 2009 -0400 @@ -0,0 +1,59 @@ +<tool id="sam_to_bam" name="SAM-to-BAM" version="1.0.0"> + <description>converts SAM format to BAM format</description> + <command interpreter="python"> + sam_to_bam.py + --input1=$source.input1 + --dbkey=${input1.metadata.dbkey} + #if $source.indexSource == "history": + --ref_file=$ref_file + #else + --ref_file="None" + #end if + --output1=$output1 + --index_dir=${GALAXY_DATA_INDEX_DIR} + </command> + <inputs> + <conditional name="source"> + <param name="indexSource" type="select" label="Choose the source for the reference list"> + <option value="built_in">Built-in</option> + <option value="history">History</option> + </param> + <when value="built_in"> + <param name="input1" type="data" format="sam" label="SAM File to Convert"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + </param> + </when> + <when value="history"> + <param name="input1" type="data" format="sam" label="SAM File to Convert" /> + <param name="ref_file" type="data" format="fasta" label="Choose the reference file" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="output1" format="bam"/> + </outputs> + <tests> + <test> + <param name="indexSource" value="history" /> + <param name="input1" value="sam_to_bam_in1.sam" ftype="sam" /> + <param name="ref_file" value="chrM.fa" ftype="fasta" /> + <output name="output1" file="sam_to_bam_out1.bam" /> + </test> + <test> + <param name="indexSource" value="built_in" /> + <param name="input1" value="sam_to_bam_in2.sam" ftype="sam" dbkey="chrM" /> + <param name="ref_file" value="chrM.fa" ftype="fasta" /> + <output name="output1" file="sam_to_bam_out2.bam" /> + </test> + </tests> + <help> + +**What it does** + +This tool uses the SAMTools_ toolkit to produce a BAM file based on a sorted input SAM file. + +.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml + + </help> +</tool>