[hg] galaxy 2767: merge

25 Sep 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/93b8b571a9fa
changeset: 2767:93b8b571a9fa
user:      Kanwei Li <kanwei@gmail.com>
date:      Thu Sep 24 16:16:56 2009 -0400
description:
merge

0 file(s) affected in this change:


diffs (595 lines):

diff -r 9e0c6e9b9dbb -r 93b8b571a9fa lib/galaxy/web/controllers/tracks.py

--- a/lib/galaxy/web/controllers/tracks.py	Thu Sep 24 16:15:57 2009 -0400
+++ b/lib/galaxy/web/controllers/tracks.py	Thu Sep 24 16:16:56 2009 -0400
@@ -48,9 +48,6 @@
 # FIXME: hardcoding this for now, but it should be derived from the available
 #        converters
 browsable_types = set( ["wig" ] )
-
-# For natural sort
-NUM_RE = re.compile('([0-9]+)')
 
 class TracksController( BaseController ):
     """
@@ -122,8 +119,14 @@
         """
         Returns a naturally sorted list of chroms/contigs for the given dbkey
         """
+        def check_int(s):
+            if s.isdigit():
+                return int(s)
+            else:
+                return s
+            
         def split_by_number(s):
-            return [ int(c) if c.isdigit() else c for c in NUM_RE.split(s) ]
+            return [ check_int(c) for c in re.split('([0-9]+)', s) ]
         
         chroms = self._chroms( trans, dbkey )
         to_sort = [{ 'chrom': chrom, 'len': length } for chrom, length in chroms.iteritems()]
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa scripts/cleanup_datasets/cleanup_datasets.py
--- a/scripts/cleanup_datasets/cleanup_datasets.py	Thu Sep 24 16:15:57 2009 -0400
+++ b/scripts/cleanup_datasets/cleanup_datasets.py	Thu Sep 24 16:16:56 2009 -0400
@@ -204,32 +204,42 @@
 
 def delete_datasets( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ):
     # Marks datasets as deleted if associated items are all deleted.
-    print '# The following datasets have been marked deleted'
+    print "######### Starting delete_datasets #########\n"
     start = time.clock()
     if force_retry:
-        datasets = app.model.Dataset.filter( app.model.LibraryDatasetDatasetAssociation.table.c.update_time < cutoff_time ).all() + app.model.Dataset.filter( app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time ).all()
-    else:
-        datasets = app.model.Dataset.filter( and_( app.model.HistoryDatasetAssociation.table.c.deleted==True,
-                                    app.model.Dataset.table.c.deleted == False,
-                                    app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time ) ).all()
-        datasets = datasets + app.model.Dataset.filter( and_( app.model.LibraryDatasetDatasetAssociation.table.c.deleted==True,
-                                    app.model.Dataset.table.c.deleted == False,
-                                    app.model.LibraryDatasetDatasetAssociation.table.c.update_time < cutoff_time ) ).all()
+        history_datasets = app.model.Dataset.options( eagerload( "history_associations" ) ) \
+                                            .filter( app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time ).all()
+        library_datasets = app.model.Dataset.options( eagerload( "library_associations" ) ) \
+                                            .filter( app.model.LibraryDatasetDatasetAssociation.table.c.update_time < cutoff_time ).all()
+    else:                                    
+        history_datasets = app.model.Dataset.filter_by( deleted = False ) \
+                                            .options( eagerload( "history_associations" ) ) \
+                                            .filter( and_( app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time,
+                                                           app.model.HistoryDatasetAssociation.table.c.deleted==True ) ).all()
+        library_datasets = app.model.Dataset.filter_by( deleted = False ) \
+                                            .options( eagerload( "library_associations" ) ) \
+                                            .filter( and_( app.model.LibraryDatasetDatasetAssociation.table.c.update_time < cutoff_time,
+                                                           app.model.LibraryDatasetDatasetAssociation.table.c.deleted==True ) ).all()
+    print "Time to query history and library datasets: ",  time.clock() - start, "\n"
+    print "Processing ", len( history_datasets ), " history datasets and ", len( library_datasets ), " library datasets...\n\n"
+    datasets = history_datasets + library_datasets
     skip = []
     deleted_dataset_count = 0
     deleted_instance_count = 0
     for dataset in datasets:
+        print "Processing dataset id:", dataset.id, "\n"
         if dataset.id not in skip and _dataset_is_deletable( dataset ):
             deleted_dataset_count += 1
-            print "Dataset:", dataset.id
             for dataset_instance in dataset.history_associations + dataset.library_associations:
-                print "\tAssociated Dataset instance:", dataset_instance.__class__.__name__, dataset_instance.id
-                _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children = True, info_only = info_only )
+                print "Associated Dataset instance: ", dataset_instance.__class__.__name__, dataset_instance.id, "\n"
+                _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children=True, info_only=info_only, is_deletable=True )
                 deleted_instance_count += 1
         skip.append( dataset.id )
-    print
-    print '# Examined %d datasets, marked %d as deleted and purged %d dataset instances\n' % ( len( skip ), deleted_dataset_count, deleted_instance_count )
-    print "Elapsed time: ", time.clock() - start, "\n"
+        print "Time to process dataset id: ", dataset.id, " - ", time.clock() - start, "\n\n"
+    print "Time to mark datasets deleted: ", time.clock() - start, "\n\n"
+    print "Examined %d datasets, marked %d as deleted and purged %d dataset instances\n" % ( len( skip ), deleted_dataset_count, deleted_instance_count )
+    print "Total elapsed time: ", time.clock() - start, "\n"
+    print "######### Finished delete_datasets #########\n"
 
 def purge_datasets( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ):
     # Purges deleted datasets whose update_time is older than cutoff_time.  Files may or may
@@ -262,15 +272,16 @@
     print "Elapsed time: ", stop - start, "\n"
 
 
-def _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children = True, info_only = False ):
-    #purging a dataset instance marks the instance as deleted, 
-    #and marks the dataset as deleted if it is not associated with another DatsetInstance that is not deleted
+def _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children=True, info_only=False, is_deletable=False ):
+    # A dataset_instance is either a HDA or an LDDA.  Purging a dataset instance marks the instance as deleted, 
+    # and marks the associated dataset as deleted if it is not associated with another active DatasetInstance.
     if not info_only:
         dataset_instance.mark_deleted( include_children = include_children )
         dataset_instance.clear_associated_files()
         dataset_instance.flush()
         dataset_instance.dataset.refresh()
-    if _dataset_is_deletable( dataset_instance.dataset ):
+    if is_deletable or _dataset_is_deletable( dataset_instance.dataset ):
+        # Calling methods may have already checked _dataset_is_deletable, if so, is_deletable should be True
         _delete_dataset( dataset_instance.dataset, app, remove_from_disk, info_only = info_only )
     #need to purge children here
     if include_children:
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa templates/root/history.mako
--- a/templates/root/history.mako	Thu Sep 24 16:15:57 2009 -0400
+++ b/templates/root/history.mako	Thu Sep 24 16:16:56 2009 -0400
@@ -81,7 +81,7 @@
 // Functionized so AJAX'd datasets can call them
 function initShowHide() {
 
-    // Load saved state and show as neccesary
+    // Load saved state and show as necessary
     try {
         var stored = $.jStore.store("history_expand_state");
         if (stored) {
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa test-data/cat_wrapper_out1.bed
--- a/test-data/cat_wrapper_out1.bed	Thu Sep 24 16:15:57 2009 -0400
+++ b/test-data/cat_wrapper_out1.bed	Thu Sep 24 16:16:56 2009 -0400
@@ -131,28 +131,3 @@
 chrX	152648964	152649196	NM_000425_cds_0_0_chrX_152648965_r	0	-
 chrX	152691446	152691471	AF101728_cds_0_0_chrX_152691447_f	0	+
 chrX	152694029	152694263	BC052303_cds_0_0_chrX_152694030_r	0	-
-chr1	147962006	147975713	NM_005997	0	-	147962192	147975670	0	6	574,145,177,115,153,160,	0,1543,7859,9048,9340,13547,
-chr1	147984101	148035079	BC007833	0	+	147984545	148033414	0	14	529,32,81,131,118,153,300,206,84,49,85,130,46,1668,	0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310,
-chr1	148077485	148111797	NM_002651	0	-	148078400	148111728	0	12	1097,121,133,266,124,105,110,228,228,45,937,77,	0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235,
-chr1	148185113	148187485	NM_002796	0	+	148185136	148187378	0	7	163,207,147,82,117,89,120,	0,416,877,1199,1674,1977,2252,
-chr2	118288484	118306183	NM_006773	0	+	118288583	118304530	0	14	184,285,144,136,101,200,115,140,162,153,114,57,178,1796,	0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903,
-chr2	118389378	118390700	BC005078	0	-	118390395	118390500	0	1	1322,	0,
-chr2	220108603	220116964	NM_001927	0	+	220108689	220116217	0	9	664,61,96,162,126,221,44,83,789,	0,1718,1874,2118,2451,2963,5400,7286,7572,
-chr2	220229182	220233943	NM_024536	0	-	220229609	220233765	0	4	1687,180,574,492,	0,1990,2660,4269,
-chr5	131170738	131357870	AF099740	0	-	131311206	131357817	0	31	112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248,	0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884,
-chr5	131424245	131426795	NM_000588	0	+	131424298	131426383	0	5	215,42,90,42,535,	0,313,1658,1872,2015,
-chr5	131556201	131590458	NM_004199	0	-	131556601	131582218	0	15	471,97,69,66,54,100,71,177,194,240,138,152,97,100,170,	0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087,
-chr5	131621285	131637046	NM_003687	0	+	131621326	131635821	0	7	134,152,82,179,164,118,1430,	0,4915,8770,13221,13609,14097,14331,
-chr6	108298214	108386086	NM_007214	0	-	108299600	108385906	0	21	1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304,	0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568,
-chr6	108593954	108616704	NM_003269	0	+	108594662	108615360	0	9	733,146,88,236,147,97,150,106,1507,	0,5400,8778,10445,12037,14265,14749,15488,21243,
-chr6	108639410	108689143	NM_152827	0	-	108640045	108688818	0	3	741,125,487,	0,2984,49246,
-chr6	108722790	108950942	NM_145315	0	+	108722976	108950321	0	13	325,224,52,102,131,100,59,83,71,101,141,114,750,	0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402,
-chr7	113320332	113924911	AK131266	0	+	113862563	113893433	0	20	285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124,	0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455,
-chr7	116511232	116557294	NM_003391	0	-	116512159	116556994	0	5	1157,265,278,227,383,	0,20384,37843,43339,45679,
-chr7	116713967	116902666	NM_000492	0	+	116714099	116901113	0	27	185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754,	0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945,
-chr7	116944658	117107512	AF377960	0	-	116945541	116979926	0	23	1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173,	0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681,
-chr8	118880786	119193239	NM_000127	0	-	118881131	119192466	0	11	531,172,161,90,96,119,133,120,108,94,1735,	0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718,
-chr9	128763240	128783870	NM_174933	0	+	128764156	128783586	0	12	261,118,74,159,76,48,56,63,129,117,127,370,	0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260,
-chr9	128787362	128789566	NM_014908	0	-	128787519	128789136	0	1	2204,	0,
-chr9	128789530	128848928	NM_015354	0	+	128789552	128848511	0	44	54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594,	0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804,
-chr9	128849867	128870133	NM_020145	0	-	128850516	128869987	0	11	757,241,101,90,24,63,93,134,129,142,209,	0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057,
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/filters/catWrapper.py
--- a/tools/filters/catWrapper.py	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/filters/catWrapper.py	Thu Sep 24 16:16:56 2009 -0400
@@ -11,19 +11,28 @@
 def main():
     outfile = sys.argv[1]
     infile = sys.argv[2]
-    catfiles = sys.argv[3:]
+    
     try:
         fout = open(sys.argv[1],'w')
-    except Exxception, ex:
-        stop_err("Output file cannot be opened for writing\n" + str(ex))
+    except:
+        stop_err("Output file cannot be opened for writing.")
+        
     try:
         fin = open(sys.argv[2],'r')
-    except Exception, ex:
-        stop_err("Input file cannot be opened for reading\n" + str(ex))
-    cmdline = "cat %s %s > %s" % (infile, ' '.join(catfiles), outfile)
+    except:
+        stop_err("Input file cannot be opened for reading.")
+    
+    if len(sys.argv) < 4:
+        os.system("cp %s %s" %(infile,outfile))
+        sys.exit()
+    
+    cmdline = "cat %s " %(infile)
+    for inp in sys.argv[3:]:
+        cmdline = cmdline + inp + " "
+    cmdline = cmdline + ">" + outfile
     try:
         os.system(cmdline)
-    except Exception, ex:
-        stop_err("Error encountered with cat\n" + str(ex))
+    except:
+        stop_err("Error encountered with cat.")
         
 if __name__ == "__main__": main()
\ No newline at end of file
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/filters/catWrapper.xml
--- a/tools/filters/catWrapper.xml	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/filters/catWrapper.xml	Thu Sep 24 16:16:56 2009 -0400
@@ -1,19 +1,17 @@
-<tool id="cat1" name="Concatenate queries" version="1.0.1">
+<tool id="cat1" name="Concatenate queries">
     <description>tail-to-head</description>
-    <command interpreter="python">
-        catWrapper.py 
-        $out_file1 
+    <command interpreter="python">
+        catWrapper.py 
+        $out_file1 
         $input1
-        $input2
-        #for $q in $queries
-            ${q.input3}
-        #end for
+        #for $q in $queries
+            ${q.input2}
+        #end for
     </command>
     <inputs>
-        <param name="input1" type="data" label="First query to concatenate:"/>
-        <param name="input2" type="data" label="Second query to concatenate:"/>
-        <repeat name="queries" title="Additional query">
-            <param name="input3" type="data" label="Select" />
+        <param name="input1" type="data" label="Concatenate Query"/>
+        <repeat name="queries" title="Query">
+            <param name="input2" type="data" label="Select" />
         </repeat>
     </inputs>
     <outputs>
@@ -23,16 +21,14 @@
         <test>
             <param name="input1" value="1.bed"/>
             <param name="input2" value="2.bed"/>
-            <param name="input3" value="3.bed"/>
             <output name="out_file1" file="cat_wrapper_out1.bed"/>
         </test>
         <!--TODO: if possible, enhance the underlying test code to handle this test
-            the problem is multiple params with the same name "input3"
+            the problem is multiple params with the same name "input2"
         <test>
             <param name="input1" value="1.bed"/>
             <param name="input2" value="2.bed"/>
-            <param name="input3" value="3.bed"/>
-            <param name="input3" value="4.bed"/>
+            <param name="input2" value="3.bed"/>
             <output name="out_file1" file="cat_wrapper_out2.bed"/>
         </test>
         -->
@@ -62,12 +58,12 @@
 
     chr1  151242630  151242955  X  0  +
     chr1  151271715  151271999  Y  0  +
-    chr1  151278832  151279227  Z  0  -
-    
-and with Query2::
-
-    chr2  100000030  200000955  P  0  +
-    chr2  100000015  200000999  Q  0  +
+    chr1  151278832  151279227  Z  0  -
+    
+and with Query2::
+
+    chr2  100000030  200000955  P  0  +
+    chr2  100000015  200000999  Q  0  +
 
 will result in the following::
 
@@ -76,8 +72,8 @@
     chr1  151242630  151242955  X  0  +
     chr1  151271715  151271999  Y  0  +
     chr1  151278832  151279227  Z  0  -
-    chr2  100000030  200000955  P  0  +
-    chr2  100000015  200000999  Q  0  +
+    chr2  100000030  200000955  P  0  +
+    chr2  100000015  200000999  Q  0  +
 
     </help>
 </tool>
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/filters/sorter.py
--- a/tools/filters/sorter.py	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/filters/sorter.py	Thu Sep 24 16:16:56 2009 -0400
@@ -1,28 +1,44 @@
-# This script sorts a file based on the inputs: 
-# -cols		- column to sort on
-# -order	- ASC- or DESCending order
-# -i		- input filename 
-# -o		- output filename
+"""
+Sorts tabular data on one or more columns.
+
+usage: %prog [options]
+   -i, --input=i: Tabular file to be sorted
+   -o, --out_file1=o: Sorted output file
+   -c, --column=c: First column to sort on
+   -s, --style=s: Sort style (numerical or alphabetical)
+   -r, --order=r: Order (ASC or DESC)
+
+usage: %prog input out_file1 column style order [column style ...]
+"""
 
 import os, re, string, sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
 
 def stop_err( msg ):
     sys.stderr.write( "%s\n" % msg )
     sys.exit()
 
 def main():
+    #Parse Command Line
+    options, args = doc_optparse.parse( __doc__ )
     try:
-        inputfile = sys.argv[1]
-        outputfile = '-o %s' % sys.argv[2]
-        order = ('', '-r')[sys.argv[3] == 'DESC']
-        sort_type = ('','-n')[sys.argv[4] == 'num']
-        columns = sys.argv[5:]
-        cols = [ '-k%s,%s'%(n, n) for n in columns ]
+        inputfile = options.input
+        outputfile = '-o %s' % options.out_file1
+        order = ('', '-r')[options.order == 'DESC']
+        columns = [options.column]
+        styles = [('','n')[options.style == 'num']]
+        col_styles = sys.argv[6:]
+        if len(col_styles) > 1:
+            columns.extend([col_styles[i] for i in range(0,len(col_styles),2)])
+            styles.extend([('','n')[col_styles[i] == 'num'] for i in range(1,len(col_styles),2)])
+        cols = [ '-k%s,%s%s'%(columns[i], columns[i], styles[i]) for i in range(len(columns)) ]
     except Exception, ex:
         stop_err('Error parsing input parameters\n' + str(ex))
 
     # Launch sort.
-    cmd = "sort -f -t $'\t' %s %s %s %s %s" % (sort_type, ' '.join(cols), order, outputfile, inputfile)
+    cmd = "sort -f -t $'\t' %s %s %s %s" % (order, ' '.join(cols), outputfile, inputfile)
     try:
         os.system(cmd)
     except Exception, ex:
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/filters/sorter.xml
--- a/tools/filters/sorter.xml	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/filters/sorter.xml	Thu Sep 24 16:16:56 2009 -0400
@@ -2,28 +2,33 @@
   <description>data in ascending or descending order</description>
   <command interpreter="python">
     sorter.py 
-      $input 
-      $out_file1 
-      $order 
-      $style
-      $firstcol
+      --input=$input 
+      --out_file1=$out_file1 
+      --column=$column
+      --style=$style
+      --order=$order 
       #for $col in $column_set:
-        ${col.column}
+        ${col.other_column}
+        ${col.other_style}
       #end for
   </command>
   <inputs>
     <param format="tabular" name="input" type="data" label="Sort Query" />
-    <param name="firstcol" label="on column" type="data_column" data_ref="input" accept_default="true" />
-    <repeat name="column_set" title="Column selection">
-      <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true" />
-    </repeat>
-    <param name="order" type="select" label="in">
-      <option value="DESC">Descending order</option>
-      <option value="ASC">Ascending order</option>
-    </param>
+    <param name="column" label="on column" type="data_column" data_ref="input" accept_default="true" />
     <param name="style" type="select" label="with flavor">
       <option value="num">Numerical sort</option>
       <option value="alpha">Alphabetical sort</option>
+    </param>
+    <repeat name="column_set" title="Column selection">
+      <param name="other_column" label="on column" type="data_column" data_ref="input" accept_default="true" />
+      <param name="other_style" type="select" label="with flavor">
+        <option value="num">Numerical sort</option>
+        <option value="alpha">Alphabetical sort</option>
+      </param>
+    </repeat>
+    <param name="order" type="select" label="everything in">
+      <option value="DESC">Descending order</option>
+      <option value="ASC">Ascending order</option>
     </param>
   </inputs>
   <outputs>
@@ -32,18 +37,20 @@
   <tests>
     <test>
       <param name="input" value="sort_in1.bed"/>
-      <param name="firstcol" value="1"/>
-      <param name="column" value="3" />
+      <param name="column" value="1"/>
+      <param name="style" value="num"/>
+      <param name="other_column" value="3"/>
+      <param name="other_style" value="num"/>
       <param name="order" value="ASC"/>
-      <param name="style" value="num"/>
       <output name="out_file1" file="sort_out1.bed"/>
     </test>
     <test>
       <param name="input" value="sort_in1.bed"/>
-      <param name="firstcol" value="3" />
-      <param name="column" value="1"/>
+      <param name="column" value="3"/>
+      <param name="style" value="alpha"/>
+      <param name="other_column" value="1"/>
+      <param name="other_style" value="alpha"/>
       <param name="order" value="ASC"/>
-      <param name="style" value="alpha"/>
       <output name="out_file1" file="sort_out2.bed"/>
     </test>
   </tests>
@@ -64,9 +71,53 @@
 
 -----
 
-**Example**
+**Examples**
 
 The list of numbers 4,17,3,5 collates to 3,4,5,17 by numerical sorting, while it collates to 17,3,4,5 by alphabetical sorting.
 
+Sorting the following::
+
+  Q     d    7   II    jhu  45
+  A     kk   4   I     h    111
+  Pd    p    1   ktY   WS   113
+  A     g    10  H     ZZ   856
+  A     edf  4   tw    b    234
+  BBB   rt   10  H     ZZ   100
+  A     rew  10  d     b    1111
+  C     sd   19  YH    aa   10
+  Hah   c    23  ver   bb   467
+  MN    gtr  1   a     X    32
+  N     j    9   a     T    205
+  BBB   rrf  10  b     Z    134
+  odfr  ws   6   Weg   dew  201
+  C     f    3   WW    SW   34
+  A     jhg  4   I     b    345
+  Pd    gf   7   Gthe  de   567
+  rS    hty  90  YY    LOp  89
+  A     g    10  H     h    43
+  A     g    4   I     h    500
+
+on columns 1 (alpha), 3 (num), and 6 (num) in ascending order will yield::
+
+  A     kk   4   I     h    111
+  A     edf  4   tw    b    234
+  A     jhg  4   I     b    345
+  A     g    4   I     h    500
+  A     g    10  H     h    43
+  A     g    10  H     ZZ   856
+  A     rew  10  d     b    1111
+  BBB   rt   10  H     ZZ   100
+  BBB   rrf  10  b     Z    134
+  C     f    3   WW    SW   34
+  C     sd   19  YH    aa   10
+  Hah   c    23  ver   bb   467
+  MN    gtr  1   a     X    32
+  N     j    9   a     T    205
+  odfr  ws   6   Weg   dew  201
+  Pd    p    1   ktY   WS   113
+  Pd    gf   7   Gthe  de   567
+  Q     d    7   II    jhu  45
+  rS    hty  90  YY    LOp  89
+
   </help>
 </tool>
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/next_gen_conversion/solid_to_fastq.py
--- a/tools/next_gen_conversion/solid_to_fastq.py	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/next_gen_conversion/solid_to_fastq.py	Thu Sep 24 16:16:56 2009 -0400
@@ -30,7 +30,7 @@
     tmpf = tempfile.NamedTemporaryFile()    #forward reads
     if options.input3 != "None" and options.input4 != "None":
         tmpr = tempfile.NamedTemporaryFile()    #reverse reads
-        cmd1 = "bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s %s 2>&1" %(tmpf.name,tmpr.name,None,options.input1,options.input2,options.input3,options.input4)
+        cmd1 = "%s/bwa_solid2fastq_modified.pl 'yes' %s %s %s %s %s %s %s 2>&1" %(os.path.split(sys.argv[0])[0], tmpf.name,tmpr.name,None,options.input1,options.input2,options.input3,options.input4)
         try:
             os.system(cmd1)
             os.system('gunzip -c %s >> %s' %(tmpf.name,options.output1))
@@ -40,7 +40,7 @@
         tmpr.close()
     # if single-end data
     else:
-        cmd1 = "bwa_solid2fastq_modified.pl 'no' %s %s %s %s %s %s %s 2>&1" % (tmpf.name, None, None, options.input1, options.input2, None, None)
+        cmd1 = "%s/bwa_solid2fastq_modified.pl 'no' %s %s %s %s %s %s %s 2>&1" % (os.path.split(sys.argv[0])[0], tmpf.name, None, None, options.input1, options.input2, None, None)
         try:
             os.system(cmd1)
             os.system('gunzip -c %s >> %s' % (tmpf.name, options.output1))
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/samtools/sam_pileup.xml
--- a/tools/samtools/sam_pileup.xml	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/samtools/sam_pileup.xml	Thu Sep 24 16:16:56 2009 -0400
@@ -56,13 +56,13 @@
     </param>
     <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" />
     <conditional name="c">
-      <param name="consensus" type="select" label="Whether or not to call the consensus sequence using the MAQ consensus model">
-        <option value="no">Don't use MAQ consensus model</option>
-        <option value="yes">Use the MAQ consensus model</option>
+      <param name="consensus" type="select" label="Call consensus according to MAQ model?">
+        <option selected="true" value="no">No</option>
+        <option value="yes">Yes</option>
       </param> 
       <when value="no" />
       <when value="yes">
-        <param name="theta" type="float" value="0.85" label="Theta paramter (error dependency coefficient) in the MAQ consensus calling model" />
+        <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
         <param name="hapNum" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
         <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
         <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
@@ -77,9 +77,68 @@
     	
 **What it does**   	
 
-Uses SAMTools_' pileup command to produce a file in the pileup format based on the provided BAM file. 
+Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the chosen options. If the *Call consensus according to MAQ model?* option is set to **No**, the tool produces a simple pileup. If the option is set to **Yes**, a ten-column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.
 
 .. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+------
+
+**Types of pileup datasets**
+
+The description of the pileup format below is largely based on information that can be found on the SAMTools_ documentation page. The 6- and 10-column variants are described below.
+
+.. _SAMTools: http://samtools.sourceforge.net/pileup.shtml
+
+**Six column pileup**::
+
+    1    2  3  4        5        6
+ ---------------------------------   
+ chrM  412  A  2       .,       II
+ chrM  413  G  4     ..t,     IIIH
+ chrM  414  C  4     ...a     III2
+ chrM  415  C  4     TTTt     III7
+   
+where::
+
+  Column Definition
+ ------- ----------------------------
+       1 Chromosome
+       2 Position (1-based)
+       3 Reference base at that position
+       4 Coverage (# reads aligning over that position)
+       5 Bases within reads where (see Galaxy wiki for more info)
+       6 Quality values (phred33 scale, see Galaxy wiki for more)
+       
+**Ten column pileup**
+
+The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
+
+
+    1    2  3  4   5   6   7   8       9       10
+ ------------------------------------------------
+ chrM  412  A  A  75   0  25  2       .,       II
+ chrM  413  G  G  72   0  25  4     ..t,     IIIH
+ chrM  414  C  C  75   0  25  4     ...a     III2
+ chrM  415  C  T  75  75  25  4     TTTt     III7
+
+where::
+
+  Column Definition
+ ------- ----------------------------
+       1 Chromosome
+       2 Position (1-based)
+       3 Reference base at that position
+       4 Consensus bases
+       5 Consensus quality
+       6 SNP quality
+       7 Maximum mapping quality
+       8 Coverage (# reads aligning over that position)
+       9 Bases within reads where (see Galaxy wiki for more info)
+      10 Quality values (phred33 scale, see Galaxy wiki for more)
+
+
+.. __: http://samtools.sourceforge.net/cns0.shtml
+
 
   </help>
 </tool>
diff -r 9e0c6e9b9dbb -r 93b8b571a9fa tools/samtools/sam_to_bam.xml
--- a/tools/samtools/sam_to_bam.xml	Thu Sep 24 16:15:57 2009 -0400
+++ b/tools/samtools/sam_to_bam.xml	Thu Sep 24 16:16:56 2009 -0400
@@ -51,7 +51,7 @@
 
 **What it does**
 
-This tool uses the SAMTools_ toolkit to produce a BAM file based on a sorted input SAM file.
+This tool uses the SAMTools_ toolkit to produce an indexed BAM file based on a sorted input SAM file.
 
 .. _SAMTools: http://samtools.sourceforge.net/samtools.shtml

    

Nate Coraor

tags

participants (1)