2 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/c559805d44df/
Changeset:   c559805d44df
User:        davebgx
Date:        2014-05-19 19:01:46
Summary:     Migrate 22 tools from the distribution to the tool shed.
Affected #:  67 files

diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 lib/tool_shed/galaxy_install/migrate/versions/0011_tools.py
--- /dev/null
+++ b/lib/tool_shed/galaxy_install/migrate/versions/0011_tools.py
@@ -0,0 +1,62 @@
+"""
+The following tools have been eliminated from the distribution:
+
+1: Profile Annotations for a set of genomic intervals
+2: Polymorphism of the Reads
+3: Coverage of the Reads in wiggle format
+4: Canonical Correlation Analysis
+5: Convert Color Space to Nucleotides
+6: Compute sequence length
+7: Concatenate FASTA alignment by species
+8: Filter sequences by length
+9: FASTA-to-Tabular converter
+10: FASTQSOLEXA-to-FASTA-QUAL extracts sequences and quality scores from FASTQSOLEXA data
+11: Kernel Canonical Correlation Analysis
+12: Kernel Principal Component Analysis
+13: Format mapping data as UCSC custom track
+14: Megablast compare short reads against htgs, nt, and wgs databases
+15: Parse blast XML output
+16: Principal Component Analysis
+17: RMAP for Solexa Short Reads Alignment
+18: RMAPQ for Solexa Short Reads Alignment with Quality Scores
+19: Histogram of high quality score reads
+20: Build base quality distribution
+21: Select high quality segments
+22: Tabular-to-FASTA
+
+The tools are now available in the repositories respectively:
+
+1: annotation_profiler
+2: blat_coverage_report
+3: blat_mapping
+4: canonical_correlation_analysis
+5: convert_solid_color2nuc
+6: fasta_compute_length
+7: fasta_concatenate_by_species
+8: fasta_filter_by_length
+9: fasta_to_tabular
+10: fastqsolexa_to_fasta_qual
+11: kernel_canonical_correlation_analysis
+12: kernel_principal_component_analysis
+13: mapping_to_ucsc
+14: megablast_wrapper
+15: megablast_xml_parser
+16: principal_component_analysis
+17: rmap
+18: rmapq
+19: short_reads_figure_high_quality_length
+20: short_reads_figure_score
+21: short_reads_trim_seq
+22: tabular_to_fasta
+
+from the main Galaxy tool shed at http://toolshed.g2.bx.psu.edu
+and will be installed into your local Galaxy instance at the
+location discussed above by running the following command.
+
+"""
+
+def upgrade( migrate_engine ):
+    print __doc__
+
+def downgrade( migrate_engine ):
+    pass

diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 scripts/migrate_tools/0011_tools.sh
--- /dev/null
+++ b/scripts/migrate_tools/0011_tools.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+cd `dirname $0`/../..
+python ./scripts/migrate_tools/migrate_tools.py 0011_tools.xml $@
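The pieces above fit together simply: the shell wrapper changes to the Galaxy root and hands the 0011_tools.xml manifest to migrate_tools.py, while the numbered versions/0011_tools.py module exposes the upgrade()/downgrade() hooks that migration runners in the sqlalchemy-migrate style conventionally call. As a hypothetical sketch of that hook contract only (not Galaxy's actual runner), loading and dispatching such a module looks roughly like:

    import imp

    def run_migration_script( script_path, migrate_engine, direction='upgrade' ):
        # Load a numbered migration module (e.g. 0011_tools.py) by path and
        # call its upgrade() or downgrade() hook with the migration engine.
        module = imp.load_source( 'galaxy_tool_migration', script_path )
        getattr( module, direction )( migrate_engine )

    # 0011_tools.py only prints its docstring on upgrade, so no real engine is
    # needed for this illustration.
    run_migration_script( 'lib/tool_shed/galaxy_install/migrate/versions/0011_tools.py', None )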
id="mapToUCSC" version="1.0.0" file="metag_tools/mapping_to_ucsc.xml" /> + </repository> + <repository changeset_revision="dc7b4acb3fa6" owner="devteam" name="megablast_wrapper" description="Megablast compare short reads against htgs, nt, and wgs databases"> + <tool file="metag_tools/megablast_wrapper.xml" id="megablast_wrapper" version="1.2.0" /> + </repository> + <repository changeset_revision="03ca082aeb2e" owner="devteam" name="megablast_xml_parser" description="Parse blast XML output"> + <tool file="metag_tools/megablast_xml_parser.xml" id="megablast_xml_parser" version="1.0.0" /> + </repository> + <repository changeset_revision="f568051cdf2e" owner="devteam" name="principal_component_analysis" description="Principal Component Analysis"> + <tool file="multivariate_stats/pca.xml" id="pca1" version="1.0.2" /> + </repository> + <repository changeset_revision="ee49255302d8" owner="devteam" name="rmap" description="RMAP for Solexa Short Reads Alignment"> + <tool id="rmap_wrapper" version="1.0.0" file="metag_tools/rmap_wrapper.xml" /> + </repository> + <repository changeset_revision="f6e5bb5aa2f5" owner="devteam" name="rmapq" description="RMAPQ for Solexa Short Reads Alignment with Quality Scores"> + <tool id="rmapq_wrapper" version="1.0.0" file="metag_tools/rmapq_wrapper.xml" /> + </repository> + <repository changeset_revision="556ceed24699" owner="devteam" name="short_reads_figure_high_quality_length" description="Histogram of high quality score reads"> + <tool id="hist_high_quality_score" version="1.0.0" file="metag_tools/short_reads_figure_high_quality_length.xml" /> + </repository> + <repository changeset_revision="b52b9c7aabd9" owner="devteam" name="short_reads_figure_score" description="Build base quality distribution"> + <tool file="metag_tools/short_reads_figure_score.xml" id="quality_score_distribution" version="1.0.2" /> + </repository> + <repository changeset_revision="f17a1585733b" owner="devteam" name="short_reads_trim_seq" description="Select high quality segments"> + <tool file="metag_tools/short_reads_trim_seq.xml" id="trim_reads" version="1.0.0" /> + </repository> + <repository changeset_revision="0b4e36026794" owner="devteam" name="tabular_to_fasta" description="Tabular-to-FASTA"> + <tool file="fasta_tools/tabular_to_fasta.xml" id="tab2fasta" version="1.1.0" /> + </repository> +</toolshed> \ No newline at end of file diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 scripts/tools/annotation_profiler/README.txt --- a/scripts/tools/annotation_profiler/README.txt +++ /dev/null @@ -1,54 +0,0 @@ -This file explains how to create annotation indexes for the annotation profiler tool. Annotation profiler indexes are an exceedingly simple binary format, -containing no header information and consisting of an ordered linear list of (start,stop encoded individually as '<I') regions which are covered by a UCSC table partitioned -by chromosome name. Genomic regions are merged by overlap / direct adjacency (e.g. a table having ranges of: 1-10, 6-12, 12-20 and 25-28 results in two merged ranges of: 1-20 and 25-28). 
diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 scripts/tools/annotation_profiler/README.txt
--- a/scripts/tools/annotation_profiler/README.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-This file explains how to create annotation indexes for the annotation profiler tool. Annotation profiler indexes are an exceedingly simple binary format,
-containing no header information and consisting of an ordered linear list of (start,stop encoded individually as '<I') regions which are covered by a UCSC table partitioned
-by chromosome name. Genomic regions are merged by overlap / direct adjacency (e.g. a table having ranges of: 1-10, 6-12, 12-20 and 25-28 results in two merged ranges of: 1-20 and 25-28).
-
-Files are arranged like:
-/profiled_annotations/DBKEY/TABLE_NAME/
-    CHROMOSOME_NAME.covered
-    CHROMOSOME_NAME.total_coverage
-    CHROMOSOME_NAME.total_regions
-/profiled_annotations/DBKEY/
-    DBKEY_tables.xml
-    chromosomes.txt
-    profiled_info.txt
-
-where CHROMOSOME_NAME.covered is the binary file, CHROMOSOME_NAME.total_coverage is a text file containing the integer count of bases covered by the
-table, and CHROMOSOME_NAME.total_regions contains the integer count of the number of regions found in CHROMOSOME_NAME.covered.
-
-DBKEY_tables.xml should be appended to the annotation profiler available table configuration file (tool-data/annotation_profiler_options.xml).
-The DBKEY should also be added as a new line to the annotation profiler valid builds file (annotation_profiler_valid_builds.txt).
-The output (/profiled_annotations/DBKEY) should be made available as GALAXY_ROOT/tool-data/annotation_profiler/DBKEY.
-
-profiled_info.txt contains info on the generated annotations, separated by lines with tab-delimited label,value pairs:
-    profiler_version - the version of the build_profile_indexes.py script that was used to generate the profiled data
-    dbkey           - the dbkey used for the run
-    chromosomes     - contains the names and lengths of chromosomes that were used to parse single-chromosome tables (tables divided into individual files by chromosome)
-    dump_time       - the declared dump time of the database, taken from trackDb.txt.gz
-    profiled_time   - seconds since epoch in UTC for when the database dump was profiled
-    database_hash   - an md5 hex digest of all the profiled table info
-
-Typical usage includes:
-
-    python build_profile_indexes.py -d hg19 -i /ucsc_data/hg19/database/ > hg19.txt
-
-where the genome build is hg19 and /ucsc_data/hg19/database/ contains the downloaded database dump from UCSC
-(e.g. obtained by rsync: rsync -avzP rsync://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ /ucsc_data/hg19/database/).
-
-By default, chromosome names come from a file named 'chromInfo.txt.gz' found in the input directory, with FTP used as a backup.
-When FTP is used to obtain the names of chromosomes from UCSC for a particular genome build, alternate ftp sites and paths can be specified by using the --ftp_site and --ftp_path attributes.
-Chromosome names can instead be provided on the commandline via the --chromosomes option, which accepts a comma separated list of: ChromName1[=length],ChromName2[=length],...
-
-    usage = "usage: %prog options"
-    parser = OptionParser( usage=usage )
-    parser.add_option( '-d', '--dbkey', dest='dbkey', default='hg18', help='dbkey to process' )
-    parser.add_option( '-i', '--input_dir', dest='input_dir', default=os.path.join( 'golden_path','%s', 'database' ), help='Input Directory' )
-    parser.add_option( '-o', '--output_dir', dest='output_dir', default=os.path.join( 'profiled_annotations','%s' ), help='Output Directory' )
-    parser.add_option( '-c', '--chromosomes', dest='chromosomes', default='', help='Comma separated list of: ChromName1[=length],ChromName2[=length],...' )
-    parser.add_option( '-b', '--bitset_size', dest='bitset_size', default=DEFAULT_BITSET_SIZE, type='int', help='Default BitSet size; overridden by sizes specified in chromInfo.txt.gz or by --chromosomes' )
-    parser.add_option( '-f', '--ftp_site', dest='ftp_site', default='hgdownload.cse.ucsc.edu', help='FTP site; used for chromosome info when chromInfo.txt.gz method fails' )
-    parser.add_option( '-p', '--ftp_path', dest='ftp_path', default='/goldenPath/%s/chromosomes/', help='FTP Path; used for chromosome info when chromInfo.txt.gz method fails' )
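Since the README describes the .covered files as nothing more than consecutive little-endian unsigned ints in (start, stop) pairs, a minimal reader is easy to sketch. This is an illustration of the documented format, not a tool that shipped with the profiler, and the example path is hypothetical:

    import struct

    STRUCT_FMT = '<I'  # little-endian unsigned int, as in build_profile_indexes.py
    STRUCT_SIZE = struct.calcsize( STRUCT_FMT )

    def read_covered_regions( path ):
        # Return the ordered (start, stop) region pairs from a CHROMOSOME_NAME.covered file.
        regions = []
        f = open( path, 'rb' )
        while True:
            data = f.read( STRUCT_SIZE * 2 )
            if len( data ) < STRUCT_SIZE * 2:
                break
            start = struct.unpack( STRUCT_FMT, data[ :STRUCT_SIZE ] )[0]
            stop = struct.unpack( STRUCT_FMT, data[ STRUCT_SIZE: ] )[0]
            regions.append( ( start, stop ) )
        f.close()
        return regions

    # Hypothetical path following the layout described above:
    for start, stop in read_covered_regions( 'profiled_annotations/hg19/knownGene/chr22.covered' ):
        print '%i-%i' % ( start, stop )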
diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 scripts/tools/annotation_profiler/build_profile_indexes.py
--- a/scripts/tools/annotation_profiler/build_profile_indexes.py
+++ /dev/null
@@ -1,338 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-
-VERSION = '1.0.0' # version of this script
-
-from optparse import OptionParser
-import os, gzip, struct, time
-from ftplib import FTP #do we want a different method than using FTP to determine chrom names, e.g. use a local copy
-
-#import md5 from hashlib; if python2.4 or less, use old md5
-try:
-    from hashlib import md5
-except ImportError:
-    from md5 import new as md5
-
-#import BitSet from bx-python, try using eggs and package resources, fall back to any local installation
-try:
-    from galaxy import eggs
-    import pkg_resources
-    pkg_resources.require( "bx-python" )
-except:
-    pass #Maybe there is a local installation available
-from bx.bitset import BitSet
-
-#Define constants
-STRUCT_FMT = '<I'
-STRUCT_SIZE = struct.calcsize( STRUCT_FMT )
-DEFAULT_BITSET_SIZE = 300000000
-CHUNK_SIZE = 1024
-
-#Headers used to parse .sql files to determine column indexes for chromosome name, start and end
-alias_spec = {
-    'chromCol' : [ 'chrom' , 'CHROMOSOME' , 'CHROM', 'Chromosome Name', 'tName' ],
-    'startCol' : [ 'start' , 'START', 'chromStart', 'txStart', 'Start Position (bp)', 'tStart', 'genoStart' ],
-    'endCol'   : [ 'end' , 'END' , 'STOP', 'chromEnd', 'txEnd', 'End Position (bp)', 'tEnd', 'genoEnd' ],
-}
-
-#Headers used to parse trackDb.txt.gz
-#TODO: these should be parsed directly from trackDb.sql
-trackDb_headers = [ "tableName", "shortLabel", "type", "longLabel", "visibility", "priority", "colorR", "colorG", "colorB", "altColorR", "altColorG", "altColorB", "useScore", "private", "restrictCount", "restrictList", "url", "html", "grp", "canPack", "settings" ]
-
-def get_columns( filename ):
-    input_sql = open( filename ).read()
-    input_sql = input_sql.split( 'CREATE TABLE ' )[1].split( ';' )[0]
-    input_sql = input_sql.split( ' (', 1 )
-    table_name = input_sql[0].strip().strip( '`' )
-    input_sql = [ split.strip().split( ' ' )[0].strip().strip( '`' ) for split in input_sql[1].rsplit( ')', 1 )[0].strip().split( '\n' ) ]
-    print input_sql
-    chrom_col = None
-    start_col = None
-    end_col = None
-    for col_name in alias_spec['chromCol']:
-        for i, header_name in enumerate( input_sql ):
-            if col_name == header_name:
-                chrom_col = i
-                break
-        if chrom_col is not None:
-            break
-
-    for col_name in alias_spec['startCol']:
-        for i, header_name in enumerate( input_sql ):
-            if col_name == header_name:
-                start_col = i
-                break
-        if start_col is not None:
-            break
-
-    for col_name in alias_spec['endCol']:
-        for i, header_name in enumerate( input_sql ):
-            if col_name == header_name:
-                end_col = i
-                break
-        if end_col is not None:
-            break
-
-    return table_name, chrom_col, start_col, end_col
-
-
-def create_grouping_xml( input_dir, output_dir, dbkey ):
-    output_filename = os.path.join( output_dir, '%s_tables.xml' % dbkey )
-    def load_groups( file_name = 'grp.txt.gz' ):
-        groups = {}
-        for line in gzip.open( os.path.join( input_dir, file_name ) ):
-            fields = line.split( '\t' )
-            groups[fields[0]] = { 'desc': fields[1], 'priority': fields[2] }
-        return groups
-    f = gzip.open( os.path.join( input_dir, 'trackDb.txt.gz' ) )
-    out = open( output_filename, 'wb' )
-    tables = {}
-    cur_buf = ''
-    while True:
-        line = f.readline()
-        if not line:
-            break
-        #remove new lines
-        line = line.rstrip( '\n\r' )
-        line = line.replace( '\\\t', ' ' ) #replace escaped tabs with space
-        cur_buf += "%s\n" % line.rstrip( '\\' )
-        if line.endswith( '\\' ):
-            continue #line is wrapped, next line
-        #all fields should be loaded now...
-        fields = cur_buf.split( '\t' )
-        cur_buf = '' #reset buffer
-        assert len( fields ) == len( trackDb_headers ), 'Failed Parsing trackDb.txt.gz; fields: %s' % fields
-        table_name = fields[ 0 ]
-        tables[ table_name ] = {}
-        for field_name, field_value in zip( trackDb_headers, fields ):
-            tables[ table_name ][ field_name ] = field_value
-        #split settings fields into dict
-        fields = fields[-1].split( '\n' )
-        tables[ table_name ][ 'settings' ] = {}
-        for field in fields:
-            setting_fields = field.split( ' ', 1 )
-            setting_name = setting_value = setting_fields[ 0 ]
-            if len( setting_fields ) > 1:
-                setting_value = setting_fields[ 1 ]
-            if setting_name or setting_value:
-                tables[ table_name ][ 'settings' ][ setting_name ] = setting_value
-    #Load Groups
-    groups = load_groups()
-    in_groups = {}
-    for table_name, values in tables.iteritems():
-        if os.path.exists( os.path.join( output_dir, table_name ) ):
-            group = values['grp']
-            if group not in in_groups:
-                in_groups[group] = {}
-            #***NAME CHANGE***, 'subTrack' no longer exists as a setting...use 'parent' instead
-            #subTrack = values.get('settings', {} ).get( 'subTrack', table_name )
-            subTrack = values.get('settings', {} ).get( 'parent', table_name ).split( ' ' )[0] #need to split, because could be e.g. 'trackgroup on'
-            if subTrack not in in_groups[group]:
-                in_groups[group][subTrack] = []
-            in_groups[group][subTrack].append( table_name )
-
-    assigned_tables = []
-    out.write( """<filter type="data_meta" data_ref="input1" meta_key="dbkey" value="%s">\n""" % ( dbkey ) )
-    out.write( "  <options>\n" )
-    for group, subTracks in sorted( in_groups.iteritems() ):
-        out.write( """    <option name="%s" value="group-%s">\n""" % ( groups[group]['desc'], group ) )
-        for sub_name, sub_tracks in subTracks.iteritems():
-            if len( sub_tracks ) > 1:
-                out.write( """      <option name="%s" value="subtracks-%s">\n""" % ( sub_name, sub_name ) )
-                sub_tracks.sort()
-                for track in sub_tracks:
-                    track_label = track
-                    if "$" not in tables[track]['shortLabel']:
-                        track_label = tables[track]['shortLabel']
-                    out.write( """        <option name="%s" value="%s"/>\n""" % ( track_label, track ) )
-                    assigned_tables.append( track )
-                out.write( "      </option>\n" )
-            else:
-                track = sub_tracks[0]
-                track_label = track
-                if "$" not in tables[track]['shortLabel']:
-                    track_label = tables[track]['shortLabel']
-                out.write( """      <option name="%s" value="%s"/>\n""" % ( track_label, track ) )
-                assigned_tables.append( track )
-        out.write( "    </option>\n" )
-    unassigned_tables = list( sorted( [ table_dir for table_dir in os.listdir( output_dir ) if table_dir not in assigned_tables and os.path.isdir( os.path.join( output_dir, table_dir ) ) ] ) )
-    if unassigned_tables:
-        out.write( """    <option name="Uncategorized Tables" value="group-trackDbUnassigned">\n""" )
-        for table_name in unassigned_tables:
-            out.write( """      <option name="%s" value="%s"/>\n""" % ( table_name, table_name ) )
-        out.write( "    </option>\n" )
-    out.write( "  </options>\n" )
-    out.write( """</filter>\n""" )
-    out.close()
-
-def write_database_dump_info( input_dir, output_dir, dbkey, chrom_lengths, default_bitset_size ):
-    #generate hash for profiled table directories
-    #sort directories off output root (files in output root not hashed, including the profiler_info.txt file)
-    #sort files in each directory and hash file contents
-    profiled_hash = md5()
-    for table_dir in sorted( [ table_dir for table_dir in os.listdir( output_dir ) if os.path.isdir( os.path.join( output_dir, table_dir ) ) ] ):
-        for filename in sorted( os.listdir( os.path.join( output_dir, table_dir ) ) ):
-            f = open( os.path.join( output_dir, table_dir, filename ), 'rb' )
-            while True:
-                hash_chunk = f.read( CHUNK_SIZE )
-                if not hash_chunk:
-                    break
-                profiled_hash.update( hash_chunk )
-    profiled_hash = profiled_hash.hexdigest()
-
-    #generate hash for input dir
-    #sort directories off input root
-    #sort files in each directory and hash file contents
-    database_hash = md5()
-    for dirpath, dirnames, filenames in sorted( os.walk( input_dir ) ):
-        for filename in sorted( filenames ):
-            f = open( os.path.join( input_dir, dirpath, filename ), 'rb' )
-            while True:
-                hash_chunk = f.read( CHUNK_SIZE )
-                if not hash_chunk:
-                    break
-                database_hash.update( hash_chunk )
-    database_hash = database_hash.hexdigest()
-
-    #write out info file
-    out = open( os.path.join( output_dir, 'profiler_info.txt' ), 'wb' )
-    out.write( 'dbkey\t%s\n' % ( dbkey ) )
-    out.write( 'chromosomes\t%s\n' % ( ','.join( [ '%s=%s' % ( chrom_name, chrom_len ) for chrom_name, chrom_len in chrom_lengths.iteritems() ] ) ) )
-    out.write( 'bitset_size\t%s\n' % ( default_bitset_size ) )
-    for line in open( os.path.join( input_dir, 'trackDb.sql' ) ):
-        line = line.strip()
-        if line.startswith( '-- Dump completed on ' ):
-            line = line[ len( '-- Dump completed on ' ): ]
-            out.write( 'dump_time\t%s\n' % ( line ) )
-            break
-    out.write( 'dump_hash\t%s\n' % ( database_hash ) )
-    out.write( 'profiler_time\t%s\n' % ( time.time() ) )
-    out.write( 'profiler_hash\t%s\n' % ( profiled_hash ) )
-    out.write( 'profiler_version\t%s\n' % ( VERSION ) )
-    out.write( 'profiler_struct_format\t%s\n' % ( STRUCT_FMT ) )
-    out.write( 'profiler_struct_size\t%s\n' % ( STRUCT_SIZE ) )
-    out.close()
-
-def __main__():
-    usage = "usage: %prog options"
-    parser = OptionParser( usage=usage )
-    parser.add_option( '-d', '--dbkey', dest='dbkey', default='hg18', help='dbkey to process' )
-    parser.add_option( '-i', '--input_dir', dest='input_dir', default=os.path.join( 'golden_path','%s', 'database' ), help='Input Directory' )
-    parser.add_option( '-o', '--output_dir', dest='output_dir', default=os.path.join( 'profiled_annotations','%s' ), help='Output Directory' )
-    parser.add_option( '-c', '--chromosomes', dest='chromosomes', default='', help='Comma separated list of: ChromName1[=length],ChromName2[=length],...' )
-    parser.add_option( '-b', '--bitset_size', dest='bitset_size', default=DEFAULT_BITSET_SIZE, type='int', help='Default BitSet size; overridden by sizes specified in chromInfo.txt.gz or by --chromosomes' )
-    parser.add_option( '-f', '--ftp_site', dest='ftp_site', default='hgdownload.cse.ucsc.edu', help='FTP site; used for chromosome info when chromInfo.txt.gz method fails' )
-    parser.add_option( '-p', '--ftp_path', dest='ftp_path', default='/goldenPath/%s/chromosomes/', help='FTP Path; used for chromosome info when chromInfo.txt.gz method fails' )
-
-    ( options, args ) = parser.parse_args()
-
-    input_dir = options.input_dir
-    if '%' in input_dir:
-        input_dir = input_dir % options.dbkey
-    assert os.path.exists( input_dir ), 'Input directory does not exist'
-    output_dir = options.output_dir
-    if '%' in output_dir:
-        output_dir = output_dir % options.dbkey
-    assert not os.path.exists( output_dir ), 'Output directory already exists'
-    os.makedirs( output_dir )
-    ftp_path = options.ftp_path
-    if '%' in ftp_path:
-        ftp_path = ftp_path % options.dbkey
-
-    #Get chromosome names and lengths
-    chrom_lengths = {}
-    if options.chromosomes:
-        for chrom in options.chromosomes.split( ',' ):
-            fields = chrom.split( '=' )
-            chrom = fields[0]
-            if len( fields ) > 1:
-                chrom_len = int( fields[1] )
-            else:
-                chrom_len = options.bitset_size
-            chrom_lengths[ chrom ] = chrom_len
-        chroms = chrom_lengths.keys()
-        print 'Chrom info taken from command line option.'
-    else:
-        try:
-            for line in gzip.open( os.path.join( input_dir, 'chromInfo.txt.gz' ) ):
-                fields = line.strip().split( '\t' )
-                chrom_lengths[ fields[0] ] = int( fields[ 1 ] )
-            chroms = chrom_lengths.keys()
-            print 'Chrom info taken from chromInfo.txt.gz.'
-        except Exception, e:
-            print 'Error loading chrom info from chromInfo.txt.gz, trying FTP method.'
-            chrom_lengths = {} #zero out chrom_lengths
-            chroms = []
-            ftp = FTP( options.ftp_site )
-            ftp.login()
-            for name in ftp.nlst( ftp_path ):
-                if name.endswith( '.fa.gz' ):
-                    chroms.append( name.split( '/' )[-1][ :-len( '.fa.gz' ) ] )
-            ftp.close()
-            for chrom in chroms:
-                chrom_lengths[ chrom ] = options.bitset_size
-    #sort chroms by length of name, descending; necessary for when table names start with chrom name
-    chroms = list( reversed( [ chrom for chrom_len, chrom in sorted( [ ( len( chrom ), chrom ) for chrom in chroms ] ) ] ) )
-
-    #parse tables from local files
-    #loop through directory contents, if file ends in '.sql', process table
-    for filename in os.listdir( input_dir ):
-        if filename.endswith( '.sql' ):
-            base_filename = filename[ 0:-len( '.sql' ) ]
-            table_out_dir = os.path.join( output_dir, base_filename )
-            #some tables are chromosome specific, let's strip off the chrom name
-            for chrom in chroms:
-                if base_filename.startswith( "%s_" % chrom ):
-                    #found chromosome
-                    table_out_dir = os.path.join( output_dir, base_filename[len( "%s_" % chrom ):] )
-                    break
-            #create table dir
-            if not os.path.exists( table_out_dir ):
-                os.mkdir( table_out_dir ) #table dir may already exist in the case of single chrom tables
-                print "Created table dir (%s)." % table_out_dir
-            else:
-                print "Table dir (%s) already exists." % table_out_dir
-            #find column assignments
-            table_name, chrom_col, start_col, end_col = get_columns( "%s.sql" % os.path.join( input_dir, base_filename ) )
-            if chrom_col is None or start_col is None or end_col is None:
-                print "Table %s (%s) does not appear to have a chromosome, a start, or a stop." % ( table_name, "%s.sql" % os.path.join( input_dir, base_filename ) )
-                if not os.listdir( table_out_dir ):
-                    print "Removing empty table (%s) directory (%s)." % ( table_name, table_out_dir )
-                    os.rmdir( table_out_dir )
-                continue
-            #build bitsets from table
-            bitset_dict = {}
-            for line in gzip.open( '%s.txt.gz' % os.path.join( input_dir, base_filename ) ):
-                fields = line.strip().split( '\t' )
-                chrom = fields[ chrom_col ]
-                start = int( fields[ start_col ] )
-                end = int( fields[ end_col ] )
-                if chrom not in bitset_dict:
-                    bitset_dict[ chrom ] = BitSet( chrom_lengths.get( chrom, options.bitset_size ) )
-                bitset_dict[ chrom ].set_range( start, end - start )
-            #write bitsets as profiled annotations
-            for chrom_name, chrom_bits in bitset_dict.iteritems():
-                out = open( os.path.join( table_out_dir, '%s.covered' % chrom_name ), 'wb' )
-                end = 0
-                total_regions = 0
-                total_coverage = 0
-                max_size = chrom_lengths.get( chrom_name, options.bitset_size )
-                while True:
-                    start = chrom_bits.next_set( end )
-                    if start >= max_size:
-                        break
-                    end = chrom_bits.next_clear( start )
-                    out.write( struct.pack( STRUCT_FMT, start ) )
-                    out.write( struct.pack( STRUCT_FMT, end ) )
-                    total_regions += 1
-                    total_coverage += end - start
-                    if end >= max_size:
-                        break
-                out.close()
-                open( os.path.join( table_out_dir, '%s.total_regions' % chrom_name ), 'wb' ).write( str( total_regions ) )
-                open( os.path.join( table_out_dir, '%s.total_coverage' % chrom_name ), 'wb' ).write( str( total_coverage ) )
-
-    #create xml
-    create_grouping_xml( input_dir, output_dir, options.dbkey )
-    #create database dump info file, for database version control
-    write_database_dump_info( input_dir, output_dir, options.dbkey, chrom_lengths, options.bitset_size )
-
-if __name__ == "__main__": __main__()
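The merge-by-overlap/adjacency behavior the README promises falls out of the BitSet representation used above: every covered base is a set bit, so overlapping and adjacent ranges collapse into single runs of bits, and next_set/next_clear walk the merged runs. A minimal standalone sketch of that step, reusing only the bx-python calls that appear in the deleted script (it assumes bx-python is installed):

    from bx.bitset import BitSet

    SIZE = 100
    bits = BitSet( SIZE )
    # The README's example table: ranges 1-10, 6-12, 12-20 and 25-28.
    for start, end in [ ( 1, 10 ), ( 6, 12 ), ( 12, 20 ), ( 25, 28 ) ]:
        bits.set_range( start, end - start )

    # Walk runs of set bits exactly as build_profile_indexes.py does.
    end = 0
    merged = []
    while True:
        start = bits.next_set( end )
        if start >= SIZE:
            break
        end = bits.next_clear( start )
        merged.append( ( start, end ) )
        if end >= SIZE:
            break
    print merged # expected: [(1, 20), (25, 28)], matching the README's example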
diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 static/images/blat_mapping_example.png
Binary file static/images/blat_mapping_example.png has changed

diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 static/images/dualcolorcode.png
Binary file static/images/dualcolorcode.png has changed

diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 static/images/short_reads_boxplot.png
Binary file static/images/short_reads_boxplot.png has changed

diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 test-data/annotation_profiler_1.out
--- a/test-data/annotation_profiler_1.out
+++ /dev/null
@@ -1,9 +0,0 @@
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + multiz17way 1700000 1
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + mrna 1476531 12
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + multiz28way 1700000 1
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + refGene 1247808 15
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + knownAlt 14617 57
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + affyGnf1h 16218 2
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + snp126 8224 7262
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + acembly 1532618 20
-chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 + knownGene 1282789 18

diff -r 1fae433a54eaaab6392da9e9a8eec94a3b568c14 -r c559805d44df064f98dfbc94b59edba7be01adb9 test-data/annotation_profiler_2.out
--- a/test-data/annotation_profiler_2.out
+++ /dev/null
@@ -1,10 +0,0 @@
-#tableName tableChromosomeCoverage tableChromosomeCount tableRegionCoverage tableRegionCount allIntervalCount allIntervalSize allCoverage allTableRegionsOverlaped allIntervalsOverlapingTable nrIntervalCount nrIntervalSize nrCoverage nrTableRegionsOverlaped nrIntervalsOverlapingTable
-multiz17way 1232617592 115 107496500 7 25 2178864 2178864 25 25 24 2178828 2178828 7 24
-mrna 610115393 8453 53577685 617 25 2178864 1904380 38 24 24 2178828 1904344 33 23
-multiz28way 1233785185 143 107466479 10 25 2178864 2178864 25 25 24 2178828 2178828 8 24
-refGene 496767116 7324 46112187 488 25 2178864 1677947 30 23 24 2178828 1677911 27 22
-knownAlt 8647368 20213 766619 1630 25 2178864 5612 31 11 24 2178828 5612 31 11
-affyGnf1h 24034558 3995 2446754 307 25 2178864 191851 9 6 24 2178828 191851 9 6
-snp126 5297125 4456213 382226 331523 25 2178864 9205 7074 25 24 2178828 9205 7074 24
-acembly 710938193 13800 63146381 938 25 2178864 1903560 35 24 24 2178828 1903524 30 23
-knownGene 555770538 7921 50317496 558 25 2178864 1822985 30 23 24 2178828 1822949 27 22

This diff is so big that we needed to truncate the remainder.

https://bitbucket.org/galaxy/galaxy-central/commits/175bacc005a1/
Changeset:   175bacc005a1
User:        davebgx
Date:        2014-05-19 19:02:11
Summary:     Remove migrated tools from the tool conf files.
Affected #:  2 files

diff -r c559805d44df064f98dfbc94b59edba7be01adb9 -r 175bacc005a1437bfeedc8c76fe757467c08a870 tool_conf.xml.main
--- a/tool_conf.xml.main
+++ b/tool_conf.xml.main
@@ -51,13 +51,6 @@
     <tool file="filters/wig_to_bigwig.xml" />
     <tool file="filters/bed_to_bigbed.xml" />
   </section>
-  <section id="fasta_manipulation" name="FASTA manipulation">
-    <tool file="fasta_tools/fasta_compute_length.xml" />
-    <tool file="fasta_tools/fasta_filter_by_length.xml" />
-    <tool file="fasta_tools/fasta_concatenate_by_species.xml" />
-    <tool file="fasta_tools/fasta_to_tabular.xml" />
-    <tool file="fasta_tools/tabular_to_fasta.xml" />
-  </section>
   <section id="filter" name="Filter and Sort">
     <tool file="stats/filtering.xml" />
     <tool file="filters/sorter.xml" />
@@ -97,9 +90,6 @@
     <tool file="filters/wiggle_to_simple.xml" />
     <tool file="stats/aggregate_binned_scores_in_intervals.xml" />
   </section>
-  <section id="bxops" name="Operate on Genomic Intervals">
-    <tool file="annotation_profiler/annotation_profiler.xml" />
-  </section>
   <section id="stats" name="Statistics">
     <tool file="stats/gsummary.xml" />
     <tool file="filters/uniq.xml" />
@@ -116,12 +106,6 @@
     <tool file="maf/vcf_to_maf_customtrack.xml" />
     <tool file="mutation/visualize.xml" />
   </section>
-  <section id="multVar" name="Multivariate Analysis">
-    <tool file="multivariate_stats/pca.xml" />
-    <tool file="multivariate_stats/cca.xml" />
-    <tool file="multivariate_stats/kpca.xml" />
-    <tool file="multivariate_stats/kcca.xml" />
-  </section>
   <section id="hgv" name="Phenotype Association">
     <tool file="evolution/codingSnps.xml" />
     <tool file="evolution/add_scores.xml" />
@@ -141,8 +125,6 @@
   <section id="cshl_library_information" name="NGS: QC and manipulation">
     <label id="illumina" text="Illumina data" />
     <label id="454" text="Roche-454 data" />
-    <tool file="metag_tools/short_reads_figure_score.xml" />
-    <tool file="metag_tools/short_reads_trim_seq.xml" />
     <label id="solid" text="AB-SOLiD data" />
     <tool file="next_gen_conversion/solid2fastq.xml" />
     <tool file="solid_tools/solid_qual_stats.xml" />
@@ -153,8 +135,6 @@
   <section id="ngs_mapping" name="NGS: Mapping">
     <label id="illumina" text="Illumina" />
     <label id="roche_454" text="Roche-454" />
-    <tool file="metag_tools/megablast_wrapper.xml" />
-    <tool file="metag_tools/megablast_xml_parser.xml" />
     <label id="ab_solid" text="AB-SOLiD" />
   </section>
   <section id="samtools" name="NGS: SAM Tools">

diff -r c559805d44df064f98dfbc94b59edba7be01adb9 -r 175bacc005a1437bfeedc8c76fe757467c08a870 tool_conf.xml.sample
--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -68,13 +68,11 @@
     <tool file="filters/axt_to_fasta.xml" />
     <tool file="filters/axt_to_lav.xml" />
     <tool file="filters/bed2gff.xml" />
-    <tool file="fasta_tools/fasta_to_tabular.xml" />
     <tool file="filters/gff2bed.xml" />
     <tool file="filters/lav_to_bed.xml" />
     <tool file="maf/maf_to_bed.xml" />
     <tool file="maf/maf_to_interval.xml" />
     <tool file="maf/maf_to_fasta.xml" />
-    <tool file="fasta_tools/tabular_to_fasta.xml" />
     <tool file="filters/wiggle_to_simple.xml" />
     <tool file="filters/sff_extractor.xml" />
     <tool file="filters/gtf2bedgraph.xml" />
@@ -105,9 +103,6 @@
     <tool file="filters/wiggle_to_simple.xml" />
     <tool file="stats/aggregate_binned_scores_in_intervals.xml" />
   </section>
-  <section id="bxops" name="Operate on Genomic Intervals">
-    <tool file="annotation_profiler/annotation_profiler.xml" />
-  </section>
   <section id="stats" name="Statistics">
     <tool file="stats/gsummary.xml" />
     <tool file="filters/uniq.xml" />
@@ -130,12 +125,6 @@
     <tool file="maf/vcf_to_maf_customtrack.xml" />
     <tool file="mutation/visualize.xml" />
   </section>
-  <section id="multVar" name="Multivariate Analysis">
-    <tool file="multivariate_stats/pca.xml" />
-    <tool file="multivariate_stats/cca.xml" />
-    <tool file="multivariate_stats/kpca.xml" />
-    <tool file="multivariate_stats/kcca.xml" />
-  </section>
   <section id="hyphy" name="Evolution">
     <tool file="evolution/codingSnps.xml" />
     <tool file="evolution/add_scores.xml" />
@@ -144,15 +133,6 @@
     <tool file="meme/meme.xml" />
     <tool file="meme/fimo.xml" />
   </section>
-  <section id="clustal" name="Multiple Alignments">
-  </section>
-  <section id="fasta_manipulation" name="FASTA manipulation">
-    <tool file="fasta_tools/fasta_compute_length.xml" />
-    <tool file="fasta_tools/fasta_filter_by_length.xml" />
-    <tool file="fasta_tools/fasta_concatenate_by_species.xml" />
-    <tool file="fasta_tools/fasta_to_tabular.xml" />
-    <tool file="fasta_tools/tabular_to_fasta.xml" />
-  </section>
   <section id="NGS_QC" name="NGS: QC and manipulation">
     <label id="fastqcsambam" text="FastQC: fastq/sam/bam" />
@@ -160,9 +140,6 @@
     <label id="illumina" text="Illumina fastq" />
     <label id="454" text="Roche-454 data" />
-    <tool file="metag_tools/short_reads_figure_score.xml" />
-    <tool file="metag_tools/short_reads_trim_seq.xml" />
-
     <label id="solid" text="AB-SOLiD data" />
     <tool file="next_gen_conversion/solid2fastq.xml" />
     <tool file="solid_tools/solid_qual_stats.xml" />
@@ -185,8 +162,6 @@
   -->
   <section id="solexa_tools" name="NGS: Mapping">
     <tool file="sr_mapping/bfast_wrapper.xml" />
-    <tool file="metag_tools/megablast_wrapper.xml" />
-    <tool file="metag_tools/megablast_xml_parser.xml" />
     <tool file="sr_mapping/PerM.xml" />
     <tool file="sr_mapping/srma_wrapper.xml" />
     <tool file="sr_mapping/mosaik.xml" />

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this email.