[galaxy-dev] [hg] galaxy 2976: Add Genetrack tools and display site

7 Nov 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/4a7f9fdead89
changeset: 2976:4a7f9fdead89
user:      Nate Coraor <nate@bx.psu.edu>
date:      Fri Nov 06 14:22:44 2009 -0500
description:
Add Genetrack tools and display site

diffstat:

 lib/galaxy/datatypes/tracks.py                    |   22 +-
 lib/galaxy/web/controllers/genetrack.py           |  293 --------------------------
 tool-data/shared/genetrack/genetrack_sites.txt    |    8 +-
 tool_conf.xml.sample                              |    3 +
 tools/visualization/genetrack.py                  |  189 -----------------
 tools/visualization/genetrack.xml                 |   72 ------
 tools/visualization/genetrack_code.py             |   12 -
 tools/visualization/genetrack_indexer.py          |   39 +++
 tools/visualization/genetrack_indexer.xml         |   54 ++++
 tools/visualization/genetrack_peak_prediction.py  |   40 +++
 tools/visualization/genetrack_peak_prediction.xml |   59 +++++
 universe_wsgi.ini.sample                          |    3 +-
 12 files changed, 214 insertions(+), 580 deletions(-)

diffs (871 lines):

diff -r 411b53d32b78 -r 4a7f9fdead89 lib/galaxy/datatypes/tracks.py

--- a/lib/galaxy/datatypes/tracks.py	Fri Nov 06 09:57:05 2009 -0500
+++ b/lib/galaxy/datatypes/tracks.py	Fri Nov 06 14:22:44 2009 -0500
@@ -20,26 +20,26 @@
 class GeneTrack( data.Binary ):
     file_ext = "genetrack"
     
-    MetadataElement( name="hdf", default="data.hdf", desc="HDF DB", readonly=True, visible=True, no_value=0 )
-    MetadataElement( name="sqlite", default="features.sqlite", desc="SQLite Features DB", readonly=True, visible=True, no_value=0 )
+    MetadataElement( name="genetrack", default="data.genetrack", desc="HDF index", readonly=True, visible=True, no_value=0 )
     MetadataElement( name="label", default="Custom", desc="Track Label", readonly=True, visible=True, no_value="Custom" )
     
     def __init__(self, **kwargs):
         super( GeneTrack, self ).__init__( **kwargs )
-        self.add_display_app( 'genetrack', 'View in GeneTrack', '', 'genetrack_link' )
+        self.add_display_app( 'genetrack', 'View in', '', 'genetrack_link' )
     def get_display_links( self, dataset, type, app, base_url, target_frame='galaxy_main', **kwd ):
         return data.Binary.get_display_links( self, dataset, type, app, base_url, target_frame=target_frame, **kwd )
     def genetrack_link( self, hda, type, app, base_url ):
         ret_val = []
         if hda.has_data:
-            # Get the disk file name
+            # Get the disk file name and data id
             file_name = hda.dataset.get_file_name()
+            data_id  = hda.dataset.id
             # Make it secure
-            a = hmac_new( app.config.tool_secret, file_name )
-            b = binascii.hexlify( file_name )
-            encoded_file_name = "%s:%s" % ( a, b )
-            for site_name, site_url in util.get_genetrack_sites():
-                if site_name in app.config.genetrack_display_sites:
-                    link = "%s?filename=%s" % ( site_url, encoded_file_name )
-                    ret_val.append( ( site_name, link ) )
+            hashkey = hmac_new( app.config.tool_secret, file_name )
+            encoded = binascii.hexlify( file_name )
+            for name, url in util.get_genetrack_sites():
+                if name.lower() in app.config.genetrack_display_sites:
+                    # send the hex-encoded filename and its hashkey, plus the dataset id and the Galaxy base URL
+                    link = "%s?filename=%s&hashkey=%s&id=%s&GALAXY_URL=%s" % ( url, encoded, hashkey, data_id, base_url )
+                    ret_val.append( ( name, link ) )
             return ret_val
diff -r 411b53d32b78 -r 4a7f9fdead89 lib/galaxy/web/controllers/genetrack.py
--- a/lib/galaxy/web/controllers/genetrack.py	Fri Nov 06 09:57:05 2009 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,293 +0,0 @@
-import time, glob, os, sys
-from itertools import cycle
-from mako import exceptions
-from mako.template import Template
-from mako.lookup import TemplateLookup
-from galaxy.web.base.controller import *
-from galaxy.util.hash_util import *
-
-try:
-    import pkg_resources
-    pkg_resources.require("GeneTrack")
-    import atlas
-    from atlas import sql
-    from atlas import hdf
-    from atlas import util as atlas_utils
-    from atlas.web import formlib, feature_query, feature_filter
-    from atlas.web import label_cache as atlas_label_cache
-    from atlas.plotting.const import *
-    from atlas.plotting.tracks import prefab
-    from atlas.plotting.tracks import chart
-    from atlas.plotting import tracks
-except Exception, exc:
-    raise ControllerUnavailable("GeneTrack could not import a required dependency: %s" % str(exc))
-
-pkg_resources.require( "Paste" )
-import paste.httpexceptions
-
-# Database helpers
-SHOW_LABEL_LIMIT = 10000
-
-def list_labels(session):
-    """
-    Returns a list of labels that will be plotted in order.
-    """
-    labels = sql.Label
-    query = session.query(labels).order_by("-id")
-    return query
-
-def open_databases( conf ):
-    """
-    A helper function that returns handles to the hdf and sql databases
-    """
-    db = hdf.hdf_open( conf.HDF_DATABASE, mode='r' )
-    session = sql.get_session( conf.SQL_URI )
-    return db, session
-
-def hdf_query(db, name, param, autosize=False ):
-    """
-    Schema specific hdf query. 
-    Note that returns data as columns not rows.
-    """
-    if not hdf.has_node(db=db, name=name):
-        atlas.warn( 'missing label %s' % name )
-        return [], [], [], []
-    data  = hdf.GroupData( db=db, name=name)
-    istart, iend = data.get_indices(label=param.chrom, start=param.start, stop=param.end)
-    table = data.get_table(label=param.chrom)
-    if autosize:
-        # attempts to reduce the number of points
-        size = len( table.cols.ix[istart:iend] )
-        step = max( [1, size/1200] )
-    else:
-        step = 1
-
-    ix = table.cols.ix[istart:iend:step].tolist()
-    wx = table.cols.wx[istart:iend:step].tolist()
-    cx = table.cols.cx[istart:iend:step].tolist()
-    ax = table.cols.ax[istart:iend:step].tolist()
-    return ix, wx, cx, ax
-
-# Chart helpers
-def build_tracks( param, conf, data_label, fit_label, pred_label, strand, show=False ):
-    """
-    Builds tracks
-    """
-    # gets all the labels for a fast lookup
-    label_cache = atlas_label_cache( conf )       
-
-    # get database handles for hdf and sql
-    db, session = open_databases( conf )
-
-    # fetching x and y coordinates for bar and fit (line) for 
-    # each strand plus (p), minus (m), all (a) 
-    bix, bpy, bmy, bay = hdf_query( db=db, name=data_label, param=param )
-    fix, fpy, fmy, fay = hdf_query( db=db, name=fit_label, param=param )
-
-    # close the hdf database
-    db.close()
-
-    # get all features within the range
-    all = feature_query( session=session,  param=param )
-
-    # draws the barchart and the nucleosome chart below it
-    if strand == 'composite':
-        bar = prefab.composite_bartrack( fix=fix, fay=fay, bix=bix, bay=bay, param=param)
-    else:
-        bar = prefab.twostrand_bartrack( fix=fix, fmy=fmy, fpy=fpy, bix=bix, bmy=bmy, bpy=bpy, param=param)
-    
-    charts = list()
-    charts.append( bar )            
-
-    return charts
-
-def feature_chart(param=None, session=None, label=None, label_dict={}, color=cycle( [LIGHT, WHITE] ) ):
-    all = feature_filter(feature_query(session=session,  param=param), name=label, kdict=label_dict)
-    flipped = []
-    for feature in all:
-        if feature.strand == "-":
-            feature.start, feature.end = feature.end, feature.start
-        flipped.append(feature)
-    opts  = track_options( 
-        xscale=param.xscale, w=param.width, fgColor=PURPLE,
-        show_labels=param.show_labels, ylabel=str(label),
-        bgColor=color.next()
-    )
-    return [
-       tracks.split_tracks(features=flipped, options=opts, split=param.show_labels, track_type='vector')
-    ]
-
-def consolidate_charts( charts, param ):
-    # create the multiplot
-    opt = chart_options( w=param.width )
-    multi = chart.MultiChart(options=opt, charts=charts)
-    return multi
-
-# SETUP Track Builders
-import functools
-def twostrand_tracks( param=None, conf=None ):
-    return build_tracks( data_label=conf.LABEL, fit_label=conf.FIT_LABEL, pred_label=conf.PRED_LABEL, param=param, conf=conf, strand='twostrand')
-def composite_tracks( param=None, conf=None ):
-    return build_tracks( data_label=conf.LABEL, fit_label=conf.FIT_LABEL, pred_label=conf.PRED_LABEL, param=param, conf=conf, strand='composite')
-
-class BaseConf( object ):
-    """
-    Fake web_conf for atlas.
-    """
-    IMAGE_DIR = "static/genetrack/plots/"
-    LEVELS = [str(x) for x in [ 50, 100, 250, 500, 1000, 2500, 5000, 10000, 20000, 50000, 100000, 200000 ]]
-    ZOOM_LEVELS = zip(LEVELS, LEVELS)
-    PLOT_SETUP = [
-        ('comp-id', 'Composite' ,  'genetrack/index.html', composite_tracks ),
-        ('two-id' , 'Two Strand',  'genetrack/index.html', twostrand_tracks ),
-    ]
-    PLOT_CHOICES = [ (id, name) for (id, name, page, func) in PLOT_SETUP ]
-    PLOT_MAPPER = dict( [ (id, (page, func)) for (id, name, page, func) in PLOT_SETUP ] )
-    
-    def __init__(self, **kwds):
-        for key,value in kwds.items():
-            setattr( self, key, value)
-            
-class WebRoot(BaseController):   
-    @web.expose
-    def search(self, trans, word='', dataset_id=None, submit=''):
-        """
-        Default search page
-        """
-        data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataset_id )
-        if not data:
-            raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) )
-        # the main configuration file
-        conf = BaseConf(
-            TITLE = "<i>%s</i>: %s" % (data.metadata.dbkey, data.metadata.label),
-            HDF_DATABASE = os.path.join( data.extra_files_path, data.metadata.hdf ),
-            SQL_URI = "sqlite:///%s" % os.path.join( data.extra_files_path, data.metadata.sqlite ),
-            LABEL = data.metadata.label,
-            FIT_LABEL = "%s-SIGMA-%d" % (data.metadata.label, 20),
-            PRED_LABEL = "PRED-%s-SIGMA-%d" % (data.metadata.label, 20),
-            )
-
-        param = atlas.Param( word=word )
-        
-        # search for a given 
-        try:
-            session = sql.get_session( conf.SQL_URI )
-        except:
-            return trans.fill_template_mako('genetrack/invalid.html', dataset_id=dataset_id)
-
-        if param.word:
-            def search_query( word, text ):
-                query = session.query(sql.Feature).filter( "name LIKE :word or freetext LIKE :text" ).params(word=word, text=text)
-                query = list(query[:20])
-                return query
-
-            # a little heuristics to match most likely target
-            targets = [ 
-                (param.word+'%', 'No match'), # match beginning
-                ('%'+param.word+'%', 'No match'), # match name anywhere
-                ('%'+param.word+'%', '%'+param.word+'%'), # match json anywhere
-            ]
-            for word, text in targets:
-                query = search_query( word=word, text=text)
-                if query:
-                    break
-        else:
-            query = []
-
-        return trans.fill_template_mako('genetrack/search.html', param=param, query=query, dataset_id=dataset_id)
-
-    @web.expose
-    def index(self, trans, dataset_id=None, **kwds):
-        """
-        Main request handler
-        """
-        color = cycle( [LIGHT, WHITE] )
-        data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataset_id )
-        if not data:
-            raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) )
-        # the main configuration file
-        conf = BaseConf(
-            TITLE = "<i>%s</i>: %s" % (data.metadata.dbkey, data.metadata.label),
-            HDF_DATABASE = os.path.join( data.extra_files_path, data.metadata.hdf ),
-            SQL_URI = "sqlite:///%s" % os.path.join( data.extra_files_path, data.metadata.sqlite ),
-            LABEL = data.metadata.label,
-            FIT_LABEL = "%s-SIGMA-%d" % (data.metadata.label, 20),
-            PRED_LABEL = "PRED-%s-SIGMA-%d" % (data.metadata.label, 20),
-            )
-
-        try:
-            session = sql.get_session( conf.SQL_URI )
-        except:
-            return trans.fill_template_mako('genetrack/invalid.html', dataset_id=dataset_id)
-
-        if os.path.exists( conf.HDF_DATABASE ):
-            db = hdf.hdf_open( conf.HDF_DATABASE, mode='r' )
-            conf.CHROM_FIELDS = [(x,x) for x in hdf.GroupData(db=db, name=conf.LABEL).labels]
-            db.close()
-        else:
-            query = session.execute(sql.select([sql.feature_table.c.chrom]).distinct())
-            conf.CHROM_FIELDS = [(x.chrom,x.chrom) for x in query]
-
-        # generate a new form based on the configuration
-        form = formlib.main_form( conf )
-        
-        # clear the tempdir every once in a while
-        atlas_utils.clear_tempdir( dir=conf.IMAGE_DIR, days=1, chance=10)
-
-        incoming = form.defaults()
-        incoming.update( kwds )
-        
-        # manage the zoom and pan requests
-        incoming = formlib.zoom_change( kdict=incoming, levels=conf.LEVELS)
-        incoming = formlib.pan_view( kdict=incoming )
-        
-        # process the form
-        param = atlas.Param( **incoming )
-        form.process( incoming )
-
-        if kwds and form.isSuccessful():
-            # adds the sucessfull parameters
-            param.update( form.values() )
-
-        # if it was a search word not a number go to search page
-        try:
-            center = int( param.feature )
-        except ValueError:
-            # go and search for these
-            return trans.response.send_redirect( web.url_for( controller='genetrack', action='search', word=param.feature, dataset_id=dataset_id ) )
-
-        param.width  = min( [2000, int(param.img_size)] )
-        param.xscale = [ param.start, param.end ] 
-        param.show_labels = ( param.end - param.start ) <= SHOW_LABEL_LIMIT    
-        
-        # get the template and the function used to generate the tracks
-        tmpl_name, track_maker  = conf.PLOT_MAPPER[param.plot]
-        
-        # check against a hash, display an image that already exists if it was previously created.
-        hash = new_secure_hash()
-        hash.update(str(dataset_id))
-        for key in sorted(kwds.keys()):
-            hash.update(str(kwds[key]))
-        fname = "%s.png" % hash.hexdigest()
-        fpath = os.path.join(conf.IMAGE_DIR, fname)
-
-        charts = []
-        param.fname  = fname
-        
-        # The SHA1 hash should uniquely identify the qs that created the plot...
-        if os.path.exists(fpath):
-            os.utime(fpath, (time.time(), time.time()))
-            return trans.fill_template_mako(tmpl_name, conf=conf, form=form, param=param, dataset_id=dataset_id)
-        
-        # If the hashed filename doesn't exist, create it.
-        if track_maker is not None and os.path.exists( conf.HDF_DATABASE ):
-            # generate the fit track
-            charts = track_maker( param=param, conf=conf )
-            
-        for label in list_labels( session ):
-            charts.extend( feature_chart(param=param, session=session, label=label.name, label_dict={label.name:label.id}, color=color))
-
-        track_chart = consolidate_charts( charts, param )
-        track_chart.save(fname=fpath)
-
-        return trans.fill_template_mako(tmpl_name, conf=conf, form=form, param=param, dataset_id=dataset_id)
diff -r 411b53d32b78 -r 4a7f9fdead89 tool-data/shared/genetrack/genetrack_sites.txt
--- a/tool-data/shared/genetrack/genetrack_sites.txt	Fri Nov 06 09:57:05 2009 -0500
+++ b/tool-data/shared/genetrack/genetrack_sites.txt	Fri Nov 06 14:22:44 2009 -0500
@@ -1,3 +1,7 @@
 # GeneTrack sites
-main	http://www.genetrack.org/
-test	http://www.test.genetrack.org/
+#
+# The key(s) should be added to universe_wsgi.ini as the value of
+# genetrack_display_sites
+#
+# key		site url
+psu_galaxy	http://genetrack.g2.bx.psu.edu/galaxy
diff -r 411b53d32b78 -r 4a7f9fdead89 tool_conf.xml.sample
--- a/tool_conf.xml.sample	Fri Nov 06 09:57:05 2009 -0500
+++ b/tool_conf.xml.sample	Fri Nov 06 14:22:44 2009 -0500
@@ -125,6 +125,9 @@
     <tool file="visualization/GMAJ.xml" />
     <tool file="visualization/LAJ.xml" />
     <tool file="visualization/build_ucsc_custom_track.xml" />
+    <tool file="visualization/genetrack_indexer.xml" />
+    <tool file="visualization/genetrack_peak_prediction.xml" />
+  </section>
   </section>
   <section name="Regional Variation" id="regVar">
     <tool file="regVariation/windowSplitter.xml" />
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack.py
--- a/tools/visualization/genetrack.py	Fri Nov 06 09:57:05 2009 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,189 +0,0 @@
-#!/usr/bin/env python
-"""
-Run GeneTrack(atlas) with a faked conf file to generate GeneTrack data files.
-
-usage: %prog
-    -l, --label=N: Data label for fit curve/peak plot
-    -1, --fits=N/N/N/N/N,...: Data files (interval format) for fit curve/peak plot
-    -2, --feats=N:M/N/N/N/N/N,...: Data files (interval format) for features.
-    -d, --data=N: Output path for hdf5 and sqlite databases.
-    -o, --output=N: Output path for export file.
-"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require("GeneTrack")
-pkg_resources.require("bx-python")
-
-import commands as oscommands
-from atlas import commands
-from atlas import sql
-from bx.cookbook import doc_optparse
-from bx.intervals import io
-
-import os
-import tempfile
-from functools import partial
-
-SIGMA = 20
-WIDTH = 5 * SIGMA
-EXCLUSION_ZONE = 147    
-
-def main(label, fit, feats, data_dir, output):
-    os.mkdir(data_dir)
-    conf = DummyConf(
-        __name__=label,
-        CLOBBER = True,
-        DATA_SIZE = 3*10**6,
-        MINIMUM_PEAK_SIZE = 0.1,
-        LOADER_ENABLED = False,
-        FITTER_ENABLED = False,
-        PREDICTOR_ENABLED = False,
-        EXPORTER_ENABLED = False,
-        LOADER = loader,
-        FITTER = fitter,
-        PREDICTOR = predictor,
-        EXPORTER = partial( commands.exporter, formatter=commands.bed_formatter),
-        HDF_DATABASE = os.path.join( data_dir, "data.hdf" ),
-        SQL_URI = "sqlite:///%s" % os.path.join( data_dir, "features.sqlite" ),
-        SIGMA = SIGMA,
-        WIDTH = WIDTH,
-        DATA_LABEL = label,
-        FIT_LABEL = "%s-SIGMA-%d" % ( label,SIGMA ),
-        PEAK_LABEL = "PRED-%s-SIGMA-%d" % ( label,SIGMA ),
-        EXCLUSION_ZONE = EXCLUSION_ZONE,
-        LEFT_SHIFT = EXCLUSION_ZONE / 2,
-        RIGHT_SHIFT = EXCLUSION_ZONE / 2,
-        EXPORT_LABELS = [ "PRED-%s-SIGMA-%d" % ( label,SIGMA ) ],
-        EXPORT_DIR = os.path.join( data_dir ),
-        DATA_FILE=fit and fit[1] or None,
-        fit=fit,
-        feats=feats,
-        )
-    if fit:
-        # Turn on fit processing.
-        conf.LOADER_ENABLED = True,
-        conf.FITTER_ENABLED = True,
-        conf.PREDICTOR_ENABLED = True,
-        conf.EXPORTER_ENABLED = True,
-    for feat in feats:
-        load_feature_files(conf, feats)
-    commands.execute(conf)
-    outname = "%s.%s.txt" % (conf.__name__, conf.EXPORT_LABELS[0] )
-    if os.path.exists( os.path.join(data_dir, outname) ):
-        os.rename( os.path.join(data_dir, outname), output)
-    
-# mod454 seems to be a module without a package.  The necessary funcitons are
-# stubbed out here until I'm sure of their final home. INS
-
-def loader( conf ):
-    from atlas import hdf
-    from mod454.schema import Mod454Schema as Schema
-    last_chrom = table = None
-    db = hdf.hdf_open( conf.HDF_DATABASE, mode='a', title='HDF database')
-    gp = hdf.create_group( db=db, name=conf.DATA_LABEL, desc='data group', clobber=conf.CLOBBER ) 
-    fit_meta = conf.fit[2]
-    # iterate over the file and insert into table
-    for line in open( conf.fit[1], "r" ):
-        if line.startswith("chrom"): continue  #Skip possible header
-        if line.startswith("#"): continue
-        fields = line.rstrip('\r\n').split('\t')
-        chrom = fields[fit_meta.chromCol]
-        if chrom != last_chrom:
-            if table: table.flush()
-            table = hdf.create_table( db=db, name=chrom, where=gp, schema=Schema, clobber=False )
-            last_chrom = chrom
-        try:
-            position = int(fields[fit_meta.positionCol])
-            forward = float(fields[fit_meta.forwardCol])
-            reverse = fit_meta.reverseCol > -1 and float(fields[fit_meta.reverseCol]) or 0.0
-            row = ( position, forward, reverse, forward+reverse, )
-            table.append( [ row ] )
-        except ValueError:
-            # Ignore bad lines
-            pass
-    table.flush()
-    db.close()
-    
-def fitter( conf ):
-    from mod454.fitter import fitter as mod454_fitter 
-    return mod454_fitter( conf )
-
-def predictor( conf ):
-    from mod454.predictor import predictor as mod454_predictor
-    return mod454_predictor( conf )
-
-def load_feature_files( conf, feats):
-    """
-    Loads features from file names
-    """
-    engine = sql.get_engine( conf.SQL_URI )
-    sql.drop_indices(engine)
-    conn = engine.connect()
-    for label, fname, col_spec in feats:
-        label_id = sql.make_label(engine, name=label, clobber=False)
-        reader = io.NiceReaderWrapper( open(fname,"r"),
-                                       chrom_col=col_spec.chromCol,
-                                       start_col=col_spec.startCol,
-                                       end_col=col_spec.endCol,
-                                       strand_col=col_spec.strandCol,
-                                       fix_strand=False )
-        values = list()
-        for interval in reader:
-            print interval
-            if not type( interval ) is io.GenomicInterval: continue
-            row = {'label_id':label_id, 
-                   'name':col_spec.nameCol == -1 and "%s-%s" % (str(interval.start), str(interval.end)) or interval.fields[col_spec.nameCol],
-                   'altname':"",
-                   'chrom':interval.chrom,
-                   'start':interval.start,
-                   'end':interval.end,
-                   'strand':interval.strand,
-                   'value':0,
-                   'freetext':""}
-            values.append(row)
-        insert = sql.feature_table.insert()
-        conn.execute( insert, values)
-    conn.close()
-    sql.create_indices(engine)
-
-
-class Bunch( object ):
-    def __init__(self, **kwargs):
-        for key,value in kwargs.items():
-            setattr( self, key, value ) 
-
-class DummyConf( Bunch ):
-    """
-    Fake conf module for genetrack/atlas.
-    """
-    pass
-
-if __name__ == "__main__":
-    options, args = doc_optparse.parse( __doc__ )
-    try:
-        label = options.label
-        if options.fits:
-            fit_name, fit_meta = options.fits.split(':')[0], [int(x)-1 for x in options.fits.split(':')[1:]]
-            fit_meta = Bunch(chromCol=fit_meta[0], positionCol=fit_meta[1], forwardCol=fit_meta[2], reverseCol=fit_meta[3])
-            fit = ( label, fit_name, fit_meta, )
-        else:
-            fit = []
-        # split apart the string into nested lists, preserves order
-        if options.feats:
-            feats = [ ( 
-                    feat_label,
-                    fname,
-                    Bunch(chromCol=int(chromCol)-1, startCol=int(startCol)-1, endCol=int(endCol)-1, 
-                         strandCol=int(strandCol)-1, nameCol=int(nameCol)-1),
-                    ) 
-                 for feat_label, fname, chromCol, startCol, endCol, strandCol, nameCol
-                 in ( feat.split(':') for feat in options.feats.split(',') if len(feat) > 0 )]
-        else:
-            feats = []
-        data_dir = options.data
-        output = options.output
-    except:
-        doc_optparse.exception()
-    
-    main(label, fit, feats, data_dir, output)
-    
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack.xml
--- a/tools/visualization/genetrack.xml	Fri Nov 06 09:57:05 2009 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="genetrack1" name="GeneTrack">
-  
-    <description>Track creator/viewer</description>
-	
-	<code file="genetrack_code.py">
-         <hook exec_after_process="exec_after_process" />
-       </code>
-  	
-    <command interpreter="python">
-        genetrack.py -l $data_label
-        #if not str($fit_data) == "None"
-          -1 
-          ${fit_data}:${fit_data.metadata.chromCol}:${fit_data.metadata.positionCol}:${fit_data.metadata.forwardCol}:${fit_data.metadata.reverseCol}
-        #end if
-        #if $feature_data
-          -2
-        #end if 
-        #for $data in $feature_data
-          ${data.name}:${data.input}:${data.input.metadata.chromCol}:${data.input.metadata.startCol}:${data.input.metadata.endCol}:${data.input.metadata.strandCol}:${data.input.metadata.nameCol},
-        #end for
-        -d ${genetrack.files_path}
-        -o ${bed_out}
-    </command>
-    
-    <inputs>
-    	  <param name="data_label" type="text" label="Track Label" size="50">
-	        <validator type="regex" message="Please name the track with only alphanumeric characters.">[a-zA-Z0-9]{1,25}</validator>
-    	  </param>
-          <param name="fit_data" type="data" format="coverage" label="Coverage Dataset (optional)" optional="true" />
-    	  <repeat name="feature_data" title="Features">
-    	  	<param name="input" type="data" format="interval" label="Dataset" />
-    	  	<param name="name" type="text" label="Feature Type (mRNA, ESTs, ORFs, etc.)" size="25">
-	        	<validator type="regex" message="Please name the feature with only alphanumeric characters.">[a-zA-Z0-9]{1,25}</validator>
-		</param>
-    	  </repeat>
-   </inputs>
-
-   <outputs>  
-       <data format="genetrack" name="genetrack" />
-       <data format="bed" name="bed_out" />
-   </outputs>
-   
-   <requirements>
-     <requirement type="python-module">tables</requirement>
-     <requirement type="python-module">atlas</requirement>
-     <requirement type="python-module">pychartdir</requirement>
-     <requirement type="python-module">numpy</requirement>
-   </requirements>
- <help>
-This tool takes the input Fit Data and creates a peak and curve plot
-showing the reads and fitness on each base pair.  Features can be
-plotted below as tracks.  Fit data is coverage output from tools like
-the Lastz tool.  Features are simply interval datasets that may be
-plotted as tracks below the optional fit data.  Both the fit data and
-feature datasets are optional, but at least one of either is required
-to generate a track.
-
------
-
-**Syntax**
-
-- **Track Label** is the name of the generated track.
-
-- **Fit Data** is the dataset to calculate coverage/reads across
-    base pairs and generate a curve.  This is optional, and tracks may
-    be created simply showing features.
-
-- **Features** are datasets (interval format) to be plotted as tracks.
-    These are also optional, but at least 1 feature track or 1 fit
-    data is required to generate a track.
-
-</help>
-</tool>
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_code.py
--- a/tools/visualization/genetrack_code.py	Fri Nov 06 09:57:05 2009 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-import os
-from galaxy import eggs
-from galaxy import jobs
-from galaxy.tools.parameters import DataToolParameter
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    """
-    Copy data_label to genetrack.metadata.label
-    """
-    out_data['genetrack'].metadata.label = param_dict['data_label']
-    out_data['genetrack'].info = "Use the link below to view the custom track."
-    out_data['bed_out'].info = ""
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_indexer.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/visualization/genetrack_indexer.py	Fri Nov 06 14:22:44 2009 -0500
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+"""
+Wraps genetrack.scripts.tabs2genetrack so the tool can be executed from Galaxy.
+
+usage: %prog input output shift
+"""
+
+import sys
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "GeneTrack" )
+
+from genetrack.scripts import tabs2genetrack
+from genetrack import logger
+
+if __name__ == "__main__":
+
+    parser = tabs2genetrack.option_parser()
+
+    options, args = parser.parse_args()
+
+    # uppercase the format
+    options.format = options.format.upper()
+
+    if options.format not in ('BED', 'GFF'):
+        sys.stdout = sys.stderr
+        parser.print_help()
+        sys.exit(-1)
+
+    logger.disable(options.verbosity)
+
+    # missing file names
+    if not (options.inpname and options.outname and options.format):
+        parser.print_help()
+        sys.exit(-1)
+    else:
+        tabs2genetrack.transform(inpname=options.inpname, outname=options.outname,\
+            format=options.format, shift=options.shift, index=options.index)
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_indexer.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/visualization/genetrack_indexer.xml	Fri Nov 06 14:22:44 2009 -0500
@@ -0,0 +1,54 @@
+<tool id="bed2genetrack" name="Visualize BED file">
+  
+  <description> - creates a visualization from a bed file</description>
+
+  <command interpreter="python">
+    genetrack_indexer.py -i $input -o $output -s $shift -v 0 -f BED -x 
+  </command>
+    
+  <inputs>
+    
+    <param format="bed" name="input" type="data" help="Input data">
+      <label>Select input bed file</label>
+    </param>
+    
+    <param name="shift" size="4" type="integer" value="0" help="distance in basepairs">
+        <label>Shift at 5' end</label>
+    </param>
+
+    <!-- this parameter is currently not used, may not be feasible to use it
+    <param name="coverage" type="select" label="Full coverage">
+      <option value="no">NO</option>
+      <option value="yes">YES</option>
+    </param>
+    -->
+  
+  </inputs>
+
+  <outputs>  
+    <data format="genetrack" name="output" />
+  </outputs>
+   
+<help>
+**Help**
+
+This tool will create a visualization of the bed file that is selected. 
+
+**Parameters**
+
+- **Shift at 5' end** should be used when the location of interest is at a fixed distance from
+  the 5' end for **all sequenced fragments**! 
+  
+  For example, if the sequenced sample consists of
+  mono-nucleosomal DNA (146bp) we should expect that 
+  each nucleosome midpoint is located at 73 bp from the 5' end of the fragment. 
+  Therefore we would enter 73 as the shift parameter. Once corrected the reads 
+  on each strand will coincide and indicate the actual midpoints 
+  of the nucleosomes.
+  
+  When shifting, the averaging process in GeneTrack is able to correct for longer or shorter
+  than expected fragment sizes as long as the errors are reasonably random.
+
+</help>
+
+</tool>
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_peak_prediction.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/visualization/genetrack_peak_prediction.py	Fri Nov 06 14:22:44 2009 -0500
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+"""
+Wraps genetrack.scripts.peakpred so the tool can be executed from Galaxy.
+
+usage: %prog input output level sigma mode exclusion strand
+"""
+
+import sys
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "GeneTrack" )
+
+from genetrack.scripts import peakpred
+from genetrack import logger
+
+if __name__ == "__main__":
+
+    parser = peakpred.option_parser()
+
+    options, args = parser.parse_args()
+
+    logger.disable(options.verbosity)
+
+    from genetrack import conf
+
+    # trigger test mode
+    if options.test:
+        options.inpname = conf.testdata('test-hdflib-input.gtrack')
+        options.outname = conf.testdata('predictions.bed')
+
+    # missing input file name
+    if not options.inpname and not options.outname:
+        parser.print_help()
+    else:
+        print 'Sigma = %s' % options.sigma
+        print 'Minimum peak = %s' % options.level
+        print 'Peak-to-peak = %s' % options.exclude
+
+        peakpred.predict(options.inpname, options.outname, options)
diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_peak_prediction.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/visualization/genetrack_peak_prediction.xml	Fri Nov 06 14:22:44 2009 -0500
@@ -0,0 +1,59 @@
+<tool id="predict2genetrack" name="Peak predictor">
+  
+  <description> - generates peak predictions from an index file</description>
+
+  <command interpreter="python">
+      genetrack_peak_prediction.py -i $input -o $output --level=$level --sigma=$sigma --mode=$mode --exclusion=$exclusion --strand=$strand -v 0 
+  </command>
+    
+  <inputs>
+    
+    <param format="genetrack" name="input" type="data" help="Input data" label="Select input data"/>
+ 
+    <param name="method" type="select" label="Smoothing method" help="The function used to average nearby read values">
+      <option value="gauss">Gaussian kernel</option>
+      <!-- <option value="yes">Moving averages</option> -->
+    </param>
+  
+    <param name="sigma" size="4" type="integer" value="10" label="Smoothing factor" help="The interval over which each read is averaged" />
+        
+
+    <param name="mode" type="select" label="Peak prediction" help="Peak prediction method"> 
+      <option value="nolap">Maximal non-overlapping</option>
+      <!-- <option value="above">Above a threshold</option> -->
+      <option value="all">All peaks</option>
+    </param>
+  
+    <param name="exclusion" type="integer" size="4" value="0" help="The minimal distance between peaks"  label="Peak-to-peak distance">
+    </param>
+
+    <param name="level" size="4" type="float" value="1" label="Threshold" help="Return only peaks above this value" />
+    
+    <param name="strand" type="select" label="Strands" help="Combine strand data or predict on each strand separately">
+      <option value="all">Merge strands</option>
+      <!-- <option value="yes1">Above a threshold</option> -->
+      <option value="two">Separate strands</option>
+    </param>
+
+  </inputs>
+
+  <outputs>  
+    <data format="bed" name="output" />
+  </outputs>
+   
+<help>
+**Help**
+
+This tool will generate genome-wide peak predictions from an index file.
+
+**Parameters**
+
+- **Smoothing method** the function used to average nearby values
+
+- **Smoothing factor** the interval over which each read is averaged
+
+- **Peak prediction** the method used to select peaks: maximal non-overlapping peaks or all peaks
+
+</help>
+
+</tool>
diff -r 411b53d32b78 -r 4a7f9fdead89 universe_wsgi.ini.sample
--- a/universe_wsgi.ini.sample	Fri Nov 06 09:57:05 2009 -0500
+++ b/universe_wsgi.ini.sample	Fri Nov 06 14:22:44 2009 -0500
@@ -93,7 +93,8 @@
 # Comma separated list of UCSC / gbrowse / GeneTrack browsers to use for viewing
 ucsc_display_sites = main,test,archaea,ucla
 gbrowse_display_sites = main,test,tair
-genetrack_display_sites = main,test
+# Define your GeneTrack servers in tool-data/shared/genetrack/genetrack_sites.txt
+#genetrack_display_sites =
 
 # Serving static files (needed if running standalone)
 static_enabled = True

    

[galaxy-dev] [hg] galaxy 2976: Add Genetrack tools and display site

Greg Von Kuster