details: http://www.bx.psu.edu/hg/galaxy/rev/4a7f9fdead89 changeset: 2976:4a7f9fdead89 user: Nate Coraor <nate@bx.psu.edu> date: Fri Nov 06 14:22:44 2009 -0500 description: Add Genetrack tools and display site diffstat: lib/galaxy/datatypes/tracks.py | 22 +- lib/galaxy/web/controllers/genetrack.py | 293 -------------------------- tool-data/shared/genetrack/genetrack_sites.txt | 8 +- tool_conf.xml.sample | 3 + tools/visualization/genetrack.py | 189 ----------------- tools/visualization/genetrack.xml | 72 ------ tools/visualization/genetrack_code.py | 12 - tools/visualization/genetrack_indexer.py | 39 +++ tools/visualization/genetrack_indexer.xml | 54 ++++ tools/visualization/genetrack_peak_prediction.py | 40 +++ tools/visualization/genetrack_peak_prediction.xml | 59 +++++ universe_wsgi.ini.sample | 3 +- 12 files changed, 214 insertions(+), 580 deletions(-) diffs (871 lines): diff -r 411b53d32b78 -r 4a7f9fdead89 lib/galaxy/datatypes/tracks.py --- a/lib/galaxy/datatypes/tracks.py Fri Nov 06 09:57:05 2009 -0500 +++ b/lib/galaxy/datatypes/tracks.py Fri Nov 06 14:22:44 2009 -0500 @@ -20,26 +20,26 @@ class GeneTrack( data.Binary ): file_ext = "genetrack" - MetadataElement( name="hdf", default="data.hdf", desc="HDF DB", readonly=True, visible=True, no_value=0 ) - MetadataElement( name="sqlite", default="features.sqlite", desc="SQLite Features DB", readonly=True, visible=True, no_value=0 ) + MetadataElement( name="genetrack", default="data.genetrack", desc="HDF index", readonly=True, visible=True, no_value=0 ) MetadataElement( name="label", default="Custom", desc="Track Label", readonly=True, visible=True, no_value="Custom" ) def __init__(self, **kwargs): super( GeneTrack, self ).__init__( **kwargs ) - self.add_display_app( 'genetrack', 'View in GeneTrack', '', 'genetrack_link' ) + self.add_display_app( 'genetrack', 'View in', '', 'genetrack_link' ) def get_display_links( self, dataset, type, app, base_url, target_frame='galaxy_main', **kwd ): return 
data.Binary.get_display_links( self, dataset, type, app, base_url, target_frame=target_frame, **kwd ) def genetrack_link( self, hda, type, app, base_url ): ret_val = [] if hda.has_data: - # Get the disk file name + # Get the disk file name and data id file_name = hda.dataset.get_file_name() + data_id = hda.dataset.id # Make it secure - a = hmac_new( app.config.tool_secret, file_name ) - b = binascii.hexlify( file_name ) - encoded_file_name = "%s:%s" % ( a, b ) - for site_name, site_url in util.get_genetrack_sites(): - if site_name in app.config.genetrack_display_sites: - link = "%s?filename=%s" % ( site_url, encoded_file_name ) - ret_val.append( ( site_name, link ) ) + hashkey = hmac_new( app.config.tool_secret, file_name ) + encoded = binascii.hexlify( file_name ) + for name, url in util.get_genetrack_sites(): + if name.lower() in app.config.genetrack_display_sites: + # send both parameters filename and hashkey + link = "%s?filename=%s&hashkey=%s&id=%s&GALAXY_URL=%s" % ( url, encoded, hashkey, data_id, base_url ) + ret_val.append( ( name, link ) ) return ret_val diff -r 411b53d32b78 -r 4a7f9fdead89 lib/galaxy/web/controllers/genetrack.py --- a/lib/galaxy/web/controllers/genetrack.py Fri Nov 06 09:57:05 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,293 +0,0 @@ -import time, glob, os, sys -from itertools import cycle -from mako import exceptions -from mako.template import Template -from mako.lookup import TemplateLookup -from galaxy.web.base.controller import * -from galaxy.util.hash_util import * - -try: - import pkg_resources - pkg_resources.require("GeneTrack") - import atlas - from atlas import sql - from atlas import hdf - from atlas import util as atlas_utils - from atlas.web import formlib, feature_query, feature_filter - from atlas.web import label_cache as atlas_label_cache - from atlas.plotting.const import * - from atlas.plotting.tracks import prefab - from atlas.plotting.tracks import chart - from atlas.plotting import tracks -except 
Exception, exc: - raise ControllerUnavailable("GeneTrack could not import a required dependency: %s" % str(exc)) - -pkg_resources.require( "Paste" ) -import paste.httpexceptions - -# Database helpers -SHOW_LABEL_LIMIT = 10000 - -def list_labels(session): - """ - Returns a list of labels that will be plotted in order. - """ - labels = sql.Label - query = session.query(labels).order_by("-id") - return query - -def open_databases( conf ): - """ - A helper function that returns handles to the hdf and sql databases - """ - db = hdf.hdf_open( conf.HDF_DATABASE, mode='r' ) - session = sql.get_session( conf.SQL_URI ) - return db, session - -def hdf_query(db, name, param, autosize=False ): - """ - Schema specific hdf query. - Note that returns data as columns not rows. - """ - if not hdf.has_node(db=db, name=name): - atlas.warn( 'missing label %s' % name ) - return [], [], [], [] - data = hdf.GroupData( db=db, name=name) - istart, iend = data.get_indices(label=param.chrom, start=param.start, stop=param.end) - table = data.get_table(label=param.chrom) - if autosize: - # attempts to reduce the number of points - size = len( table.cols.ix[istart:iend] ) - step = max( [1, size/1200] ) - else: - step = 1 - - ix = table.cols.ix[istart:iend:step].tolist() - wx = table.cols.wx[istart:iend:step].tolist() - cx = table.cols.cx[istart:iend:step].tolist() - ax = table.cols.ax[istart:iend:step].tolist() - return ix, wx, cx, ax - -# Chart helpers -def build_tracks( param, conf, data_label, fit_label, pred_label, strand, show=False ): - """ - Builds tracks - """ - # gets all the labels for a fast lookup - label_cache = atlas_label_cache( conf ) - - # get database handles for hdf and sql - db, session = open_databases( conf ) - - # fetching x and y coordinates for bar and fit (line) for - # each strand plus (p), minus (m), all (a) - bix, bpy, bmy, bay = hdf_query( db=db, name=data_label, param=param ) - fix, fpy, fmy, fay = hdf_query( db=db, name=fit_label, param=param ) - - # close the hdf 
database - db.close() - - # get all features within the range - all = feature_query( session=session, param=param ) - - # draws the barchart and the nucleosome chart below it - if strand == 'composite': - bar = prefab.composite_bartrack( fix=fix, fay=fay, bix=bix, bay=bay, param=param) - else: - bar = prefab.twostrand_bartrack( fix=fix, fmy=fmy, fpy=fpy, bix=bix, bmy=bmy, bpy=bpy, param=param) - - charts = list() - charts.append( bar ) - - return charts - -def feature_chart(param=None, session=None, label=None, label_dict={}, color=cycle( [LIGHT, WHITE] ) ): - all = feature_filter(feature_query(session=session, param=param), name=label, kdict=label_dict) - flipped = [] - for feature in all: - if feature.strand == "-": - feature.start, feature.end = feature.end, feature.start - flipped.append(feature) - opts = track_options( - xscale=param.xscale, w=param.width, fgColor=PURPLE, - show_labels=param.show_labels, ylabel=str(label), - bgColor=color.next() - ) - return [ - tracks.split_tracks(features=flipped, options=opts, split=param.show_labels, track_type='vector') - ] - -def consolidate_charts( charts, param ): - # create the multiplot - opt = chart_options( w=param.width ) - multi = chart.MultiChart(options=opt, charts=charts) - return multi - -# SETUP Track Builders -import functools -def twostrand_tracks( param=None, conf=None ): - return build_tracks( data_label=conf.LABEL, fit_label=conf.FIT_LABEL, pred_label=conf.PRED_LABEL, param=param, conf=conf, strand='twostrand') -def composite_tracks( param=None, conf=None ): - return build_tracks( data_label=conf.LABEL, fit_label=conf.FIT_LABEL, pred_label=conf.PRED_LABEL, param=param, conf=conf, strand='composite') - -class BaseConf( object ): - """ - Fake web_conf for atlas. 
- """ - IMAGE_DIR = "static/genetrack/plots/" - LEVELS = [str(x) for x in [ 50, 100, 250, 500, 1000, 2500, 5000, 10000, 20000, 50000, 100000, 200000 ]] - ZOOM_LEVELS = zip(LEVELS, LEVELS) - PLOT_SETUP = [ - ('comp-id', 'Composite' , 'genetrack/index.html', composite_tracks ), - ('two-id' , 'Two Strand', 'genetrack/index.html', twostrand_tracks ), - ] - PLOT_CHOICES = [ (id, name) for (id, name, page, func) in PLOT_SETUP ] - PLOT_MAPPER = dict( [ (id, (page, func)) for (id, name, page, func) in PLOT_SETUP ] ) - - def __init__(self, **kwds): - for key,value in kwds.items(): - setattr( self, key, value) - -class WebRoot(BaseController): - @web.expose - def search(self, trans, word='', dataset_id=None, submit=''): - """ - Default search page - """ - data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataset_id ) - if not data: - raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) ) - # the main configuration file - conf = BaseConf( - TITLE = "<i>%s</i>: %s" % (data.metadata.dbkey, data.metadata.label), - HDF_DATABASE = os.path.join( data.extra_files_path, data.metadata.hdf ), - SQL_URI = "sqlite:///%s" % os.path.join( data.extra_files_path, data.metadata.sqlite ), - LABEL = data.metadata.label, - FIT_LABEL = "%s-SIGMA-%d" % (data.metadata.label, 20), - PRED_LABEL = "PRED-%s-SIGMA-%d" % (data.metadata.label, 20), - ) - - param = atlas.Param( word=word ) - - # search for a given - try: - session = sql.get_session( conf.SQL_URI ) - except: - return trans.fill_template_mako('genetrack/invalid.html', dataset_id=dataset_id) - - if param.word: - def search_query( word, text ): - query = session.query(sql.Feature).filter( "name LIKE :word or freetext LIKE :text" ).params(word=word, text=text) - query = list(query[:20]) - return query - - # a little heuristics to match most likely target - targets = [ - (param.word+'%', 'No match'), # match beginning - ('%'+param.word+'%', 'No match'), # 
match name anywhere - ('%'+param.word+'%', '%'+param.word+'%'), # match json anywhere - ] - for word, text in targets: - query = search_query( word=word, text=text) - if query: - break - else: - query = [] - - return trans.fill_template_mako('genetrack/search.html', param=param, query=query, dataset_id=dataset_id) - - @web.expose - def index(self, trans, dataset_id=None, **kwds): - """ - Main request handler - """ - color = cycle( [LIGHT, WHITE] ) - data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataset_id ) - if not data: - raise paste.httpexceptions.HTTPRequestRangeNotSatisfiable( "Invalid reference dataset id: %s." % str( dataset_id ) ) - # the main configuration file - conf = BaseConf( - TITLE = "<i>%s</i>: %s" % (data.metadata.dbkey, data.metadata.label), - HDF_DATABASE = os.path.join( data.extra_files_path, data.metadata.hdf ), - SQL_URI = "sqlite:///%s" % os.path.join( data.extra_files_path, data.metadata.sqlite ), - LABEL = data.metadata.label, - FIT_LABEL = "%s-SIGMA-%d" % (data.metadata.label, 20), - PRED_LABEL = "PRED-%s-SIGMA-%d" % (data.metadata.label, 20), - ) - - try: - session = sql.get_session( conf.SQL_URI ) - except: - return trans.fill_template_mako('genetrack/invalid.html', dataset_id=dataset_id) - - if os.path.exists( conf.HDF_DATABASE ): - db = hdf.hdf_open( conf.HDF_DATABASE, mode='r' ) - conf.CHROM_FIELDS = [(x,x) for x in hdf.GroupData(db=db, name=conf.LABEL).labels] - db.close() - else: - query = session.execute(sql.select([sql.feature_table.c.chrom]).distinct()) - conf.CHROM_FIELDS = [(x.chrom,x.chrom) for x in query] - - # generate a new form based on the configuration - form = formlib.main_form( conf ) - - # clear the tempdir every once in a while - atlas_utils.clear_tempdir( dir=conf.IMAGE_DIR, days=1, chance=10) - - incoming = form.defaults() - incoming.update( kwds ) - - # manage the zoom and pan requests - incoming = formlib.zoom_change( kdict=incoming, levels=conf.LEVELS) - incoming = 
formlib.pan_view( kdict=incoming ) - - # process the form - param = atlas.Param( **incoming ) - form.process( incoming ) - - if kwds and form.isSuccessful(): - # adds the sucessfull parameters - param.update( form.values() ) - - # if it was a search word not a number go to search page - try: - center = int( param.feature ) - except ValueError: - # go and search for these - return trans.response.send_redirect( web.url_for( controller='genetrack', action='search', word=param.feature, dataset_id=dataset_id ) ) - - param.width = min( [2000, int(param.img_size)] ) - param.xscale = [ param.start, param.end ] - param.show_labels = ( param.end - param.start ) <= SHOW_LABEL_LIMIT - - # get the template and the function used to generate the tracks - tmpl_name, track_maker = conf.PLOT_MAPPER[param.plot] - - # check against a hash, display an image that already exists if it was previously created. - hash = new_secure_hash() - hash.update(str(dataset_id)) - for key in sorted(kwds.keys()): - hash.update(str(kwds[key])) - fname = "%s.png" % hash.hexdigest() - fpath = os.path.join(conf.IMAGE_DIR, fname) - - charts = [] - param.fname = fname - - # The SHA1 hash should uniquely identify the qs that created the plot... - if os.path.exists(fpath): - os.utime(fpath, (time.time(), time.time())) - return trans.fill_template_mako(tmpl_name, conf=conf, form=form, param=param, dataset_id=dataset_id) - - # If the hashed filename doesn't exist, create it. 
- if track_maker is not None and os.path.exists( conf.HDF_DATABASE ): - # generate the fit track - charts = track_maker( param=param, conf=conf ) - - for label in list_labels( session ): - charts.extend( feature_chart(param=param, session=session, label=label.name, label_dict={label.name:label.id}, color=color)) - - track_chart = consolidate_charts( charts, param ) - track_chart.save(fname=fpath) - - return trans.fill_template_mako(tmpl_name, conf=conf, form=form, param=param, dataset_id=dataset_id) diff -r 411b53d32b78 -r 4a7f9fdead89 tool-data/shared/genetrack/genetrack_sites.txt --- a/tool-data/shared/genetrack/genetrack_sites.txt Fri Nov 06 09:57:05 2009 -0500 +++ b/tool-data/shared/genetrack/genetrack_sites.txt Fri Nov 06 14:22:44 2009 -0500 @@ -1,3 +1,7 @@ # GeneTrack sites -main http://www.genetrack.org/ -test http://www.test.genetrack.org/ +# +# The key(s) should be added to universe_wsgi.ini as the value of +# genetrack_display_sites +# +# key site url +psu_galaxy http://genetrack.g2.bx.psu.edu/galaxy diff -r 411b53d32b78 -r 4a7f9fdead89 tool_conf.xml.sample --- a/tool_conf.xml.sample Fri Nov 06 09:57:05 2009 -0500 +++ b/tool_conf.xml.sample Fri Nov 06 14:22:44 2009 -0500 @@ -125,6 +125,9 @@ <tool file="visualization/GMAJ.xml" /> <tool file="visualization/LAJ.xml" /> <tool file="visualization/build_ucsc_custom_track.xml" /> + <tool file="visualization/genetrack_indexer.xml" /> + <tool file="visualization/genetrack_peak_prediction.xml" /> + </section> </section> <section name="Regional Variation" id="regVar"> <tool file="regVariation/windowSplitter.xml" /> diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack.py --- a/tools/visualization/genetrack.py Fri Nov 06 09:57:05 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,189 +0,0 @@ -#!/usr/bin/env python -""" -Run GeneTrack(atlas) with a faked conf file to generate GeneTrack data files. 
- -usage: %prog - -l, --label=N: Data label for fit curve/peak plot - -1, --fits=N/N/N/N/N,...: Data files (interval format) for fit curve/peak plot - -2, --feats=N:M/N/N/N/N/N,...: Data files (interval format) for features. - -d, --data=N: Output path for hdf5 and sqlite databases. - -o, --output=N: Output path for export file. -""" -from galaxy import eggs -import pkg_resources -pkg_resources.require("GeneTrack") -pkg_resources.require("bx-python") - -import commands as oscommands -from atlas import commands -from atlas import sql -from bx.cookbook import doc_optparse -from bx.intervals import io - -import os -import tempfile -from functools import partial - -SIGMA = 20 -WIDTH = 5 * SIGMA -EXCLUSION_ZONE = 147 - -def main(label, fit, feats, data_dir, output): - os.mkdir(data_dir) - conf = DummyConf( - __name__=label, - CLOBBER = True, - DATA_SIZE = 3*10**6, - MINIMUM_PEAK_SIZE = 0.1, - LOADER_ENABLED = False, - FITTER_ENABLED = False, - PREDICTOR_ENABLED = False, - EXPORTER_ENABLED = False, - LOADER = loader, - FITTER = fitter, - PREDICTOR = predictor, - EXPORTER = partial( commands.exporter, formatter=commands.bed_formatter), - HDF_DATABASE = os.path.join( data_dir, "data.hdf" ), - SQL_URI = "sqlite:///%s" % os.path.join( data_dir, "features.sqlite" ), - SIGMA = SIGMA, - WIDTH = WIDTH, - DATA_LABEL = label, - FIT_LABEL = "%s-SIGMA-%d" % ( label,SIGMA ), - PEAK_LABEL = "PRED-%s-SIGMA-%d" % ( label,SIGMA ), - EXCLUSION_ZONE = EXCLUSION_ZONE, - LEFT_SHIFT = EXCLUSION_ZONE / 2, - RIGHT_SHIFT = EXCLUSION_ZONE / 2, - EXPORT_LABELS = [ "PRED-%s-SIGMA-%d" % ( label,SIGMA ) ], - EXPORT_DIR = os.path.join( data_dir ), - DATA_FILE=fit and fit[1] or None, - fit=fit, - feats=feats, - ) - if fit: - # Turn on fit processing. 
- conf.LOADER_ENABLED = True, - conf.FITTER_ENABLED = True, - conf.PREDICTOR_ENABLED = True, - conf.EXPORTER_ENABLED = True, - for feat in feats: - load_feature_files(conf, feats) - commands.execute(conf) - outname = "%s.%s.txt" % (conf.__name__, conf.EXPORT_LABELS[0] ) - if os.path.exists( os.path.join(data_dir, outname) ): - os.rename( os.path.join(data_dir, outname), output) - -# mod454 seems to be a module without a package. The necessary funcitons are -# stubbed out here until I'm sure of their final home. INS - -def loader( conf ): - from atlas import hdf - from mod454.schema import Mod454Schema as Schema - last_chrom = table = None - db = hdf.hdf_open( conf.HDF_DATABASE, mode='a', title='HDF database') - gp = hdf.create_group( db=db, name=conf.DATA_LABEL, desc='data group', clobber=conf.CLOBBER ) - fit_meta = conf.fit[2] - # iterate over the file and insert into table - for line in open( conf.fit[1], "r" ): - if line.startswith("chrom"): continue #Skip possible header - if line.startswith("#"): continue - fields = line.rstrip('\r\n').split('\t') - chrom = fields[fit_meta.chromCol] - if chrom != last_chrom: - if table: table.flush() - table = hdf.create_table( db=db, name=chrom, where=gp, schema=Schema, clobber=False ) - last_chrom = chrom - try: - position = int(fields[fit_meta.positionCol]) - forward = float(fields[fit_meta.forwardCol]) - reverse = fit_meta.reverseCol > -1 and float(fields[fit_meta.reverseCol]) or 0.0 - row = ( position, forward, reverse, forward+reverse, ) - table.append( [ row ] ) - except ValueError: - # Ignore bad lines - pass - table.flush() - db.close() - -def fitter( conf ): - from mod454.fitter import fitter as mod454_fitter - return mod454_fitter( conf ) - -def predictor( conf ): - from mod454.predictor import predictor as mod454_predictor - return mod454_predictor( conf ) - -def load_feature_files( conf, feats): - """ - Loads features from file names - """ - engine = sql.get_engine( conf.SQL_URI ) - sql.drop_indices(engine) - conn 
= engine.connect() - for label, fname, col_spec in feats: - label_id = sql.make_label(engine, name=label, clobber=False) - reader = io.NiceReaderWrapper( open(fname,"r"), - chrom_col=col_spec.chromCol, - start_col=col_spec.startCol, - end_col=col_spec.endCol, - strand_col=col_spec.strandCol, - fix_strand=False ) - values = list() - for interval in reader: - print interval - if not type( interval ) is io.GenomicInterval: continue - row = {'label_id':label_id, - 'name':col_spec.nameCol == -1 and "%s-%s" % (str(interval.start), str(interval.end)) or interval.fields[col_spec.nameCol], - 'altname':"", - 'chrom':interval.chrom, - 'start':interval.start, - 'end':interval.end, - 'strand':interval.strand, - 'value':0, - 'freetext':""} - values.append(row) - insert = sql.feature_table.insert() - conn.execute( insert, values) - conn.close() - sql.create_indices(engine) - - -class Bunch( object ): - def __init__(self, **kwargs): - for key,value in kwargs.items(): - setattr( self, key, value ) - -class DummyConf( Bunch ): - """ - Fake conf module for genetrack/atlas. 
- """ - pass - -if __name__ == "__main__": - options, args = doc_optparse.parse( __doc__ ) - try: - label = options.label - if options.fits: - fit_name, fit_meta = options.fits.split(':')[0], [int(x)-1 for x in options.fits.split(':')[1:]] - fit_meta = Bunch(chromCol=fit_meta[0], positionCol=fit_meta[1], forwardCol=fit_meta[2], reverseCol=fit_meta[3]) - fit = ( label, fit_name, fit_meta, ) - else: - fit = [] - # split apart the string into nested lists, preserves order - if options.feats: - feats = [ ( - feat_label, - fname, - Bunch(chromCol=int(chromCol)-1, startCol=int(startCol)-1, endCol=int(endCol)-1, - strandCol=int(strandCol)-1, nameCol=int(nameCol)-1), - ) - for feat_label, fname, chromCol, startCol, endCol, strandCol, nameCol - in ( feat.split(':') for feat in options.feats.split(',') if len(feat) > 0 )] - else: - feats = [] - data_dir = options.data - output = options.output - except: - doc_optparse.exception() - - main(label, fit, feats, data_dir, output) - diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack.xml --- a/tools/visualization/genetrack.xml Fri Nov 06 09:57:05 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -<tool id="genetrack1" name="GeneTrack"> - - <description>Track creator/viewer</description> - - <code file="genetrack_code.py"> - <hook exec_after_process="exec_after_process" /> - </code> - - <command interpreter="python"> - genetrack.py -l $data_label - #if not str($fit_data) == "None" - -1 - ${fit_data}:${fit_data.metadata.chromCol}:${fit_data.metadata.positionCol}:${fit_data.metadata.forwardCol}:${fit_data.metadata.reverseCol} - #end if - #if $feature_data - -2 - #end if - #for $data in $feature_data - ${data.name}:${data.input}:${data.input.metadata.chromCol}:${data.input.metadata.startCol}:${data.input.metadata.endCol}:${data.input.metadata.strandCol}:${data.input.metadata.nameCol}, - #end for - -d ${genetrack.files_path} - -o ${bed_out} - </command> - - <inputs> - <param name="data_label" 
type="text" label="Track Label" size="50"> - <validator type="regex" message="Please name the track with only alphanumeric characters.">[a-zA-Z0-9]{1,25}</validator> - </param> - <param name="fit_data" type="data" format="coverage" label="Coverage Dataset (optional)" optional="true" /> - <repeat name="feature_data" title="Features"> - <param name="input" type="data" format="interval" label="Dataset" /> - <param name="name" type="text" label="Feature Type (mRNA, ESTs, ORFs, etc.)" size="25"> - <validator type="regex" message="Please name the feature with only alphanumeric characters.">[a-zA-Z0-9]{1,25}</validator> - </param> - </repeat> - </inputs> - - <outputs> - <data format="genetrack" name="genetrack" /> - <data format="bed" name="bed_out" /> - </outputs> - - <requirements> - <requirement type="python-module">tables</requirement> - <requirement type="python-module">atlas</requirement> - <requirement type="python-module">pychartdir</requirement> - <requirement type="python-module">numpy</requirement> - </requirements> - <help> -This tool takes the input Fit Data and creates a peak and curve plot -showing the reads and fitness on each base pair. Features can be -plotted below as tracks. Fit data is coverage output from tools like -the Lastz tool. Features are simply interval datasets that may be -plotted as tracks below the optional fit data. Both the fit data and -feature datasets are optional, but at least one of either is required -to generate a track. - ------ - -**Syntax** - -- **Track Label** is the name of the generated track. - -- **Fit Data** is the dataset to calculate coverage/reads across - base pairs and generate a curve. This is optional, and tracks may - be created simply showing features. - -- **Features** are datasets (interval format) to be plotted as tracks. - These are also optional, but at least 1 feature track or 1 fit - data is required to generate a track. 
- -</help> -</tool> diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_code.py --- a/tools/visualization/genetrack_code.py Fri Nov 06 09:57:05 2009 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -import os -from galaxy import eggs -from galaxy import jobs -from galaxy.tools.parameters import DataToolParameter - -def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None): - """ - Copy data_label to genetrack.metadata.label - """ - out_data['genetrack'].metadata.label = param_dict['data_label'] - out_data['genetrack'].info = "Use the link below to view the custom track." - out_data['bed_out'].info = "" diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_indexer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/visualization/genetrack_indexer.py Fri Nov 06 14:22:44 2009 -0500 @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +""" +Wraps genetrack.scripts.tabs2genetrack so the tool can be executed from Galaxy. 
+ +usage: %prog input output shift +""" + +import sys +from galaxy import eggs +import pkg_resources +pkg_resources.require( "GeneTrack" ) + +from genetrack.scripts import tabs2genetrack +from genetrack import logger + +if __name__ == "__main__": + + parser = tabs2genetrack.option_parser() + + options, args = parser.parse_args() + + # uppercase the format + options.format = options.format.upper() + + if options.format not in ('BED', 'GFF'): + sys.stdout = sys.stderr + parser.print_help() + sys.exit(-1) + + logger.disable(options.verbosity) + + # missing file names + if not (options.inpname and options.outname and options.format): + parser.print_help() + sys.exit(-1) + else: + tabs2genetrack.transform(inpname=options.inpname, outname=options.outname,\ + format=options.format, shift=options.shift, index=options.index) diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_indexer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/visualization/genetrack_indexer.xml Fri Nov 06 14:22:44 2009 -0500 @@ -0,0 +1,54 @@ +<tool id="bed2genetrack" name="Visualize BED file"> + + <description> - creates a visualization from a bed file</description> + + <command interpreter="python"> + genetrack_indexer.py -i $input -o $output -s $shift -v 0 -f BED -x + </command> + + <inputs> + + <param format="bed" name="input" type="data" help="Input data"> + <label>Select input bed file</label> + </param> + + <param name="shift" size="4" type="integer" value="0" help="distance in basepairs"> + <label>Shift at 5' end</label> + </param> + + <!-- this parameter is currently not used, may not be feasible to use it + <param name="coverage" type="select" label="Full coverage"> + <option value="no">NO</option> + <option value="yes">YES</option> + </param> + --> + + </inputs> + + <outputs> + <data format="genetrack" name="output" /> + </outputs> + +<help> +**Help** + +This tool will create a visualization of the bed file that is selected. 
+ +**Parameters** + +- **Shift at 5' end** should be used when the location of interest is at a fixed distance from + the 5' end for **all sequenced fragments**! + + For example, if the sequenced sample consists of + mono-nucleosomal DNA (146bp) we should expect that + each nucleosome midpoint is located at 73 bp from the 5' end of the fragment. + Therefore we would enter 73 as the shift parameter. Once corrected, the reads + on each strand will coincide and indicate the actual midpoints + of the nucleosomes. + + When shifting, the averaging process in GeneTrack is able to correct for longer or shorter + than expected fragment sizes as long as the errors are reasonably random. + +</help> + +</tool> diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_peak_prediction.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/visualization/genetrack_peak_prediction.py Fri Nov 06 14:22:44 2009 -0500 @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +""" +Wraps genetrack.scripts.peakpred so the tool can be executed from Galaxy. 
+ +usage: %prog input output level sigma mode exclusion strand +""" + +import sys +from galaxy import eggs +import pkg_resources +pkg_resources.require( "GeneTrack" ) + +from genetrack.scripts import peakpred +from genetrack import logger + +if __name__ == "__main__": + + parser = peakpred.option_parser() + + options, args = parser.parse_args() + + logger.disable(options.verbosity) + + from genetrack import conf + + # trigger test mode + if options.test: + options.inpname = conf.testdata('test-hdflib-input.gtrack') + options.outname = conf.testdata('predictions.bed') + + # missing input file name + if not options.inpname and not options.outname: + parser.print_help() + else: + print 'Sigma = %s' % options.sigma + print 'Minimum peak = %s' % options.level + print 'Peak-to-peak = %s' % options.exclude + + peakpred.predict(options.inpname, options.outname, options) diff -r 411b53d32b78 -r 4a7f9fdead89 tools/visualization/genetrack_peak_prediction.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/visualization/genetrack_peak_prediction.xml Fri Nov 06 14:22:44 2009 -0500 @@ -0,0 +1,59 @@ +<tool id="predict2genetrack" name="Peak predictor"> + + <description> - generates peak predictions from an index file</description> + + <command interpreter="python"> + genetrack_peak_prediction.py -i $input -o $output --level=$level --sigma=$sigma --mode=$mode --exclusion=$exclusion --strand=$strand -v 0 + </command> + + <inputs> + + <param format="genetrack" name="input" type="data" help="Input data" label="Select input data"/> + + <param name="method" type="select" label="Smoothing method" help="The function used to average nearby read values"> + <option value="gauss">Gaussian kernel</option> + <!-- <option value="yes">Moving averages</option> --> + </param> + + <param name="sigma" size="4" type="integer" value="10" label="Smoothing factor" help="The interval over which each read is averaged" /> + + + <param name="mode" type="select" label="Peak prediction" help="Peak 
prediction method"> + <option value="nolap">Maximal non-overlapping</option> + <!-- <option value="above">Above a threshold</option> --> + <option value="all">All peaks</option> + </param> + + <param name="exclusion" type="integer" size="4" value="0" help="The minimal distance between peaks" label="Peak-to-peak distance"> + </param> + + <param name="level" size="4" type="float" value="1" label="Threshold" help="Return only peaks above this value" /> + + <param name="strand" type="select" label="Strands" help="Combine strand data or predict on each strand separately"> + <option value="all">Merge strands</option> + <!-- <option value="yes1">Above a threshold</option> --> + <option value="two">Separate strands</option> + </param> + + </inputs> + + <outputs> + <data format="bed" name="output" /> + </outputs> + +<help> +**Help** + +This tool will generate genome-wide peak predictions from an index file. + +**Parameters** + +- **Smoothing method** the function used to average nearby values + +- **Smoothing factor** the factor used in the method + +- **Peak prediction** the method used to select which peaks are reported + +</help> + +</tool> diff -r 411b53d32b78 -r 4a7f9fdead89 universe_wsgi.ini.sample --- a/universe_wsgi.ini.sample Fri Nov 06 09:57:05 2009 -0500 +++ b/universe_wsgi.ini.sample Fri Nov 06 14:22:44 2009 -0500 @@ -93,7 +93,8 @@ # Comma separated list of UCSC / gbrowse / GeneTrack browsers to use for viewing ucsc_display_sites = main,test,archaea,ucla gbrowse_display_sites = main,test,tair -genetrack_display_sites = main,test +# Define your GeneTrack servers in tool-data/shared/genetrack/genetrack_sites.txt +#genetrack_display_sites = # Serving static files (needed if running standalone) static_enabled = True