galaxy-commits
Threads by month
- ----- 2025 -----
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- 15302 discussions
galaxy-dist commit 6d94416b2ccf: Automatically find new .sample files in tool-data/.
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate(a)bx.psu.edu>
# Date 1288984386 14400
# Node ID 6d94416b2ccf9205283ec197154d99522615cbf8
# Parent 1e8d7c6ad88b074b33b2029f3baa15c2eb929e70
Automatically find new .sample files in tool-data/.
--- a/run.sh
+++ b/run.sh
@@ -5,47 +5,22 @@ cd `dirname $0`
python ./scripts/check_python.py
[ $? -ne 0 ] && exit 1
-FROM_SAMPLE="
- datatypes_conf.xml
- reports_wsgi.ini
- tool_conf.xml
- tool_data_table_conf.xml
- universe_wsgi.ini
- tool-data/add_scores.loc
- tool-data/alignseq.loc
- tool-data/annotation_profiler_options.xml
- tool-data/annotation_profiler_valid_builds.txt
- tool-data/bfast_indexes.loc
- tool-data/binned_scores.loc
- tool-data/blastdb.loc
- tool-data/blastdb_p.loc
- tool-data/bowtie_indices.loc
- tool-data/bowtie_indices_color.loc
- tool-data/codingSnps.loc
- tool-data/encode_datasets.loc
- tool-data/funDo.loc
- tool-data/lastz_seqs.loc
- tool-data/liftOver.loc
- tool-data/maf_index.loc
- tool-data/maf_pairwise.loc
- tool-data/microbial_data.loc
- tool-data/phastOdds.loc
- tool-data/perm_base_index.loc
- tool-data/perm_color_index.loc
- tool-data/quality_scores.loc
- tool-data/regions.loc
- tool-data/sam_fa_indices.loc
- tool-data/sift_db.loc
- tool-data/srma_index.loc
- tool-data/twobit.loc
- tool-data/shared/ucsc/builds.txt
+SAMPLES="
+ datatypes_conf.xml.sample
+ reports_wsgi.ini.sample
+ tool_conf.xml.sample
+ tool_data_table_conf.xml.sample
+ universe_wsgi.ini.sample
+ tool-data/shared/ucsc/builds.txt.sample
+ tool-data/*.sample
"
# Create any missing config/location files
-for file in $FROM_SAMPLE; do
- if [ ! -f "$file" -a -f "$file.sample" ]; then
- echo "Initializing $file from `basename $file`.sample"
- cp $file.sample $file
+for sample in $SAMPLES; do
+ file=`echo $sample | sed -e 's/\.sample$//'`
+ if [ ! -f "$file" -a -f "$sample" ]; then
+ echo "Initializing $file from `basename $sample`"
+ cp $sample $file
fi
done
1
0
galaxy-dist commit cdd6ce1f38b0: Enable filter tool to operate correctly on files with a variable number of columns (such as SAM files). Tool prepares and processes only columns up to and including the right-most column (i.e. column with the largest index) in the filtering condition, and subsequent columns are ignored. New functional test added for this functionality as well.
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks(a)emory.edu>
# Date 1288902634 14400
# Node ID cdd6ce1f38b0e22802ed63e3e0cb4f6a58cb93b2
# Parent f372ea5a601443a9b9cd1d3f7e04ddc24db5b57a
Enable filter tool to operate correctly on files with a variable number of columns (such as SAM files). Tool prepares and processes only columns up to and including the right-most column (i.e. column with the largest index) in the filtering condition, and subsequent columns are ignored. New functional test added for this functionality as well.
--- a/tools/stats/filtering.py
+++ b/tools/stats/filtering.py
@@ -60,10 +60,18 @@ for operand in operands:
except:
if operand in secured:
stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) )
+
+# Find the largest column used in the filter.
+largest_col_index = -1
+for match in re.finditer( 'c(\d)+', cond_text ):
+ col_index = int( match.group()[1:] )
+ if col_index > largest_col_index:
+ largest_col_index = col_index
-# Prepare the column variable names and wrappers for column data types
+# Prepare the column variable names and wrappers for column data types. Only
+# prepare columns up to largest column in condition.
cols, type_casts = [], []
-for col in range( 1, in_columns + 1 ):
+for col in range( 1, largest_col_index + 1 ):
col_name = "c%d" % col
cols.append( col_name )
col_type = in_column_types[ col - 1 ]
@@ -72,7 +80,7 @@ for col in range( 1, in_columns + 1 ):
col_str = ', '.join( cols ) # 'c1, c2, c3, c4'
type_cast_str = ', '.join( type_casts ) # 'str(c1), int(c2), int(c3), str(c4)'
-assign = "%s = line.split( '\\t' )" % col_str
+assign = "%s, = line.split( '\\t' )[:%i]" % ( col_str, largest_col_index )
wrap = "%s = %s" % ( col_str, type_cast_str )
skipped_lines = 0
first_invalid_line = 0
--- a/tools/stats/filtering.xml
+++ b/tools/stats/filtering.xml
@@ -23,6 +23,13 @@
<param name="cond" value="c1=='chr1' and c3-c2>=2000 and c6=='+'"/><output name="out_file1" file="filter1_test2.bed"/></test>
+ <!-- Test filtering of file with a variable number of columns. -->
+ <test>
+ <param name="input" value="filter1_in3.sam"/>
+ <param name="cond" value="c3=='chr1' and c5>5"/>
+ <output name="out_file1" file="filter1_test3.sam"/>
+ </test>
+
</tests><help>
--- /dev/null
+++ b/test-data/filter1_in3.sam
@@ -0,0 +1,100 @@
+HWI-EAS269B:8:34:797:623 145 chr1 16632182 255 27M = 16631977 0 CCATTTCCTGTATGCTGTAAAGTACAA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:40:39:1184 145 chr1 24620331 3 27M = 24620115 0 ATTTATGTGGTTTCGTTTACCTTCTAT IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:2 CC:Z:chrM CP:i:9060
+HWI-EAS269B:8:58:533:1198 81 chr1 88426079 255 27M = 88423429 0 GAAGAGGAAGAAGGTGGGGAGGAAGAG IIIG?IIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:89:1776:1815 97 chr1 134085638 255 27M = 134085824 0 GAATGATTCTCTGGGTGTTACTTTGCA IIIIIIIIIIIIIIIDIII:IIII>F5 NM:i:0 NH:i:1
+HWI-EAS269B:8:74:1134:1670 161 chr1 138166886 255 27M = 138167084 0 TTACTAGTGTCTCTCTTACCATCATAT .IIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 NH:i:1
+HWI-EAS269:3:59:1321:1427 147 chr1 173149715 255 27M = 173149555 0 AAGGGCTAGGGTGACAGGCAGGGGACG -C<CID?IIIIIIIDIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:8:164:1678 145 chr1 178660716 255 27M = 178660493 0 CAATTGGTGTTTTTCTTAAGAGACTCA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:1:1048:638 137 chr10 12456478 1 27M * 0 0 TAAAANAAATAAAACAAAACAATAAAA $'&(+")'%$(&*&#&$$#%%$$%%$$ NM:i:2 NH:i:4 CC:Z:chr16 CP:i:21639623
+HWI-EAS269:3:62:1211:798 147 chr10 28135294 255 27M = 28135117 0 ACATGGTGTGGGGACAGAGATGTGAAG I;IIIIIIAIIIIIIIIIIIIIIIIII NM:i:1 NH:i:1
+HWI-EAS269:3:27:410:424 145 chr10 76950070 255 27M = 76949871 0 GAGTCTGTGTCCAGGCCAATTCACTAT 25-8I+.6B.IIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:48:1180:1122 73 chr10 83772157 0 27M * 0 0 GATCATCTTTTCTAAAACAATAAAGAC /IIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:6 CC:Z:chr11 CP:i:93811140
+HWI-EAS269:3:77:654:131 99 chr10 93419810 255 27M = 93419993 0 TTGCATCCCTAGGAACTGGAGTTATAG IIIIIIGIHIIIED@CIIH5I3D9G6: NM:i:0 NH:i:1
+HWI-EAS269B:8:98:895:1810 177 chr11 3371472 255 27M = 37952212 0 CTACATAGTGGGAACCGGCGACCGCTG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:2 NH:i:1
+HWI-EAS269B:8:58:1126:883 97 chr11 30080486 255 27M = 30080702 0 TACTTCCACCAGGCTCCAGTTTTGTGA IIIIIIIIIIIIIIIIIIIIIIIIII5 NM:i:0 NH:i:1
+HWI-EAS269B:8:89:1763:1446 81 chr11 59589605 255 27M = 59589400 0 GTACCTGGCCACTGATGCAGCTTAGAA II<IIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:6:739:1329 161 chr11 68629628 3 27M = 68629835 0 GTTTTTGGTTGATAGTTGAGCAAACTG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:2 CC:Z:chr4 CP:i:126661877
+HWI-EAS269:3:19:1683:1350 81 chr11 70846915 255 27M = 70846725 0 CACGGGCAGATAAGCTGCTGAGACTAA H50BB:IF=IIGIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:9:1691:1087 97 chr11 84640783 255 27M = 84641010 0 ACATTATCCATCTCTCTGTCATTGTCC IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:48:1350:424 81 chr11 94150331 255 27M = 94150113 0 TTGCAAGCCAACCCTGAGTGAAGTGTC IIDHIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:22:1176:1405 97 chr11 98629843 255 27M = 98630030 0 CCCTCGGAGGGCAACGGGAAAGGAGAA IIIIIIIIIIIDIIIII?III/79E14 NM:i:0 NH:i:1
+HWI-EAS269:3:67:1311:1512 97 chr11 100488190 255 27M = 100488374 0 CATGCCCTGAGACTTAGCAAGACTCTT IIIIIIIIIIIIIIIEIA3,I57GBI@ NM:i:0 NH:i:1
+HWI-EAS269B:8:41:1142:1307 161 chr11 101320424 255 27M = 101320625 0 GCCTCCCTACATAGCAAAGGAAAGAAA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:31:1101:21 97 chr11 116711287 255 27M = 116711485 0 CATAAGCAAAAGATACACCATGTTTTA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:84:637:1203 145 chr12 35829424 255 27M = 35829229 0 CTCCCTCAAGGATCTTGTGGGGCATCA </'>I,I5B26$II;EB=%IIII1III NM:i:0 NH:i:1
+HWI-EAS269:3:65:1158:1081 163 chr12 52234332 255 27M = 52234505 0 TTTTTTTTTTAAGACAGGGAGTTTTTT IIIIIIIIIIIC4,,*>II%-,%III> NM:i:1 NH:i:1
+HWI-EAS269B:8:65:1325:1287 137 chr12 52234513 3 27M * 0 0 GCAGCACCTTGTGACATTTAATTTAGT IIIII+IIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:2 CC:Z:chr15 CP:i:98780443
+HWI-EAS269:3:38:488:1093 97 chr12 80471642 255 27M = 80471839 0 TGGAGAGATGACTCCTTGTTTGAGACA IIIIIIIIIIIIIIIIIIIBAE0B<7D NM:i:0 NH:i:1
+HWI-EAS269:3:76:290:1451 99 chr12 117745566 0 27M = 117745739 0 CAATCTGAAGATCCACAATCTTTTATA IIIIIIIIIIIIIIFII5I9IIII+I7 NM:i:0 NH:i:5 CC:Z:chr4 CP:i:133520935
+HWI-EAS269B:8:39:353:1526 65 chr13 18122558 255 27M = 18122761 0 ACTCTACTCAAAACCACACTAAGCCTC @IIIIII6IIIIIDII9IIIIII<III NM:i:0 NH:i:1
+HWI-EAS269:3:63:1260:365 99 chr13 41278147 255 27M = 41278319 0 AAGACAAGAACTTATCCACCAATATGT IIIIIIIIIIIIIII<IIA<CIIDII> NM:i:0 NH:i:1
+HWI-EAS269:3:70:152:1609 161 chr13 55421426 255 27M = 55421687 0 CGCTTGACCATTCCAGCCCAGACAAGA IIIIIIIII8IIII3IIII-C(3-%B' NM:i:0 NH:i:1
+HWI-EAS269:3:9:1343:1414 99 chr13 83721797 255 27M = 83721967 0 AACTACAGCTGAGGCAGCCTCCTGCCT IIIIIIIIIIIIIIIII;E+?7&819& NM:i:0 NH:i:1
+HWI-EAS269:3:4:1480:1956 137 chr14 46703849 0 27M * 0 0 GTGGAGCCCAGTGACACCATCGAGAAC IIIIIIIIIIIII?C3IH?1@I@@=27 NM:i:0 NH:i:8 CC:Z:= CP:i:46704077
+HWI-EAS269:3:86:616:1519 137 chr14 56244763 255 27M * 0 0 CATGGCCTGAAGTTCCTGAGCTTTATC IIIIIIIII3DIBI9II3)73BIG'G+ NM:i:0 NH:i:1
+HWI-EAS269:3:42:656:1216 97 chr14 100271567 255 27M = 100271751 0 ATAAGTCTCAGTTCTTGGGCCAGATCA IIIIIIIIICIIIIII3IA854I1C+) NM:i:0 NH:i:1
+HWI-EAS269:3:87:95:767 73 chr16 7368880 255 27M * 0 0 AATATCTGAAACATTCAAATGAGCAAT F>II/4I@9H=II?IIIIIIIIIIII- NM:i:0 NH:i:1
+HWI-EAS269B:8:50:1572:711 145 chr16 32230400 255 27M = 32230196 0 TACAGCAACAACAAATTCAACGACACG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:56:1436:121 147 chr16 49887529 255 27M = 49887356 0 TTAAGGCCCAGCTCTACATAAAACACT IIIIAHIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:53:584:1696 99 chr17 45705815 255 27M = 45705987 0 CCTTCTTGTCCAAGATCTCCTTCATGA IIIIIIIIIIIIBI6IIII=II:GI@< NM:i:0 NH:i:1
+HWI-EAS269B:8:16:643:1950 177 chr18 38000738 255 27M = 38000570 0 ATTCTGGTTCAGCCTGGGCAGCTTGGG III@==I0<IIGIII;IIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:66:1760:1539 137 chr18 38624681 3 27M * 0 0 TTTGGAACTTTTGAGAGGATCCCTAGC IIIIIIIIIIIIIII1II.IIIII1II NM:i:0 NH:i:2
+HWI-EAS269:3:2:1723:1277 163 chr19 16341705 255 27M = 16341870 0 ATGGGCTCGTCGCAGCTCAGCGGCTGG IIIIIIIIIIIIICI=8<&I7:F(+;2 NM:i:1 NH:i:1
+HWI-EAS269B:8:66:317:1676 97 chr19 47099316 255 27M = 47099535 0 ATCTGGTCAATCAACACCACCAGCAGA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:10:1428:1315 81 chr2 22444224 3 27M = 22444039 0 GGAATTGCGATAATTATAGTGGCTGAT ?6>CI>GIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:2 CC:Z:chrM CP:i:6245
+HWI-EAS269:3:72:152:785 97 chr2 22796881 255 27M = 22797074 0 TGAGTACTCCAGATAATCGTTACACAA IIIIIIII6IIIIIIII0IA@?,7752 NM:i:0 NH:i:1
+HWI-EAS269:3:79:321:1095 99 chr2 62083760 255 27M = 62083938 0 CAAAAATTGAGATATCAAAAAGCTCTT IIIIIIIII2IGIBIIC8:4IB1H4I; NM:i:0 NH:i:1
+HWI-EAS269:3:93:1529:881 97 chr2 129998362 255 27M = 129998827 0 CGCATGCCAGGAGGGGGCATGCCCATT IIIFIIIIFIIIIIFII?72I.,020* NM:i:0 NH:i:1
+HWI-EAS269B:8:20:813:376 113 chr2 130534278 255 27M = 130534454 0 TGTATTAGCAGGAGGTGGGGAGGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:89:1221:1082 83 chr2 151285071 0 27M = 151284944 0 ACCAGTGCACAGGTCTCCAGGGCTTCT -IC=B+8IIIEIIFI5IIIIIIIIIII NM:i:0 NH:i:5 CC:Z:chr9 CP:i:31743980
+HWI-EAS269:3:66:46:336 163 chr2 156679837 255 27M = 156680016 0 TATTTTCCTTTTGCTGTGGTTTGTGTT IIIIIIIIIIIIIEI@E-I?GI&B%3I NM:i:0 NH:i:1
+HWI-EAS269B:8:52:1139:1381 73 chr2 174953004 0 27M * 0 0 AATGCTCAACTCTTAGTTTCTTATTCA IIIIIIIIIIIIIIIIB@?IIIII?+0 NM:i:0 NH:i:19 CC:Z:= CP:i:175018018
+HWI-EAS269:3:98:585:19 161 chr3 26005087 255 27M = 26005271 0 ACCTAACACATGTAAACTTAAATTCAT I:IIIIIIII<IIE==DDI):,D1--8 NM:i:0 NH:i:1
+HWI-EAS269:3:74:447:1309 83 chr3 80492396 255 27M = 80492220 0 TCGGATGCCTCTCACCACTTTGACAAT )H():F=85IIIIEIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:74:425:1427 161 chr3 96447603 255 27M = 96447820 0 TGGAGTAGGAGTCTCAGGAGGAGTAGA IIIIIIIIIIIIIIIIIIIII:I?II< NM:i:0 NH:i:1
+HWI-EAS269:3:10:739:1474 163 chr3 124089371 255 27M = 124089535 0 AAAAAAACAATCTTATTCCGAGCATTC IIIIIIIIIIIIIIIIIIIIIIIGIII NM:i:0 NH:i:1
+HWI-EAS269:3:20:357:1709 65 chr4 128895987 255 27M = 128895822 0 CCTACCTTCTTCCCTTGGCAGCTGACT IIII>IIIBIIA:I18):,*3&,/*'+ NM:i:0 NH:i:1
+HWI-EAS269B:8:22:623:129 113 chr4 135492867 255 27M = 135490613 0 CCACTTTCCTGTACTGGCCAGAAAATG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:48:824:702 65 chr4 139584010 255 27M = 139582899 0 CTGGGCAGTGCAGCGGTACATGGAGCC IIIIIIIIIIIIIIIIII<AIIB;I>: NM:i:0 NH:i:1
+HWI-EAS269B:8:33:527:1211 73 chr5 37627410 255 27M * 0 0 GGGAAGGTGTGAGTACAACAGCCAAAG IIIIIIIIIIIIIIIIIIIIIIII@II NM:i:0 NH:i:1
+HWI-EAS269:3:2:1518:599 163 chr5 136908928 255 27M = 136909101 0 CTATTGCCAAAAAACTATGTTCACAAA IIIIIIIIIIIIIIIIIIIIIIIII=I NM:i:0 NH:i:1
+HWI-EAS269B:8:62:1493:1455 161 chr5 138715332 255 27M = 138715534 0 CCCAAATGAAAAAATAAATATTATGAA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:69:384:1894 147 chr5 143665243 255 27M = 143665064 0 TGTTTGCTCCAACCAACTGCTGTCGCC &++*,2H-IGI+IFIGI?IIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:56:1358:762 113 chr6 15024174 255 7M12235N20M = 51485453 0 TGGGTACTTTCTCTAGCTCCTCCATTG /@)3I?IIIIIIIIIIIIIIIIIIIII NM:i:2 XS:A:- NH:i:1
+HWI-EAS269:3:100:228:1799 163 chr6 37756378 3 27M = 37756558 0 GTGTGGGGCTGCGTGGCCTGGCTGGTG DII4A+>5EI.)F634820&1(0%&&& NM:i:2 NH:i:2
+HWI-EAS269B:8:50:188:1253 161 chr6 52296684 1 27M = 52296901 0 ACTTTTCAGGGTTTTCAATAGTCACAC IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 NH:i:3 CC:Z:= CP:i:140695441
+HWI-EAS269B:8:19:440:1687 161 chr6 52850377 255 27M = 52850580 0 TACAAAGATGGACTTTTAAAATTCATT IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:47:1555:1520 161 chr6 67231655 255 27M = 67233919 0 ATTCAGTTATGGACCATCATTTCCGGA IIIIIIIIIIIIIIIIIIIIII<AII1 NM:i:0 NH:i:1
+HWI-EAS269B:8:79:1563:1318 161 chr6 89267414 255 27M = 89267614 0 GTGCGTGTTCCAGGCAGAGCTGGAAGA IIIIIIIIIIIIIIIIIIIIIII=;IA NM:i:0 NH:i:1
+HWI-EAS269B:8:54:954:565 161 chr6 120882513 255 27M = 120882716 0 ACGTCATGGCTGACCAGGACAGAGGTG IIIIIIIIIIIIIII?IEG;II<II*8 NM:i:0 NH:i:1
+HWI-EAS269B:8:16:570:1775 145 chr7 26081268 255 27M = 26080313 0 GAAAGAGTGACACAAATCAATAGTAAA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:13:280:189 161 chr7 29184615 255 27M = 29184814 0 CTTTTCCAACAGCGAGAAAAATGTACA IIIIIIIIIIIIIIIIIIIIIIIIDII NM:i:0 NH:i:1
+HWI-EAS269B:8:19:72:273 137 chr7 52258521 255 27M * 0 0 ATCAGAGGCACAGGGACAGGGTAAGGA &&2IIIII;E(IIIIIIIIIIIIIIII NM:i:1 NH:i:1
+HWI-EAS269B:8:40:235:869 161 chr7 52631974 255 27M = 52632179 0 GGCGCTGACTCCATCAGATATCCATTC IIIIIII7IIIIIII6IIIIIIIBIII NM:i:1 NH:i:1
+HWI-EAS269B:8:18:304:346 129 chr7 107789557 255 27M = 33158390 0 CACGTACTGTCACCTTGTAACATTTGG IIIIIIIIIIIIIIIIIIIIIIIIIIF NM:i:0 NH:i:1
+HWI-EAS269B:8:3:698:297 161 chr7 110976065 255 27M = 110976385 0 GCAGTTATCACTTTCTTGCCATGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:50:1269:965 145 chr7 118217605 255 27M = 118216239 0 ACCTGTAGATCCACATGATCATGAAGA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:38:1381:1429 83 chr7 136141344 255 27M = 136141280 0 ATCTGAAGTATCCCACATGTTGAGCTC <III@IIIIIIIII>IIIIIIIIIIII NM:i:1 NH:i:1
+HWI-EAS269:3:25:781:386 83 chr8 29844005 255 27M = 29843841 0 TAAGGGAGGAAAGTGTTTCAGAGTGTA ;83<159<<III@IIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:25:979:679 163 chr8 72664237 255 27M = 72664413 0 CTGATGGGAGCCCTGCGTGGTAAGAGG IIIIIIIIIII=III@I(II$27I>I4 NM:i:0 NH:i:1
+HWI-EAS269B:8:23:1069:816 145 chr8 74704433 255 27M = 74704222 0 TGTTCTCAGTTGTGGACAAGTGACAGC I<GII@IIIIIIII;IIIIIIIIII0I NM:i:0 NH:i:1
+HWI-EAS269B:8:34:435:1679 73 chr8 87366211 3 27M * 0 0 AAGCCTAGGGCTTCTCCTCTACACCCC I556I;FCAIIIFI<IIIIIIIIIIII NM:i:0 NH:i:2
+HWI-EAS269B:8:32:1486:294 97 chr8 124121215 255 27M = 124121411 0 TGTTACCATACGCCCTTCTGCTGAGGC IIIIIIIIIIIIIIIIIIDIIIIGIII NM:i:0 NH:i:1
+HWI-EAS269B:8:32:142:1471 81 chr8 124496752 3 27M = 124496536 0 GGATCTGCTTCATGAGTTGCCACATTG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 NH:i:2
+HWI-EAS269B:8:60:82:1806 97 chr8 125945215 255 27M = 125945423 0 AGATGCTGGCCATCCAGAGTAAGAACA IIIIIIIIIIIIIIIIIIIHIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:16:1162:495 81 chr8 125945584 255 27M = 125945381 0 GTTGCTCGCAGCTGGGGTGTGGGGCCA <CIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:1:165:1889 81 chr8 126016885 255 27M = 126016469 0 TGAGCAGGAAAACACTTTAAACCAGAT IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:66:939:1154 145 chr9 17537149 255 27M = 17536959 0 CTCCTTCAAGTACGCCTGGGTCTTAGA IDIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:68:1189:285 81 chr9 61249807 255 27M = 61242460 0 AGTCAAGCTCACTTGGCGGTGAAGGAT @.0;;A,?-F5I;7IIIIGIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:15:784:1754 163 chr9 74846937 255 27M = 74847108 0 GCACAGCACTGAGGAAAGGATCATCTC IIIIIIIIIIIIIIIIIIGII1CI7II NM:i:0 NH:i:1
+HWI-EAS269B:8:26:126:1382 145 chrM 3413 255 27M = 3190 0 GTTATTCTTTATAGCAGAGTACACTAA IIII8IB328I9IIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:90:320:1336 73 chrM 6326 255 27M * 0 0 ATGATCTCCAGCTATCCTATGAGCCTT IIIIIIIIA.ICI.4'(=,C>7-*&@8 NM:i:1 NH:i:1
+HWI-EAS269B:8:23:469:1215 161 chrM 7472 3 27M = 7676 0 ATTTCATCTGAAGACGTCCTCCACTCA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:2
+HWI-EAS269:3:73:36:783 161 chrM 7958 3 27M = 8152 0 CCCCAACAATAATAGGATTCCCAATCG IIIIIIIIIIIIIIIIIIIII09I>>I NM:i:0 NH:i:2
+HWI-EAS269:3:33:1528:954 99 chrM 8012 3 27M = 8186 0 TCCTATTCCCATCCTCAAAACGCCTAA IIIIIIIIIIIIIIIIIIIIIIIDII: NM:i:0 NH:i:2
+HWI-EAS269:3:2:192:1456 161 chrM 9071 3 27M = 9255 0 ACGAAACCACATAAATCAAGCCCTACT III;IEIIDI7III+III*?I@CH+5I NM:i:0 NH:i:2
+HWI-EAS269:3:63:192:470 97 chrM 14787 255 27M = 14982 0 GCAGATAAAATTCCATTTCACCCCTAC IIIIIIIIIIIIIIFIII=B5042:4E NM:i:0 NH:i:1
+HWI-EAS269B:8:49:528:63 83 chrM 14963 255 27M = 14829 0 TTTCCTATTTGCATACGCCATTCTACG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:98:856:1516 145 chrX 20257960 255 27M = 20256746 0 TACCCGGATTTAAGATGTACCCCATTG IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:28:1174:110 145 chrX 53916348 255 27M = 53916153 0 TGAATGTCAGCATCATTGACCCACAAA IIIIFIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:58:663:410 147 chrX 75006902 255 27M = 75006834 0 TCAGGTGGTTTACAGTGTTCTGACAAA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
--- /dev/null
+++ b/test-data/filter1_test3.sam
@@ -0,0 +1,6 @@
+HWI-EAS269B:8:34:797:623 145 chr1 16632182 255 27M = 16631977 0 CCATTTCCTGTATGCTGTAAAGTACAA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:58:533:1198 81 chr1 88426079 255 27M = 88423429 0 GAAGAGGAAGAAGGTGGGGAGGAAGAG IIIG?IIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269:3:89:1776:1815 97 chr1 134085638 255 27M = 134085824 0 GAATGATTCTCTGGGTGTTACTTTGCA IIIIIIIIIIIIIIIDIII:IIII>F5 NM:i:0 NH:i:1
+HWI-EAS269B:8:74:1134:1670 161 chr1 138166886 255 27M = 138167084 0 TTACTAGTGTCTCTCTTACCATCATAT .IIIIIIIIIIIIIIIIIIIIIIIIII NM:i:1 NH:i:1
+HWI-EAS269:3:59:1321:1427 147 chr1 173149715 255 27M = 173149555 0 AAGGGCTAGGGTGACAGGCAGGGGACG -C<CID?IIIIIIIDIIIIIIIIIIII NM:i:0 NH:i:1
+HWI-EAS269B:8:8:164:1678 145 chr1 178660716 255 27M = 178660493 0 CAATTGGTGTTTTTCTTAAGAGACTCA IIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 NH:i:1
1
0
galaxy-dist commit 702f4717a8f3: Bugfix for the abstracted job runner
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate(a)bx.psu.edu>
# Date 1288986871 14400
# Node ID 702f4717a8f3fe38758c54136f81935f7679c937
# Parent 6d94416b2ccf9205283ec197154d99522615cbf8
Bugfix for the abstracted job runner
--- a/lib/galaxy/jobs/runners/__init__.py
+++ b/lib/galaxy/jobs/runners/__init__.py
@@ -26,7 +26,7 @@ class BaseJobRunner( object ):
exec_dir = os.path.abspath( os.getcwd() ),
tmp_dir = self.app.config.new_file_path,
dataset_files_path = self.app.model.Dataset.file_path,
- output_fnames = output_fnames,
+ output_fnames = job_wrapper.get_output_fnames(),
set_extension = False,
kwds = { 'overwrite' : False } )
return commands
1
0
galaxy-dist commit 7e9ae4605105: Add a new "Files uploaded via FTP" grid to the upload form and related parameter types, form fields, etc.
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate(a)bx.psu.edu>
# Date 1288969490 14400
# Node ID 7e9ae46051057d822aff82a9b1f3aec0200086f2
# Parent cdd6ce1f38b0e22802ed63e3e0cb4f6a58cb93b2
Add a new "Files uploaded via FTP" grid to the upload form and related parameter types, form fields, etc.
--- a/tools/data_source/upload.xml
+++ b/tools/data_source/upload.xml
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
-<tool name="Upload File" id="upload1" version="1.1.1">
+<tool name="Upload File" id="upload1" version="1.1.2"><description>
from your computer
</description>
@@ -29,6 +29,7 @@
<validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator><!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs --></param><param name="url_paste" type="text" area="true" size="5x35" label="URL/Text" help="Here you may specify a list of URLs (one per line) or paste the contents of a file."/>
+ <param name="ftp_files" type="ftpfile" label="Files uploaded via FTP"/><param name="space_to_tab" type="select" display="checkboxes" multiple="True" label="Convert spaces to tabs" help="Use this option if you are entering intervals by hand."><option value="Yes">Yes</option></param>
--- a/lib/galaxy/web/form_builder.py
+++ b/lib/galaxy/web/form_builder.py
@@ -2,9 +2,10 @@
Classes for generating HTML forms
"""
-import logging,sys
+import logging, sys, os, time
from cgi import escape
-from galaxy.util import restore_text
+from galaxy.util import restore_text, relpath, nice_size
+from galaxy.web import url_for
log = logging.getLogger(__name__)
@@ -145,6 +146,68 @@ class FileField(BaseField):
ajax_text = ' galaxy-ajax-upload="true"'
return '<input type="file" name="%s%s"%s%s>' % ( prefix, self.name, ajax_text, value_text )
+class FTPFileField(BaseField):
+ """
+ An FTP file upload input.
+ """
+ thead = '''
+ <table id="grid-table" class="grid">
+ <thead id="grid-table-header">
+ <tr>
+ <th id="select-header"></th>
+ <th id="name-header">
+ File
+ </th>
+ <th id="size-header">
+ Size
+ </th>
+ <th id="date-header">
+ Date
+ </th>
+ </tr>
+ </thead>
+ <tbody id="grid-table-body">
+ '''
+ trow = '''
+ <tr>
+ <td><input type="checkbox" name="%s%s" value="%s"/></td>
+ <td>%s</td>
+ <td>%s</td>
+ <td>%s</td>
+ </tr>
+ '''
+ tfoot = '''
+ </tbody>
+ </table>
+ '''
+ def __init__( self, name, dir, ftp_site, value = None ):
+ self.name = name
+ self.dir = dir
+ self.ftp_site = ftp_site
+ self.value = value
+ def get_html( self, prefix="" ):
+ rval = FTPFileField.thead
+ if self.dir is None:
+ rval += '<tr><td colspan="3"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( url_for( controller='user', action='create', referer=url_for( controller='root' ) ), url_for( controller='user', action='login', referer=url_for( controller='root' ) ) )
+ elif not os.path.exists( self.dir ):
+ rval += '<tr><td colspan="3"><em>Your FTP upload directory contains no files.</em></td></tr>'
+ else:
+ uploads = []
+ for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
+ for filename in filenames:
+ path = relpath( os.path.join( dirpath, filename ), self.dir )
+ statinfo = os.lstat( os.path.join( dirpath, filename ) )
+ uploads.append( dict( path=path,
+ size=nice_size( statinfo.st_size ),
+ ctime=time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) ) ) )
+ if not uploads:
+ rval += '<tr><td colspan="3"><em>Your FTP upload directory contains no files.</em></td></tr>'
+ for upload in uploads:
+ rval += FTPFileField.trow % ( prefix, self.name, upload['path'], upload['path'], upload['size'], upload['ctime'] )
+ rval += FTPFileField.tfoot
+ rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP. To upload some files, log in to the FTP server at <strong>%s</strong> using your Galaxy credentials (email address and password).</div>' % self.ftp_site
+ return rval
+
class HiddenField(BaseField):
"""
A hidden field.
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -40,6 +40,11 @@ class ToolParameter( object ):
for elem in param.findall("validator"):
self.validators.append( validation.Validator.from_element( self, elem ) )
+ @property
+ def visible( self ):
+ """Return true if the parameter should be rendered on the form"""
+ return True
+
def get_label( self ):
"""Return user friendly name for the parameter"""
if self.label: return self.label
@@ -362,6 +367,41 @@ class FileToolParameter( ToolParameter )
def get_initial_value( self, trans, context ):
return None
+class FTPFileToolParameter( ToolParameter ):
+ """
+ Parameter that takes a file uploaded via FTP as a value.
+ """
+ def __init__( self, tool, elem ):
+ """
+ Example: C{<param name="bins" type="file" />}
+ """
+ ToolParameter.__init__( self, tool, elem )
+ @property
+ def visible( self ):
+ if self.tool.app.config.ftp_upload_dir is None or self.tool.app.config.ftp_upload_site is None:
+ return False
+ return True
+ def get_html_field( self, trans=None, value=None, other_values={} ):
+ if trans is None or trans.user is None:
+ user_ftp_dir = None
+ else:
+ user_ftp_dir = os.path.join( trans.app.config.ftp_upload_dir, trans.user.email )
+ return form_builder.FTPFileField( self.name, user_ftp_dir, trans.app.config.ftp_upload_site, value = value )
+ def from_html( self, value, trans=None, other_values={} ):
+ return util.listify( value )
+ def to_string( self, value, app ):
+ if value in [ None, '' ]:
+ return None
+ elif isinstance( value, unicode ) or isinstance( value, str ) or isinstance( value, list ):
+ return value
+ def to_python( self, value, app ):
+ if value is None:
+ return None
+ elif isinstance( value, unicode ) or isinstance( value, str ) or isinstance( value, list ):
+ return value
+ def get_initial_value( self, trans, context ):
+ return None
+
class HiddenToolParameter( ToolParameter ):
"""
Parameter that takes one of two values.
@@ -1427,6 +1467,7 @@ parameter_types = dict( text = Te
hidden = HiddenToolParameter,
baseurl = BaseURLToolParameter,
file = FileToolParameter,
+ ftpfile = FTPFileToolParameter,
data = DataToolParameter,
drill_down = DrillDownSelectToolParameter )
--- a/lib/galaxy/util/__init__.py
+++ b/lib/galaxy/util/__init__.py
@@ -480,6 +480,32 @@ def umask_fix_perms( path, umask, unmask
current_group,
e ) )
+def nice_size(size):
+ """
+ Returns a readably formatted string with the size
+
+ >>> nice_size(100)
+ '100.0 bytes'
+ >>> nice_size(10000)
+ '9.8 Kb'
+ >>> nice_size(1000000)
+ '976.6 Kb'
+ >>> nice_size(100000000)
+ '95.4 Mb'
+ """
+ words = [ 'bytes', 'Kb', 'Mb', 'Gb' ]
+ try:
+ size = float( size )
+ except:
+ return '??? bytes'
+ for ind, word in enumerate(words):
+ step = 1024 ** (ind + 1)
+ if step > size:
+ size = size / float(1024 ** ind)
+ out = "%.1f %s" % (size, word)
+ return out
+ return '??? bytes'
+
galaxy_root_path = os.path.join(__path__[0], "..","..","..")
# The dbnames list is used in edit attributes and the upload tool
dbnames = read_dbnames( os.path.join( galaxy_root_path, "tool-data", "shared", "ucsc", "builds.txt" ) )
--- a/universe_wsgi.ini.sample
+++ b/universe_wsgi.ini.sample
@@ -324,6 +324,18 @@ use_interactive = True
# Enable the (experimental! beta!) Web API. Documentation forthcoming.
#enable_api = False
+# Enable Galaxy's "Upload via FTP" interface. You'll need to install and
+# configure an FTP server (we've used ProFTPd since it can use Galaxy's
+# database for authentication) and set the following two options.
+
+# This should point to a directory containing subdirectories matching users'
+# email addresses, where Galaxy will look for files.
+#ftp_upload_dir = None
+
+# This should be the hostname of your FTP server, which will be provided to
+# users in the help text.
+#ftp_upload_site = None
+
# -- Job Execution
# If running multiple Galaxy processes, one can be designated as the job
--- a/templates/tool_form.mako
+++ b/templates/tool_form.mako
@@ -51,7 +51,9 @@ function checkUncheckAll( name, check )
<%def name="do_inputs( inputs, tool_state, errors, prefix, other_values=None )"><% other_values = ExpressionContext( tool_state, other_values ) %>
%for input_index, input in enumerate( inputs.itervalues() ):
- %if input.type == "repeat":
+ %if not input.visible:
+ <% pass %>
+ %elif input.type == "repeat":
<div class="repeat-group"><div class="form-title-row"><b>${input.title_plural}</b></div><% repeat_state = tool_state[input.name] %>
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -929,7 +929,11 @@ class Tool:
assert isinstance( out_data, odict )
return 'tool_executed.mako', dict( out_data=out_data )
except:
- return 'message.mako', dict( status='error', message='odict not returned from tool execution', refresh_frames=[] )
+ if isinstance( out_data, str ):
+ message = out_data
+ else:
+ message = 'Failure executing tool (odict not returned from tool execution)'
+ return 'message.mako', dict( status='error', message=message, refresh_frames=[] )
# Otherwise move on to the next page
else:
state.page += 1
--- a/lib/galaxy/tools/parameters/grouping.py
+++ b/lib/galaxy/tools/parameters/grouping.py
@@ -12,11 +12,14 @@ import StringIO, os, urllib
from galaxy.datatypes import sniff
from galaxy.util.bunch import Bunch
from galaxy.util.odict import odict
-from galaxy.util import json
+from galaxy.util import json, relpath
class Group( object ):
def __init__( self ):
self.name = None
+ @property
+ def visible( self ):
+ return True
def value_to_basic( self, value, app ):
"""
Convert value to a (possibly nested) representation using only basic
@@ -267,6 +270,7 @@ class UploadDataset( Group ):
rval = []
data_file = context['file_data']
url_paste = context['url_paste']
+ ftp_files = context['ftp_files']
name = context.get( 'NAME', None )
info = context.get( 'INFO', None )
space_to_tab = False
@@ -281,6 +285,31 @@ class UploadDataset( Group ):
if file_bunch.path:
file_bunch.space_to_tab = space_to_tab
rval.append( file_bunch )
+ # look for files uploaded via FTP
+ valid_files = []
+ if ftp_files:
+ if trans.user is None:
+ log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
+ ftp_files = []
+ # TODO: warning to the user (could happen if session has become invalid)
+ else:
+ user_ftp_dir = os.path.join( trans.app.config.ftp_upload_dir, trans.user.email )
+ for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
+ for filename in filenames:
+ path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
+ if not os.path.islink( os.path.join( dirpath, filename ) ):
+ valid_files.append( path )
+ for ftp_file in ftp_files:
+ if ftp_file not in valid_files:
+ log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
+ continue
+ # TODO: warning to the user (could happen if file is already imported)
+ ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
+ 'filename' : os.path.basename( ftp_file ) }
+ file_bunch = get_data_file_filename( ftp_data_file, override_name = name, override_info = info )
+ if file_bunch.path:
+ file_bunch.space_to_tab = space_to_tab
+ rval.append( file_bunch )
return rval
file_type = self.get_file_type( context )
d_type = self.get_datatype( trans, context )
--- a/static/june_2007_style/blue/base.css
+++ b/static/june_2007_style/blue/base.css
@@ -39,8 +39,7 @@ div.form-title-row{padding:5px 10px;}
div.repeat-group-item{border-left:solid #d8b365 5px;margin-left:10px;margin-bottom:10px;}
div.form-row-error{background:#FFCCCC;}
div.form-row label{font-weight:bold;display:block;margin-bottom:.2em;}
-div.form-row-input{float:left;width:300px;}
-div.form-row-input > input{max-width:300px;}
+div.form-row-input{float:left;}
div.form-row-input label{font-weight:normal;display:inline;}
div.form-row-error-message{width:300px;float:left;color:red;font-weight:bold;padding:3px 0 0 1em;}
select,input,textarea{font:inherit;font-size:115%;}
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -419,36 +419,16 @@ class Newick( Text ):
# ------------- Utility methods --------------
+# nice_size used to be here, but to resolve cyclical dependencies it's been
+# moved to galaxy.util. It belongs there anyway since it's used outside
+# datatypes.
+nice_size = util.nice_size
+
def get_test_fname( fname ):
"""Returns test data filename"""
path, name = os.path.split(__file__)
full_path = os.path.join( path, 'test', fname )
return full_path
-def nice_size(size):
- """
- Returns a readably formatted string with the size
-
- >>> nice_size(100)
- '100.0 bytes'
- >>> nice_size(10000)
- '9.8 Kb'
- >>> nice_size(1000000)
- '976.6 Kb'
- >>> nice_size(100000000)
- '95.4 Mb'
- """
- words = [ 'bytes', 'Kb', 'Mb', 'Gb' ]
- try:
- size = float( size )
- except:
- return '??? bytes'
- for ind, word in enumerate(words):
- step = 1024 ** (ind + 1)
- if step > size:
- size = size / float(1024 ** ind)
- out = "%.1f %s" % (size, word)
- return out
- return '??? bytes'
def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
"""
Returns the first LINE_COUNT lines wrapped to WIDTH
--- a/lib/galaxy/tools/actions/upload.py
+++ b/lib/galaxy/tools/actions/upload.py
@@ -21,7 +21,7 @@ class UploadToolAction( ToolAction ):
upload_common.cleanup_unused_precreated_datasets( precreated_datasets )
if not uploaded_datasets:
- return 'No data was entered in the upload form, please go back and choose data to upload.'
+ return None, 'No data was entered in the upload form, please go back and choose data to upload.'
json_file_path = upload_common.create_paramfile( trans, uploaded_datasets )
data_list = [ ud.data for ud in uploaded_datasets ]
--- a/lib/galaxy/config.py
+++ b/lib/galaxy/config.py
@@ -99,6 +99,10 @@ class Configuration( object ):
self.user_library_import_dir = kwargs.get( 'user_library_import_dir', None )
if self.user_library_import_dir is not None and not os.path.exists( self.user_library_import_dir ):
raise ConfigurationError( "user_library_import_dir specified in config (%s) does not exist" % self.user_library_import_dir )
+ self.ftp_upload_dir = kwargs.get( 'ftp_upload_dir', None )
+ if self.ftp_upload_dir is not None and not os.path.exists( self.ftp_upload_dir ):
+ os.makedirs( self.ftp_upload_dir )
+ self.ftp_upload_site = kwargs.get( 'ftp_upload_site', None )
self.allow_library_path_paste = kwargs.get( 'allow_library_path_paste', False )
self.disable_library_comptypes = kwargs.get( 'disable_library_comptypes', '' ).lower().split( ',' )
# Location for dependencies
--- a/static/june_2007_style/base.css.tmpl
+++ b/static/june_2007_style/base.css.tmpl
@@ -212,11 +212,6 @@ div.form-row label {
div.form-row-input {
float: left;
- width: 300px;
-}
-
-div.form-row-input > input {
- max-width: 300px;
}
div.form-row-input label {
1
0
galaxy-dist commit 1e8d7c6ad88b: Add support for tool dependency injection to all cluster runners
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User James Taylor <james(a)jamestaylor.org>
# Date 1288983797 14400
# Node ID 1e8d7c6ad88b074b33b2029f3baa15c2eb929e70
# Parent 32e5efb7a7d5606a4dc1c8808fe4888ade2892e8
Add support for tool dependency injection to all cluster runners
--- a/lib/galaxy/tools/deps/__init__.py
+++ b/lib/galaxy/tools/deps/__init__.py
@@ -51,7 +51,7 @@ class DependencyManager( object ):
if os.path.exists( script ):
return script, path, version
else:
- return None, None
+ return None, None, None
def _find_dep_default( self, name ):
version = None
@@ -64,6 +64,6 @@ class DependencyManager( object ):
if os.path.exists( script ):
return script, real_path, real_version
else:
- return None, None
+ return None, None, None
--- a/lib/galaxy/jobs/runners/__init__.py
+++ b/lib/galaxy/jobs/runners/__init__.py
@@ -0,0 +1,32 @@
+import os, os.path
+
+class BaseJobRunner( object ):
+
+ def build_command_line( self, job_wrapper, include_metadata=False ):
+ """
+ Compose the sequence of commands necessary to execute a job. This will
+ currently include:
+ - environment settings corresponding to any requirement tags
+ - command line taken from job wrapper
+ - commands to set metadata (if include_metadata is True)
+ """
+ commands = job_wrapper.get_command_line()
+ # All job runners currently handle this case which should never
+ # occur
+ if not commands:
+ return None
+ # Prepend dependency injection
+ if job_wrapper.dependency_shell_commands:
+ commands = "; ".join( job_wrapper.dependency_shell_commands + [ commands ] )
+ # Append metadata setting commands, we don't want to overwrite metadata
+ # that was copied over in init_meta(), as per established behavior
+ if include_metadata and self.app.config.set_metadata_externally:
+ commands += "; cd %s; " % os.path.abspath( os.getcwd() )
+ commands += job_wrapper.setup_external_metadata(
+ exec_dir = os.path.abspath( os.getcwd() ),
+ tmp_dir = self.app.config.new_file_path,
+ dataset_files_path = self.app.model.Dataset.file_path,
+ output_fnames = output_fnames,
+ set_extension = False,
+ kwds = { 'overwrite' : False } )
+ return commands
--- a/lib/galaxy/jobs/runners/local.py
+++ b/lib/galaxy/jobs/runners/local.py
@@ -5,13 +5,14 @@ import threading
from galaxy import model
from galaxy.datatypes.data import nice_size
+from galaxy.jobs.runners import BaseJobRunner
import os, errno
from time import sleep
log = logging.getLogger( __name__ )
-class LocalJobRunner( object ):
+class LocalJobRunner( BaseJobRunner ):
"""
Job runner backed by a finite pool of worker threads. FIFO scheduling
"""
@@ -53,9 +54,7 @@ class LocalJobRunner( object ):
# Prepare the job to run
try:
job_wrapper.prepare()
- command_line = job_wrapper.get_command_line()
- if job_wrapper.dependency_shell_commands:
- command_line = "; ".join( job_wrapper.dependency_shell_commands + [ command_line ] )
+ command_line = self.build_command_line( job_wrapper )
except:
job_wrapper.fail( "failure preparing job", exception=True )
log.exception("failure running job %d" % job_wrapper.job_id)
--- a/lib/galaxy/jobs/runners/sge.py
+++ b/lib/galaxy/jobs/runners/sge.py
@@ -2,6 +2,8 @@ import os, logging, threading, time
from Queue import Queue, Empty
from galaxy import model
+from galaxy.jobs.runners import BaseJobRunner
+
from paste.deploy.converters import asbool
import pkg_resources
@@ -58,7 +60,7 @@ class SGEJobState( object ):
self.efile = None
self.runner_url = None
-class SGEJobRunner( object ):
+class SGEJobRunner( BaseJobRunner ):
"""
Job runner backed by a finite pool of worker threads. FIFO scheduling
"""
@@ -144,7 +146,7 @@ class SGEJobRunner( object ):
try:
job_wrapper.prepare()
- command_line = job_wrapper.get_command_line()
+ command_line = self.build_command_line( job_wrapper, include_metadata = True )
except:
job_wrapper.fail( "failure preparing job", exception=True )
log.exception("failure running job %d" % job_wrapper.job_id)
@@ -191,14 +193,7 @@ class SGEJobRunner( object ):
jt.nativeSpecification = ' '.join(nativeSpec)
script = sge_template % (job_wrapper.galaxy_lib_dir, os.path.abspath( job_wrapper.working_directory ), command_line)
- if self.app.config.set_metadata_externally:
- script += "cd %s\n" % os.path.abspath( os.getcwd() )
- script += "%s\n" % job_wrapper.setup_external_metadata( exec_dir = os.path.abspath( os.getcwd() ),
- tmp_dir = self.app.config.new_file_path,
- dataset_files_path = self.app.model.Dataset.file_path,
- output_fnames = job_wrapper.get_output_fnames(),
- set_extension = False,
- kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
+
fh = file( jt.remoteCommand, "w" )
fh.write( script )
fh.close()
--- a/lib/galaxy/jobs/runners/drmaa.py
+++ b/lib/galaxy/jobs/runners/drmaa.py
@@ -2,6 +2,8 @@ import os, logging, threading, time
from Queue import Queue, Empty
from galaxy import model
+from galaxy.jobs.runners import BaseJobRunner
+
from paste.deploy.converters import asbool
import pkg_resources
@@ -58,7 +60,7 @@ class DRMAAJobState( object ):
self.efile = None
self.runner_url = None
-class DRMAAJobRunner( object ):
+class DRMAAJobRunner( BaseJobRunner ):
"""
Job runner backed by a finite pool of worker threads. FIFO scheduling
"""
@@ -120,7 +122,7 @@ class DRMAAJobRunner( object ):
try:
job_wrapper.prepare()
- command_line = job_wrapper.get_command_line()
+ command_line = self.build_command_line( job_wrapper, include_metadata=True )
except:
job_wrapper.fail( "failure preparing job", exception=True )
log.exception("failure running job %d" % job_wrapper.job_id)
@@ -154,14 +156,6 @@ class DRMAAJobRunner( object ):
jt.nativeSpecification = native_spec
script = drm_template % (job_wrapper.galaxy_lib_dir, os.path.abspath( job_wrapper.working_directory ), command_line)
- if self.app.config.set_metadata_externally:
- script += "cd %s\n" % os.path.abspath( os.getcwd() )
- script += "%s\n" % job_wrapper.setup_external_metadata( exec_dir = os.path.abspath( os.getcwd() ),
- tmp_dir = self.app.config.new_file_path,
- dataset_files_path = self.app.model.Dataset.file_path,
- output_fnames = job_wrapper.get_output_fnames(),
- set_extension = False,
- kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
fh = file( jt.remoteCommand, "w" )
fh.write( script )
fh.close()
--- a/lib/galaxy/jobs/runners/pbs.py
+++ b/lib/galaxy/jobs/runners/pbs.py
@@ -5,6 +5,7 @@ from Queue import Queue, Empty
from galaxy import model
from galaxy.datatypes.data import nice_size
from galaxy.util.bunch import Bunch
+from galaxy.jobs.runners import BaseJobRunner
from paste.deploy.converters import asbool
@@ -80,7 +81,7 @@ class PBSJobState( object ):
self.check_count = 0
self.stop_job = False
-class PBSJobRunner( object ):
+class PBSJobRunner( BaseJobRunner ):
"""
Job runner backed by a finite pool of worker threads. FIFO scheduling
"""
@@ -183,7 +184,7 @@ class PBSJobRunner( object ):
try:
job_wrapper.prepare()
- command_line = job_wrapper.get_command_line()
+ command_line = self.build_command_line( job_wrapper, include_metadata=not( self.app.config.pbs_stage_path ) )
except:
job_wrapper.fail( "failure preparing job", exception=True )
log.exception("failure running job %d" % job_wrapper.job_id)
@@ -253,14 +254,6 @@ class PBSJobRunner( object ):
script = pbs_symlink_template % (job_wrapper.galaxy_lib_dir, " ".join(job_wrapper.get_input_fnames() + output_files), self.app.config.pbs_stage_path, exec_dir, command_line)
else:
script = pbs_template % ( job_wrapper.galaxy_lib_dir, exec_dir, command_line )
- if self.app.config.set_metadata_externally:
- script += "cd %s\n" % os.path.abspath( os.getcwd() )
- script += "%s\n" % job_wrapper.setup_external_metadata( exec_dir = os.path.abspath( os.getcwd() ),
- tmp_dir = self.app.config.new_file_path,
- dataset_files_path = self.app.model.Dataset.file_path,
- output_fnames = output_fnames,
- set_extension = False,
- kwds = { 'overwrite' : False } ) #we don't want to overwrite metadata that was copied over in init_meta(), as per established behavior
job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
fh = file(job_file, "w")
fh.write(script)
1
0
galaxy-dist commit 11abfc8eed60: Update cufflinks and cuffdiff wrappers to indicate that, as of version 0.9, setting parameters for paired-end reads is not recommended.
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks(a)emory.edu>
# Date 1288979059 14400
# Node ID 11abfc8eed60c0bdd231d06992f72001a2c785fa
# Parent 9545e8675a5d43408d75c66fcf3115cbdaf4af8a
Update cufflinks and cuffdiff wrappers to indicate that, as of version 0.9, setting parameters for paired-end reads is not recommended.
--- a/tools/ngs_rna/cuffdiff_wrapper.xml
+++ b/tools/ngs_rna/cuffdiff_wrapper.xml
@@ -25,6 +25,11 @@
--inputA=$gtf_input
--input1=$aligned_reads1
--input2=$aligned_reads2
+
+ #if $singlePaired.sPaired == "Yes":
+ -m $singlePaired.mean_inner_distance
+ -s $singlePaired.inner_distance_std_dev
+ #end if
</command><inputs><param format="gtf" name="gtf_input" type="data" label="Transcripts" help="A transcript GTF file produced by cufflinks, cuffcompare, or other source."/>
@@ -34,12 +39,12 @@
<param name="min_mapqual" type="integer" value="0" label="Min SAM Mapping Quality" help="Instructs Cufflinks to ignore alignments with a SAM mapping quality lower than this number."/><param name="min_alignment_count" type="integer" value="0" label="Min Alignment Count" help="The minimum number of alignments in a locus needed to conduct significance testing on changes in that locus observed between samples."/><conditional name="singlePaired">
- <param name="sPaired" type="select" label="Is this library mate-paired?">
- <option value="single">Single-end</option>
- <option value="paired">Paired-end</option>
+ <param name="sPaired" type="select" label="Set Parameters for Paired-end Reads? (not recommended)">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option></param>
- <when value="single"></when>
- <when value="paired">
+ <when value="No"></when>
+ <when value="Yes"><param name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/><param name="inner_distance_std_dev" type="integer" value="20" label="Standard Deviation for Inner Distance between Mate Pairs"/></when>
--- a/tools/ngs_rna/cufflinks_wrapper.xml
+++ b/tools/ngs_rna/cufflinks_wrapper.xml
@@ -17,7 +17,7 @@
#if $reference_annotation.use_ref == "Yes":
-G $reference_annotation.reference_annotation_file
#end if
- #if $singlePaired.sPaired == "paired":
+ #if $singlePaired.sPaired == "Yes":
-m $singlePaired.mean_inner_distance
-s $singlePaired.inner_distance_std_dev
#end if
@@ -39,12 +39,12 @@
</when></conditional><conditional name="singlePaired">
- <param name="sPaired" type="select" label="Is this library mate-paired?">
- <option value="single">Single-end</option>
- <option value="paired">Paired-end</option>
+ <param name="sPaired" type="select" label="Set Parameters for Paired-end Reads? (not recommended)">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option></param>
- <when value="single"></when>
- <when value="paired">
+ <when value="No"></when>
+ <when value="Yes"><param name="mean_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs"/><param name="inner_distance_std_dev" type="integer" value="20" label="Standard Deviation for Inner Distance between Mate Pairs"/></when>
1
0
galaxy-dist commit eb79ab327351: Remove missing hapmapmart tool from sample tool conf
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate(a)bx.psu.edu>
# Date 1288983276 14400
# Node ID eb79ab327351e4789a89748269fc34428f791463
# Parent 11abfc8eed60c0bdd231d06992f72001a2c785fa
Remove missing hapmapmart tool from sample tool conf
--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -319,7 +319,6 @@
<label text="Data: Import and upload" id="rgimport" /><tool file="data_source/upload.xml"/><tool file="data_source/access_libraries.xml" />
- <tool file="data_source/hapmapmart.xml" /><label text="Data: Filter and Clean" id="rgfilter" /><tool file="rgenetics/rgClean.xml"/><tool file="rgenetics/rgPedSub.xml"/>
1
0
galaxy-dist commit 9545e8675a5d: Fix uploads when ftp_file is not defined.
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Nate Coraor <nate(a)bx.psu.edu>
# Date 1288978480 14400
# Node ID 9545e8675a5d43408d75c66fcf3115cbdaf4af8a
# Parent 7e9ae46051057d822aff82a9b1f3aec0200086f2
Fix uploads when ftp_file is not defined.
--- a/lib/galaxy/tools/parameters/grouping.py
+++ b/lib/galaxy/tools/parameters/grouping.py
@@ -287,7 +287,7 @@ class UploadDataset( Group ):
rval.append( file_bunch )
# look for files uploaded via FTP
valid_files = []
- if ftp_files:
+ if ftp_files is not None:
if trans.user is None:
log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
ftp_files = []
@@ -299,6 +299,8 @@ class UploadDataset( Group ):
path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
if not os.path.islink( os.path.join( dirpath, filename ) ):
valid_files.append( path )
+ else:
+ ftp_files = []
for ftp_file in ftp_files:
if ftp_file not in valid_files:
log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
1
0
galaxy-dist commit b1ec8342053f: Adding NGS simulation tool
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Kelly Vincent <kpvincent(a)bx.psu.edu>
# Date 1288806772 14400
# Node ID b1ec8342053f3cdcd0a081ff28f077a76bd188cc
# Parent 9d68027b01096e2b32101234878d11946c03d08c
Adding NGS simulation tool
--- /dev/null
+++ b/tool-data/ngs_sim_fasta.loc.sample
@@ -0,0 +1,20 @@
+#This is a sample file distributed with Galaxy that enables the NGS simulation
+#tool to use some FASTA files. You will need to make sure that these FASTA files
+#are in place and then create the ngs_sim.loc file similar to this one (store it
+#in this directory) that points to the locations of those files. The ngs_sim.loc
+#file has this format (white space characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg18chrM.fa in
+#/data/path/hg18/seq/,
+#then the ngs_sim.loc entry would look like this:
+#
+#hg18chrM hg18 hg18chrM /data/path/hg18/seq/hg18chrM.fa
+#
+#Your ngs_sim.loc file should include an entry per line for each FASTA file you
+#have stored.
+#
+#hg18chrM hg18 hg18chrM /data/path/hg18/seq/hg18chrM.fa
+#phiX174 phiX phiX174 /data/path/genome/phiX/seq/phiX.fa
+#pUC18 pUC18 pUC18 /data/path/genome/pUC18/seq/pUC18.fa
Binary file test-data/ngs_simulation_out3.png has changed
--- /dev/null
+++ b/tools/ngs_simulation/ngs_simulation.xml
@@ -0,0 +1,217 @@
+<tool id="ngs_simulation" name="Simulate" version="1.0.0">
+<!--<tool id="ngs_simulation" name="Simulate" force_history_refresh="True" version="1.0.0">-->
+ <description>Illumina runs</description>
+ <command interpreter="python">
+ ngs_simulation.py
+ #if $in_type.input_type == "built-in"
+ --input="${ filter( lambda x: str( x[0] ) == str( $in_type.genome ), $__app__.tool_data_tables[ 'ngs_sim_fasta' ].get_fields() )[0][-1] }"
+ --genome=$genome
+ #else
+ --input=$in_type.input1
+ #end if
+ --read_len=$read_len
+ --avg_coverage=$avg_coverage
+ --error_rate=$error_rate
+ --num_sims=$num_sims
+ --polymorphism=$polymorphism
+ --detection_thresh=$detection_thresh
+ --output_png=$output_png
+ --summary_out=$summary_out
+ --output_summary=$output_summary
+ --new_file_path=$__new_file_path__
+ </command>
+<!-- If want to include all simulation results file
+ sim_results=$sim_results
+ output=$output.id
+-->
+ <inputs>
+ <conditional name="in_type">
+ <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">
+ <option value="built-in">Built-in</option>
+ <option value="history">History file</option>
+ </param>
+ <when value="built-in">
+ <param name="genome" type="select" label="Select a built-in genome" help="if your genome of interest is not listed - contact Galaxy team">
+ <options from_data_table="ngs_sim_fasta" />
+ </param>
+ </when>
+ <when value="history">
+ <param name="input1" type="data" format="fasta" label="Input genome (FASTA format)" />
+ </when>
+ </conditional>
+ <param name="read_len" type="integer" value="76" label="Read length" />
+ <param name="avg_coverage" type="integer" value="200" label="Average coverage" />
+ <param name="error_rate" type="float" value="0.001" label="Error rate or quality score" help="Quality score if integer 1 or greater; error rate if between 0 and 1" />
+ <param name="num_sims" type="integer" value="100" label="The number of simulations to run" />
+ <param name="polymorphism" type="select" multiple="true" label="Frequency/ies for minor allele">
+ <option value="0.001">0.001</option>
+ <option value="0.002">0.002</option>
+ <option value="0.003">0.003</option>
+ <option value="0.004">0.004</option>
+ <option value="0.005">0.005</option>
+ <option value="0.006">0.006</option>
+ <option value="0.007">0.007</option>
+ <option value="0.008">0.008</option>
+ <option value="0.009">0.009</option>
+ <option value="0.01">0.01</option>
+ <option value="0.02">0.02</option>
+ <option value="0.03">0.03</option>
+ <option value="0.04">0.04</option>
+ <option value="0.05">0.05</option>
+ <option value="0.06">0.06</option>
+ <option value="0.07">0.07</option>
+ <option value="0.08">0.08</option>
+ <option value="0.09">0.09</option>
+ <option value="0.1">0.1</option>
+ <option value="0.2">0.2</option>
+ <option value="0.3">0.3</option>
+ <option value="0.4">0.4</option>
+ <option value="0.5">0.5</option>
+ <option value="0.6">0.6</option>
+ <option value="0.7">0.7</option>
+ <option value="0.8">0.8</option>
+ <option value="0.9">0.9</option>
+ <option value="1.0">1.0</option>
+ </param>
+ <param name="detection_thresh" type="select" multiple="true" label="Detection thresholds">
+ <option value="0.001">0.001</option>
+ <option value="0.002">0.002</option>
+ <option value="0.003">0.003</option>
+ <option value="0.004">0.004</option>
+ <option value="0.005">0.005</option>
+ <option value="0.006">0.006</option>
+ <option value="0.007">0.007</option>
+ <option value="0.008">0.008</option>
+ <option value="0.009">0.009</option>
+ <option value="0.01">0.01</option>
+ <option value="0.02">0.02</option>
+ <option value="0.03">0.03</option>
+ <option value="0.04">0.04</option>
+ <option value="0.05">0.05</option>
+ <option value="0.06">0.06</option>
+ <option value="0.07">0.07</option>
+ <option value="0.08">0.08</option>
+ <option value="0.09">0.09</option>
+ <option value="0.1">0.1</option>
+ <option value="0.2">0.2</option>
+ <option value="0.3">0.3</option>
+ <option value="0.4">0.4</option>
+ <option value="0.5">0.5</option>
+ <option value="0.6">0.6</option>
+ <option value="0.7">0.7</option>
+ <option value="0.8">0.8</option>
+ <option value="0.9">0.9</option>
+ <option value="1.0">1.0</option>
+ </param>
+ <param name="summary_out" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Include a (text) summary file for all the simulations" />
+<!-- <param name="sim_results" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output all tabular simulation results" help="Number of polymorphisms times number of detection thresholds"/>
+-->
+ </inputs>
+ <outputs>
+ <data format="png" name="output_png" />
+ <data format="tabular" name="output_summary">
+ <filter>summary_out == True</filter>
+ </data>
+<!--
+ <data format="tabular" name="output">
+ <filter>sim_files_out</filter>
+ </data>
+-->
+ </outputs>
+ <tests>
+ <!--
+ Tests cannot be run because of the non-deterministic element of the simulation.
+ But if you run the following "tests" manually in the browser and check against
+ the output files, they should be very similar to the listed output files.
+ -->
+ <!--
+ <test>
+ <param name="input_type" value="history" />
+ <param name="input1" value="ngs_simulation_in1.fasta" ftype="fasta" />
+ <param name="read_len" value="76" />
+ <param name="avg_coverage" value="200" />
+ <param name="error_rate" value="0.001" />
+ <param name="num_sims" value="25" />
+ <param name="polymorphism" value="0.02,0.04,0.1" />
+ <param name="detection_thresh" value="0.01,0.02" />
+ <param name="summary_out" value="true" />
+ <output name="output_png" file="ngs_simulation_out1.png" />
+ <output name="output_summary" file="ngs_simulation_out2.tabular" />
+ </test>
+ <test>
+ <param name="input_type" value="built-in" />
+ <param name="genome" value="pUC18" />
+ <param name="read_len" value="50" />
+ <param name="avg_coverage" value="150" />
+ <param name="error_rate" value="0.005" />
+ <param name="num_sims" value="25" />
+ <param name="polymorphism" value="0.001,0.005" />
+ <param name="detection_thresh" value="0.001,0.002" />
+ <param name="summary_out" value="false" />
+ <output name="output_png" file="ngs_simulation_out3.png" />
+ </test>
+ -->
+ </tests>
+ <help>
+
+**What it does**
+
+This tool simulates an Illumina run and provides plots of false positives and false negatives. It allows for a range of simulation parameters to be set. Note that this simulation sets only one (randomly chosen) position in the genome as polymorphic, according to the value specified. Superimposed on this are "sequencing errors", which are uniformly (and randomly) distributed. Polymorphisms are assigned using the detection threshold, so if the detection threshold is set to the same as the minor allele frequency, the expected false negative rate is 50%.
+
+**Parameter list**
+
+These are the parameters that should be set for the simulation::
+
+ Read length (which is the same for all reads)
+ Average Coverage
+ Frequency for Minor Allele
+ Sequencing Error Rate
+ Detection Threshold
+ Number of Simulations
+
+You also should choose to use either a built-in genome or supply your own FASTA file.
+
+**Output**
+
+There are one or two output files. The first is a png that contains two different plots and is always generated. The second is optional and is a text file with some summary information about the simulations that were run. Below are some example outputs for a 10-simulation run on phiX with the default settings::
+
+ Read length 76
+ Average coverage 200
+ Error rate/quality score 0.001
+ Number of simulations 100
+ Frequencies for minor allele 0.002
+ 0.004
+ Detection thresholds 0.003
+ 0.005
+ 0.007
+ Include summary file Yes
+
+Plot output (png):
+
+.. image:: ../static/images/ngs_simulation.png
+
+Summary output (txt)::
+
+ FP FN GENOMESIZE.5386 fprate hetcol errcol
+ Min. : 71.0 Min. :0.0 Mode:logical Min. :0.01318 Min. :0.004 Min. :0.007
+ 1st Qu.:86.0 1st Qu.:1.0 NA's:10 1st Qu.:0.01597 1st Qu.:0.004 1st Qu.:0.007
+ Median :92.5 Median :1.0 NA Median :0.01717 Median :0.004 Median :0.007
+ Mean :93.6 Mean :0.9 NA Mean :0.01738 Mean :0.004 Mean :0.007
+ 3rd Qu.:100.8 3rd Qu.:1.0 NA 3rd Qu.:0.01871 3rd Qu.:0.004 3rd Qu.:0.007
+ Max. :123.0 Max. :1.0 NA Max. :0.02284 Max. :0.004 Max. :0.007
+
+ False Positive Rate Summary
+ 0.003 0.005 0.007
+ 0.001 0.17711 0.10854 0.01673
+ 0.009 0.18049 0.10791 0.01738
+
+ False Negative Rate Summary
+ 0.003 0.005 0.007
+ 0.001 1.0 0.8 1.0
+ 0.009 0.4 0.7 0.9
+
+
+ </help>
+</tool>
+
+
Binary file test-data/ngs_simulation_out1.png has changed
--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -312,6 +312,9 @@
<tool file="genetrack/genetrack_indexer.xml" /><tool file="genetrack/genetrack_peak_prediction.xml" /></section>
+ <section name="NGS: Simulation" id="ngs-simulation">
+ <tool file="ngs_simulation/ngs_simulation.xml" />
+ </section><section name="SNP/WGA: Data; Filters" id="rgdat"><label text="Data: Import and upload" id="rgimport" /><tool file="data_source/upload.xml"/>
--- /dev/null
+++ b/test-data/ngs_simulation_in1.fasta
@@ -0,0 +1,41 @@
+>gi|209210|gb|L09136.1|SYNPUC18CV pUC18c cloning vector (beta-galactosidase mRNA on complementary strand)
+TCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCT
+GTAAGCGGATGCCGGGAGCAGACAAGCCCGTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCTGG
+CTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCGGTGTGAAATACCGCACAGAT
+GCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATC
+GGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTA
+ACGCCAGGGTTTTCCCAGTCACGACGTTGTAAAACGACGGCCAGTGCCAAGCTTGCATGCCTGCAGGTCG
+ACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCGTAATCATGGTCATAGCTGTTTCCTGTGTGAAATT
+GTTATCCGCTCACAATTCCACACAACATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCTAATG
+AGTGAGCTAACTCACATTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAG
+CTGCATTAATGAATCGGCCAACGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCTCTTCCGCTTCCTCGC
+TCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACG
+GTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAAC
+CGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGAC
+GCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCT
+CGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTG
+GCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTG
+TGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGT
+AAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGT
+GCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTC
+TGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAG
+CGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATC
+TTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAA
+AAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTA
+AACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCA
+TCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTG
+CTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAG
+GGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCT
+AGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCAC
+GCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCAT
+GTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTA
+TCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGA
+CTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTC
+AATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGG
+CGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGAT
+CTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAA
+GGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTAT
+CAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGC
+GCACATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAA
+TAGGCGTATCACGAGGCCCTTTCGTC
+
--- a/tool_data_table_conf.xml.sample
+++ b/tool_data_table_conf.xml.sample
@@ -1,22 +1,27 @@
<tables>
- <!-- Locations of MAF files that have been indexed with bx-python -->
- <table name="indexed_maf_files">
- <columns>name, value, dbkey, species</columns>
- <file path="tool-data/maf_index.loc" />
+ <!-- Locations of indexes in the BFAST mapper format -->
+ <table name="bfast_indexes" comment_char="#">
+ <columns>value, dbkey, formats, name, path</columns>
+ <file path="tool-data/bfast_indexes.loc" />
+ </table>
+ <!-- Locations of indexes in the Bowtie mapper format -->
+ <table name="bowtie_indexes">
+ <columns>name, value</columns>
+ <file path="tool-data/bowtie_indices.loc" /></table><!-- Locations of indexes in the BWA mapper format --><table name="bwa_indexes"><columns>name, value</columns><file path="tool-data/bwa_index.loc" /></table>
- <!-- Locations of indexes in the Bowtie mapper format -->
- <table name="bowtie_indexes">
- <columns>name, value</columns>
- <file path="tool-data/bowtie_indices.loc" />
+ <!-- Locations of MAF files that have been indexed with bx-python -->
+ <table name="indexed_maf_files">
+ <columns>name, value, dbkey, species</columns>
+ <file path="tool-data/maf_index.loc" /></table>
- <!-- Locations of indexes in the BFAST mapper format -->
- <table name="bfast_indexes" comment_char="#">
- <columns>value, dbkey, formats, name, path</columns>
- <file path="tool-data/bfast_indexes.loc" />
+ <!-- Locations of fasta files appropriate for NGS simulation -->
+ <table name="ngs_sim_fasta" comment_char="#">
+ <columns>value, dbkey, name, path</columns>
+ <file path="tool-data/ngs_sim_fasta.loc" /></table></tables>
--- /dev/null
+++ b/tools/ngs_simulation/ngs_simulation.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+
+"""
+Runs Ben's simulation.
+
+usage: %prog [options]
+ -i, --input=i: Input genome (FASTA format)
+ -g, --genome=g: If built-in, the genome being used
+ -l, --read_len=l: Read length
+ -c, --avg_coverage=c: Average coverage
+ -e, --error_rate=e: Error rate (0-1)
+ -n, --num_sims=n: Number of simulations to run
+ -p, --polymorphism=p: Frequency/ies for minor allele (comma-separated list of 0-1)
+ -d, --detection_thresh=d: Detection thresholds (comma-separated list of 0-1)
+ -u, --output_png=u: Plot output
+ -s, --summary_out=s: Whether or not to output a file with summary of all simulations
+ -m, --output_summary=m: File name for output summary of all simulations
+ -f, --new_file_path=f: Directory for summary output files
+
+"""
+# removed output of all simulation results on request (not working)
+# -r, --sim_results=r: Output all tabular simulation results (number of polymorphisms times number of detection thresholds)
+# -o, --output=o: Base name for summary output for each run
+
+from rpy import *
+import os
+import random, sys, tempfile
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+def stop_err( msg ):
+ sys.stderr.write( '%s\n' % msg )
+ sys.exit()
+
+def __main__():
+ #Parse Command Line
+ options, args = doc_optparse.parse( __doc__ )
+ # validate parameters
+ error = ''
+ try:
+ read_len = int( options.read_len )
+ if read_len <= 0:
+ raise Exception, ' greater than 0'
+ except TypeError, e:
+ error = ': %s' % str( e )
+ if error:
+ stop_err( 'Make sure your read length is an integer value%s' % error )
+ error = ''
+ try:
+ avg_coverage = int( options.avg_coverage )
+ if avg_coverage <= 0:
+ raise Exception, ' greater than 0'
+ except Exception, e:
+ error = ': %s' % str( e )
+ if error:
+ stop_err( 'Make sure your average coverage is an integer value%s' % error )
+ error = ''
+ try:
+ error_rate = float( options.error_rate )
+ if error_rate >= 1.0:
+ error_rate = 10 ** ( -error_rate / 10.0 )
+ elif error_rate < 0:
+ raise Exception, ' between 0 and 1'
+ except Exception, e:
+ error = ': %s' % str( e )
+ if error:
+ stop_err( 'Make sure the error rate is a decimal value%s or the quality score is at least 1' % error )
+ try:
+ num_sims = int( options.num_sims )
+ except TypeError, e:
+ stop_err( 'Make sure the number of simulations is an integer value: %s' % str( e ) )
+ if len( options.polymorphism ) > 0:
+ polymorphisms = [ float( p ) for p in options.polymorphism.split( ',' ) ]
+ else:
+ stop_err( 'Select at least one polymorphism value to use' )
+ if len( options.detection_thresh ) > 0:
+ detection_threshes = [ float( dt ) for dt in options.detection_thresh.split( ',' ) ]
+ else:
+ stop_err( 'Select at least one detection threshold to use' )
+
+ # mutation dictionaries
+ hp_dict = { 'A':'G', 'G':'A', 'C':'T', 'T':'C', 'N':'N' } # heteroplasmy dictionary
+ mt_dict = { 'A':'C', 'C':'A', 'G':'T', 'T':'G', 'N':'N'} # misread dictionary
+
+ # read fasta file to seq string
+ all_lines = open( options.input, 'rb' ).readlines()
+ seq = ''
+ for line in all_lines:
+ line = line.rstrip()
+ if line.startswith('>'):
+ pass
+ else:
+ seq += line.upper()
+ seq_len = len( seq )
+
+ # output file name template
+# removed output of all simulation results on request (not working)
+# if options.sim_results == "true":
+# out_name_template = os.path.join( options.new_file_path, 'primary_output%s_' + options.output + '_visible_tabular' )
+# else:
+# out_name_template = tempfile.NamedTemporaryFile().name + '_%s'
+ out_name_template = tempfile.NamedTemporaryFile().name + '_%s'
+ print 'out_name_template:', out_name_template
+
+ # set up output files
+ outputs = {}
+ i = 1
+ for p in polymorphisms:
+ outputs[ p ] = {}
+ for d in detection_threshes:
+ outputs[ p ][ d ] = out_name_template % i
+ i += 1
+
+ # run sims
+ for polymorphism in polymorphisms:
+ for detection_thresh in detection_threshes:
+ output = open( outputs[ polymorphism ][ detection_thresh ], 'wb' )
+ output.write( 'FP\tFN\tGENOMESIZE=%s\n' % seq_len )
+ sim_count = 0
+ while sim_count < num_sims:
+ # randomly pick heteroplasmic base index
+ hbase = random.choice( range( 0, seq_len ) )
+ #hbase = seq_len/2#random.randrange( 0, seq_len )
+ # create 2D quasispecies list
+ qspec = map( lambda x: [], [0] * seq_len )
+ # simulate read indices and assign to quasispecies
+ i = 0
+ while i < ( avg_coverage * ( seq_len / read_len ) ): # number of reads (approximates coverage)
+ start = random.choice( range( 0, seq_len ) )
+ #start = seq_len/2#random.randrange( 0, seq_len ) # assign read start
+ if random.random() < 0.5: # positive sense read
+ end = start + read_len # assign read end
+ if end > seq_len: # overshooting origin
+ read = range( start, seq_len ) + range( 0, ( end - seq_len ) )
+ else: # regular read
+ read = range( start, end )
+ else: # negative sense read
+ end = start - read_len # assign read end
+ if end < -1: # overshooting origin
+ read = range( start, -1, -1) + range( ( seq_len - 1 ), ( seq_len + end ), -1 )
+ else: # regular read
+ read = range( start, end, -1 )
+ # assign read to quasispecies list by index
+ for j in read:
+ if j == hbase and random.random() < polymorphism: # heteroplasmic base is variant with p = het
+ ref = hp_dict[ seq[ j ] ]
+ else: # ref is the verbatim reference nucleotide (all positions)
+ ref = seq[ j ]
+ if random.random() < error_rate: # base in read is misread with p = err
+ qspec[ j ].append( mt_dict[ ref ] )
+ else: # otherwise we carry ref through to the end
+ qspec[ j ].append(ref)
+ # last but not least
+ i += 1
+ bases, fpos, fneg = {}, 0, 0 # last two will be outputted to summary file later
+ for i, nuc in enumerate( seq ):
+ cov = len( qspec[ i ] )
+ bases[ 'A' ] = qspec[ i ].count( 'A' )
+ bases[ 'C' ] = qspec[ i ].count( 'C' )
+ bases[ 'G' ] = qspec[ i ].count( 'G' )
+ bases[ 'T' ] = qspec[ i ].count( 'T' )
+ # calculate max NON-REF deviation
+ del bases[ nuc ]
+ maxdev = float( max( bases.values() ) ) / cov
+ # deal with non-het sites
+ if i != hbase:
+ if maxdev >= detection_thresh: # greater than detection threshold = false positive
+ fpos += 1
+ # deal with het sites
+ if i == hbase:
+ hnuc = hp_dict[ nuc ] # let's recover het variant
+ if ( float( bases[ hnuc ] ) / cov ) < detection_thresh: # less than detection threshold = false negative
+ fneg += 1
+ del bases[ hnuc ] # ignore het variant
+ maxdev = float( max( bases.values() ) ) / cov # check other non-ref bases at het site
+ if maxdev >= detection_thresh: # greater than detection threshold = false positive (possible)
+ fpos += 1
+ # output error sums and genome size to summary file
+ output.write( '%d\t%d\n' % ( fpos, fneg ) )
+ sim_count += 1
+ # close output up
+ output.close()
+
+ # Parameters (heteroplasmy, error threshold, colours)
+ r( '''
+ het=c(%s)
+ err=c(%s)
+ grade = (0:32)/32
+ hues = rev(gray(grade))
+ ''' % ( ','.join( [ str( p ) for p in polymorphisms ] ), ','.join( [ str( d ) for d in detection_threshes ] ) ) )
+
+ # Suppress warnings
+ r( 'options(warn=-1)' )
+
+ # Create allsum (for FP) and allneg (for FN) objects
+ r( 'allsum <- data.frame()' )
+ for polymorphism in polymorphisms:
+ for detection_thresh in detection_threshes:
+ output = outputs[ polymorphism ][ detection_thresh ]
+ cmd = '''
+ ngsum = read.delim('%s', header=T)
+ ngsum$fprate <- ngsum$FP/%s
+ ngsum$hetcol <- %s
+ ngsum$errcol <- %s
+ allsum <- rbind(allsum, ngsum)
+ ''' % ( output, seq_len, polymorphism, detection_thresh )
+ r( cmd )
+
+ if os.path.getsize( output ) == 0:
+ for p in outputs.keys():
+ for d in outputs[ p ].keys():
+ sys.stderr.write(outputs[ p ][ d ] + ' '+str( os.path.getsize( outputs[ p ][ d ] ) )+'\n')
+
+ if options.summary_out == "true":
+ r( 'write.table(summary(ngsum), file="%s", quote=FALSE, sep="\t", row.names=FALSE)' % options.output_summary )
+
+ # Summary objects (these could be printed)
+ r( '''
+ tr_pos <- tapply(allsum$fprate,list(allsum$hetcol,allsum$errcol), mean)
+ tr_neg <- tapply(allsum$FN,list(allsum$hetcol,allsum$errcol), mean)
+ cat('\nFalse Positive Rate Summary\n\t', file='%s', append=T, sep='\t')
+ write.table(format(tr_pos, digits=4), file='%s', append=T, quote=F, sep='\t')
+ cat('\nFalse Negative Rate Summary\n\t', file='%s', append=T, sep='\t')
+ write.table(format(tr_neg, digits=4), file='%s', append=T, quote=F, sep='\t')
+ ''' % tuple( [ options.output_summary ] * 4 ) )
+
+ # Setup graphs
+ #pdf(paste(prefix,'_jointgraph.pdf',sep=''), 15, 10)
+ r( '''
+ png('%s', width=800, height=500, units='px', res=250)
+ layout(matrix(data=c(1,2,1,3,1,4), nrow=2, ncol=3), widths=c(4,6,2), heights=c(1,10,10))
+ ''' % options.output_png )
+
+ # Main title
+ genome = ''
+ if options.genome:
+ genome = '%s: ' % options.genome
+ r( '''
+ par(mar=c(0,0,0,0))
+ plot(1, type='n', axes=F, xlab='', ylab='')
+ text(1,1,paste('%sVariation in False Positives and Negatives (', %s, ' simulations, coverage ', %s,')', sep=''), font=2, family='sans', cex=0.7)
+ ''' % ( genome, options.num_sims, options.avg_coverage ) )
+
+ # False positive boxplot
+ r( '''
+ par(mar=c(5,4,2,2), las=1, cex=0.35)
+ boxplot(allsum$fprate ~ allsum$errcol, horizontal=T, ylim=rev(range(allsum$fprate)), cex.axis=0.85)
+ title(main='False Positives', xlab='false positive rate', ylab='')
+ ''' )
+
+ # False negative heatmap (note zlim command!)
+ num_polys = len( polymorphisms )
+ num_dets = len( detection_threshes )
+ r( '''
+ par(mar=c(5,4,2,1), las=1, cex=0.35)
+ image(1:%s, 1:%s, tr_neg, zlim=c(0,1), col=hues, xlab='', ylab='', axes=F, border=1)
+ axis(1, at=1:%s, labels=rownames(tr_neg), lwd=1, cex.axis=0.85, axs='i')
+ axis(2, at=1:%s, labels=colnames(tr_neg), lwd=1, cex.axis=0.85)
+ title(main='False Negatives', xlab='minor allele frequency', ylab='detection threshold')
+ ''' % ( num_polys, num_dets, num_polys, num_dets ) )
+
+ # Scale alongside
+ r( '''
+ par(mar=c(2,2,2,3), las=1)
+ image(1, grade, matrix(grade, ncol=length(grade), nrow=1), col=hues, xlab='', ylab='', xaxt='n', las=1, cex.axis=0.85)
+ title(main='Key', cex=0.35)
+ mtext('false negative rate', side=1, cex=0.35)
+ ''' )
+
+ # Close graphics
+ r( '''
+ layout(1)
+ dev.off()
+ ''' )
+
+ # Tidy up
+# r( 'rm(folder,prefix,sim,cov,het,err,grade,hues,i,j,ngsum)' )
+
+if __name__ == "__main__" : __main__()
--- /dev/null
+++ b/test-data/ngs_simulation_out2.tabular
@@ -0,0 +1,19 @@
+ FP FN GENOMESIZE.2686 fprate hetcol errcol
+Min. :0.00 Min. :0 Mode:logical Min. :0.000e+00 Min. :0.1 Min. :0.02
+1st Qu.:0.00 1st Qu.:0 NA's:25 1st Qu.:0.000e+00 1st Qu.:0.1 1st Qu.:0.02
+Median :0.00 Median :0 NA Median :0.000e+00 Median :0.1 Median :0.02
+Mean :0.04 Mean :0 NA Mean :1.489e-05 Mean :0.1 Mean :0.02
+3rd Qu.:0.00 3rd Qu.:0 NA 3rd Qu.:0.000e+00 3rd Qu.:0.1 3rd Qu.:0.02
+Max. :1.00 Max. :0 NA Max. :3.723e-04 Max. :0.1 Max. :0.02
+
+False Positive Rate Summary
+ 0.01 0.02
+0.02 9.710e-03 4.468e-05
+0.04 9.680e-03 1.489e-05
+0.1 9.695e-03 1.489e-05
+
+False Negative Rate Summary
+ 0.01 0.02
+0.02 0.16 0.52
+0.04 0.00 0.04
+0.1 0.00 0.00
1
0
galaxy-dist commit 9d68027b0109: Add options to Tophat wrapper for specifying own splice junctions.
by commits-noreply@bitbucket.org 20 Nov '10
by commits-noreply@bitbucket.org 20 Nov '10
20 Nov '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks(a)emory.edu>
# Date 1288805582 14400
# Node ID 9d68027b01096e2b32101234878d11946c03d08c
# Parent 49f0e8441a4da6b1ec03250448ab84854f07aa77
Add options to Tophat wrapper for specifying own splice junctions.
--- a/tools/ngs_rna/tophat_wrapper.py
+++ b/tools/ngs_rna/tophat_wrapper.py
@@ -29,6 +29,24 @@ def __main__():
help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )
parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' )
parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
+ parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
+ parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
+
+ # Options for supplying own junctions
+ parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \
+ TopHat will use the exon records in this file to build \
+ a set of known splice junctions for each gene, and will \
+ attempt to align reads to these junctions even if they \
+ would not normally be covered by the initial mapping.')
+ parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \
+ specified one per line, in a tab-delimited format. Records \
+ look like: <chrom><left><right><+/-> left and right are \
+ zero-based coordinates, and specify the last character of the \
+ left sequenced to be spliced to the first character of the right \
+ sequence, inclusive.')
+ parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \
+ supplied GFF file. (ignored without -G)")
+ # Types of search.
parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.')
parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)')
parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' )
@@ -41,8 +59,6 @@ def __main__():
parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )
parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )
parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )
- parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
- parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
# Wrapper options.
parser.add_option( '-1', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
@@ -107,6 +123,15 @@ def __main__():
if float( options.junction_filter ) != 0.0:
opts += ' -F %s' % options.junction_filter
opts += ' -g %s' % options.max_multihits
+ # Custom junctions options.
+ if options.gene_model_annotations:
+ opts += ' -G %s' % options.gene_model_annotations
+ if options.raw_juncs:
+ opts += ' -j %s' % options.raw_juncs
+ if options.no_novel_juncs:
+ opts += ' --no-novel-juncs'
+
+ # Search type options.
if options.coverage_search:
opts += ' --coverage-search --min-coverage-intron %s --max-coverage-intron %s' % ( options.min_coverage_intron, options.max_coverage_intron )
else:
--- a/tools/ngs_rna/tophat_wrapper.xml
+++ b/tools/ngs_rna/tophat_wrapper.xml
@@ -45,6 +45,21 @@
--max-segment-intron $singlePaired.sParams.max_segment_intron
--seg-mismatches=$singlePaired.sParams.seg_mismatches
--seg-length=$singlePaired.sParams.seg_length
+
+ ## Supplying junctions parameters.
+ #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
+ #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":
+ -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model
+ #end if
+ #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":
+ -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs
+ #end if
+ ## TODO: No idea why a string cast is necessary, but it is:
+ #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":
+ --no-novel-juncs
+ #end if
+ #end if
+
#if $singlePaired.sParams.closure_search.use_search == "Yes":
--closure-search
--min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
@@ -60,8 +75,8 @@
#else:
--no-coverage-search
#end if
- ## No idea why the type conversion is necessary, but it seems to be.
- #if str ($singlePaired.sParams.microexon_search) == "Yes":
+ ## TODO: No idea why the type conversion is necessary, but it seems to be.
+ #if str($singlePaired.sParams.microexon_search) == "Yes":
--microexon-search
#end if
#end if
@@ -81,6 +96,21 @@
--max-segment-intron $singlePaired.pParams.max_segment_intron
--seg-mismatches=$singlePaired.pParams.seg_mismatches
--seg-length=$singlePaired.pParams.seg_length
+
+ ## Supplying junctions parameters.
+ #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
+ #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes":
+ -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model
+ #end if
+ #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes":
+ -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs
+ #end if
+ ## TODO: No idea why type cast is necessary, but it is:
+ #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes":
+ --no-novel-juncs
+ #end if
+ #end if
+
#if $singlePaired.pParams.closure_search.use_search == "Yes":
--closure-search
--min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
@@ -96,7 +126,7 @@
#else:
--no-coverage-search
#end if
- ## No idea why the type conversion is necessary, but it seems to be.
+ ## TODO: No idea why the type conversion is necessary, but it seems to be.
#if str ($singlePaired.pParams.microexon_search) == "Yes":
--microexon-search
#end if
@@ -146,6 +176,42 @@
<param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /><param name="seg_mismatches" type="integer" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /><param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
+
+ <!-- Options for supplying own junctions. -->
+ <conditional name="own_junctions">
+ <param name="use_junctions" type="select" label="Use Own Junctions">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="Yes">
+ <conditional name="gene_model_ann">
+ <param name="use_annotations" type="select" label="Use Gene Annotation Model">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="No" />
+ <when value="Yes">
+ <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
+ </when>
+ </conditional>
+ <conditional name="raw_juncs">
+ <param name="use_juncs" type="select" label="Use Raw Junctions">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="No" />
+ <when value="Yes">
+          <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive."/>
+ </when>
+ </conditional>
+ <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ </when>
+ <when value="No" />
+ </conditional><!-- /own_junctions -->
+
<!-- Closure search. --><conditional name="closure_search"><param name="use_search" type="select" label="Use Closure Search">
@@ -201,6 +267,41 @@
<param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /><param name="seg_mismatches" type="integer" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /><param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
+ <!-- Options for supplying own junctions. -->
+ <conditional name="own_junctions">
+ <param name="use_junctions" type="select" label="Use Own Junctions">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="Yes">
+ <conditional name="gene_model_ann">
+ <param name="use_annotations" type="select" label="Use Gene Annotation Model">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="No" />
+ <when value="Yes">
+ <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
+ </when>
+ </conditional>
+ <conditional name="raw_juncs">
+ <param name="use_juncs" type="select" label="Use Raw Junctions">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ <when value="No" />
+ <when value="Yes">
+           <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive."/>
+ </when>
+ </conditional>
+ <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
+ <option value="No">No</option>
+ <option value="Yes">Yes</option>
+ </param>
+ </when>
+ <when value="No" />
+ </conditional><!-- /own_junctions -->
+
<!-- Closure search. --><conditional name="closure_search"><param name="use_search" type="select" label="Use Closure Search">
@@ -385,8 +486,11 @@ This is a list of implemented Tophat opt
-F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, is supported by S reads. Let the average depth of coverage of
exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the
filter. The default is 0.15.
- -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many
+ -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many
alignments. The default is 40.
+ -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
+ -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive.
+ --no-novel-juncs Only look for junctions indicated in the supplied GFF file. (ignored without -G)
--no-closure-search Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
--closure-search Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
--no-coverage-search Disables the coverage based search for junctions.
1
0