12 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/1092b0d54bf6/ Changeset: 1092b0d54bf6 User: ross...@gmail.com Date: 2013-08-04 09:44:45 Summary: update bowtie2 wrapper Affected #: 3 files diff -r 8ca6cfea19091f0d44be3eec3b3d18794133a39a -r 1092b0d54bf690b9d148276bcb14baae80f429e2 tools/ngs_rna/tophat2_wrapper.xml --- a/tools/ngs_rna/tophat2_wrapper.xml +++ b/tools/ngs_rna/tophat2_wrapper.xml @@ -273,19 +273,19 @@ </stdio><outputs> - <data format="tabular" name="fusions" label="${tool.name} on ${on_string}: fusions" from_work_dir="tophat_out/fusions.out"> + <data format="tabular" name="fusions" label="${on_string}_tophat2fus.xls" from_work_dir="tophat_out/fusions.out"><filter>(params['settingsType'] == 'full' and params['fusion_search']['do_search'] == 'Yes')</filter></data> - <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed"> + <data format="bed" name="insertions" label="${on_string}_tophat2ins.bed" from_work_dir="tophat_out/insertions.bed"><expand macro="dbKeyActions" /></data> - <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed"> + <data format="bed" name="deletions" label="${on_string}_tophhat2del.bed" from_work_dir="tophat_out/deletions.bed"><expand macro="dbKeyActions" /></data> - <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions" from_work_dir="tophat_out/junctions.bed"> + <data format="bed" name="junctions" label="${on_string}tophat2sj.bed" from_work_dir="tophat_out/junctions.bed"><expand macro="dbKeyActions" /></data> - <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam"> + <data format="bam" name="accepted_hits" label="${on_string}tophat2hits.bam" from_work_dir="tophat_out/accepted_hits.bam"><expand macro="dbKeyActions" /></data></outputs> diff -r 8ca6cfea19091f0d44be3eec3b3d18794133a39a -r 1092b0d54bf690b9d148276bcb14baae80f429e2 tools/rgenetics/rgFastQC.py --- a/tools/rgenetics/rgFastQC.py +++ b/tools/rgenetics/rgFastQC.py @@ -27,6 +27,7 @@ from rgutils import getFileString import zipfile import gzip +import bz2 class FastQC(): """wrapper @@ -62,27 +63,43 @@ infname = self.opts.inputfilename linf = infname.lower() trimext = False + isgz = False + isbz2 = False + iszip = False # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf # patched may 29 2013 until this is fixed properly - if ( linf.endswith('.gz') or linf.endswith('.gzip') ): - f = gzip.open(self.opts.input) - try: + f = gzip.open(self.opts.input) + try: testrow = f.readline() - except: - trimext = True - f.close() - elif linf.endswith('bz2'): - f = bz2.open(self.opts.input,'rb') - try: - f.readline() - except: - trimext = True - f.close() - elif linf.endswith('.zip'): - if not zipfile.is_zipfile(self.opts.input): - trimext = True + if not (linf.endswith('.gz') or linf.endswith('.gzip')): + isgz = True + except: + if linf.endswith('.gz') or linf.endswith('.gzip'): + trimext = True + f.close() + f = bz2.BZ2File(self.opts.input) + try: + x = read(f,2) # will work if a real bz2 + if not linf.endswith('bz2'): + isbz2 = True + except: + if linf.endswith('bz2'): + trimext = True + f.close() + if zipfile.is_zipfile(self.opts.input): + if not linf.endswith('.zip'): + iszip = True + else: + if linf.endswith('.zip'): + trimext = True if trimext: infname = 
os.path.splitext(infname)[0] + elif isgz: + infname = '%s.gz' % infname + elif isbz2: + infname = '%s.bzip2' % infname + elif iszip: + infname = '%s.zip' % infname fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) link_name = os.path.join(self.opts.outputdir, fastqinfilename) os.symlink(self.opts.input, link_name) diff -r 8ca6cfea19091f0d44be3eec3b3d18794133a39a -r 1092b0d54bf690b9d148276bcb14baae80f429e2 tools/sr_mapping/bowtie2_wrapper.xml --- a/tools/sr_mapping/bowtie2_wrapper.xml +++ b/tools/sr_mapping/bowtie2_wrapper.xml @@ -113,6 +113,8 @@ </stdio><inputs> + <param name="jobtitle" type="text" value="bowtie2" label="Memorable short reminder of this job's importance for outputs" /> + <!-- single/paired --><conditional name="library"><param name="type" type="select" label="Is this library mate-paired?"> @@ -216,7 +218,7 @@ <!-- define outputs --><outputs> - <data format="fastqsanger" name="output_unaligned_reads_l" label="${tool.name} on ${on_string}: unaligned reads (L)" > + <data format="fastqsanger" name="output_unaligned_reads_l" label="${on_string}_{jobtitle}_unaligL.fastq" ><filter>unaligned_file is True</filter><actions><action type="format"> @@ -224,7 +226,7 @@ </action></actions></data> - <data format="fastqsanger" name="output_unaligned_reads_r" label="${tool.name} on ${on_string}: unaligned reads (R)"> + <data format="fastqsanger" name="output_unaligned_reads_r" label="${on_string}_{jobtitle}_unaligR.fastq)"><filter>library['type'] == "paired" and unaligned_file is True</filter><actions><action type="format"> @@ -232,7 +234,7 @@ </action></actions></data> - <data format="bam" name="output" label="${tool.name} on ${on_string}: aligned reads"> + <data format="bam" name="output" label="${tool.name} on ${on_string}_{jobtitle}_aligned.bam"><actions><conditional name="reference_genome.source"><when value="indexed"> https://bitbucket.org/galaxy/galaxy-central/commits/f0cdc6bebfaa/ Changeset: f0cdc6bebfaa User: ross...@gmail.com Date: 2013-08-04 09:48:20 Summary: more baker stuff Affected #: 191 files diff -r 1092b0d54bf690b9d148276bcb14baae80f429e2 -r f0cdc6bebfaa8757bb093d74508332515d5eec38 tools/rgedgeR/rgToolFactory.py --- /dev/null +++ b/tools/rgedgeR/rgToolFactory.py @@ -0,0 +1,605 @@ +# rgToolFactory.py +# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home +# +# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 +# +# all rights reserved +# Licensed under the LGPL +# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home +# +# july 2013 +# added ability to combine images and individual log files into html output +# just make sure there's a log file foo.log and it will be output +# together with all images named like "foo_*.pdf +# otherwise old format for html +# +# January 2013 +# problem pointed out by Carlos Borroto +# added escaping for <>$ - thought I did that ages ago... +# +# August 11 2012 +# changed to use shell=False and cl as a sequence + +# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye. +# It also serves as the wrapper for the new tool. +# +# you paste and run your script +# Only works for simple scripts that read one input from the history. +# Optionally can write one new history dataset, +# and optionally collect any number of outputs into links on an autogenerated HTML page. + +# DO NOT install on a public or important site - please. + +# installed generated tools are fine if the script is safe. 
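The rgFastQC.py hunk in the first changeset above sniffs the real compression format of the upload (gzip, bzip2 or zip) and reconciles it with the filename suffix, so FastQC is handed a name whose extension matches the actual content. Below is a minimal standalone sketch (Python 3) of that idea, not the patched code itself; reconcile_suffix and _readable are illustrative names. Note that "x = read(f,2)" in the hunk presumably means "f.read(2)"; as written, the NameError is simply swallowed by the bare except.

import bz2
import gzip
import os
import zipfile

def _readable(opener, path):
    # True when `opener` can actually decode the first bytes of `path`.
    try:
        with opener(path) as handle:
            handle.read(2)
        return True
    except (OSError, EOFError):
        return False

def reconcile_suffix(path, display_name):
    # Return display_name with a suffix that matches the detected content.
    lower = display_name.lower()
    is_gz = _readable(gzip.open, path)
    is_bz2 = _readable(bz2.BZ2File, path)
    is_zip = zipfile.is_zipfile(path)
    claims_gz = lower.endswith(('.gz', '.gzip'))
    claims_bz2 = lower.endswith('.bz2')
    claims_zip = lower.endswith('.zip')
    # Upload-time decompression can leave a now-bogus suffix behind: drop it.
    if (claims_gz and not is_gz) or (claims_bz2 and not is_bz2) or (claims_zip and not is_zip):
        return os.path.splitext(display_name)[0]
    # The opposite case: compressed content without the telltale suffix, so add one.
    if is_gz and not claims_gz:
        return display_name + '.gz'
    if is_bz2 and not claims_bz2:
        return display_name + '.bz2'
    if is_zip and not claims_zip:
        return display_name + '.zip'
    return display_name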
+# They just run normally and their user cannot do anything unusually insecure +# but please, practice safe toolshed. +# Read the fucking code before you install any tool +# especially this one + +# After you get the script working on some test data, you can +# optionally generate a toolshed compatible gzip file +# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for +# safe and largely automated installation in a production Galaxy. + +# If you opt for an HTML output, you get all the script outputs arranged +# as a single Html history item - all output files are linked, thumbnails for all the pdfs. +# Ugly but really inexpensive. +# +# Patches appreciated please. +# +# +# long route to June 2012 product +# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them +# derived from an integrated script model +# called rgBaseScriptWrapper.py +# Note to the unwary: +# This tool allows arbitrary scripting on your Galaxy as the Galaxy user +# There is nothing stopping a malicious user doing whatever they choose +# Extremely dangerous!! +# Totally insecure. So, trusted users only +# +# preferred model is a developer using their throw away workstation instance - ie a private site. +# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool. +# + +import sys +import shutil +import subprocess +import os +import time +import tempfile +import optparse +import tarfile +import re +import shutil +import math + +progname = os.path.split(sys.argv[0])[1] +myversion = 'V000.2 June 2012' +verbose = False +debug = False +toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory' + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +html_escape_table = { + "&": "&", + ">": ">", + "<": "<", + "$": "\$" + } + +def html_escape(text): + """Produce entities within text.""" + return "".join(html_escape_table.get(c,c) for c in text) + +def cmd_exists(cmd): + return subprocess.call("type " + cmd, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 + + +class ScriptRunner: + """class is a wrapper for an arbitrary script + """ + + def __init__(self,opts=None,treatbashSpecial=True): + """ + cleanup inputs, setup some outputs + + """ + self.useGM = cmd_exists('gm') + self.useIM = cmd_exists('convert') + self.useGS = cmd_exists('gs') + self.treatbashSpecial = treatbashSpecial + if opts.output_dir: # simplify for the tool tarball + os.chdir(opts.output_dir) + self.thumbformat = 'png' + self.opts = opts + self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but.. 
+ self.toolid = self.toolname + self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later + self.pyfile = self.myname # crude but efficient - the cruft won't hurt much + self.xmlfile = '%s.xml' % self.toolname + s = open(self.opts.script_path,'r').readlines() + s = [x.rstrip() for x in s] # remove pesky dos line endings if needed + self.script = '\n'.join(s) + fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter)) + tscript = open(self.sfile,'w') # use self.sfile as script source for Popen + tscript.write(self.script) + tscript.close() + self.indentedScript = '\n'.join([' %s' % x for x in s]) # for restructured text in help + self.escapedScript = '\n'.join([html_escape(x) for x in s]) + self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname) + if opts.output_dir: # may not want these complexities + self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname) + art = '%s.%s' % (self.toolname,opts.interpreter) + artpath = os.path.join(self.opts.output_dir,art) # need full path + artifact = open(artpath,'w') # use self.sfile as script source for Popen + artifact.write(self.script) + artifact.close() + self.cl = [] + self.html = [] + a = self.cl.append + a(opts.interpreter) + if self.treatbashSpecial and opts.interpreter in ['bash','sh']: + a(self.sfile) + else: + a('-') # stdin + a(opts.input_tab) + a(opts.output_tab) + self.outFormats = 'tabular' # TODO make this an option at tool generation time + self.inputFormats = 'tabular' # TODO make this an option at tool generation time + self.test1Input = '%s_test1_input.xls' % self.toolname + self.test1Output = '%s_test1_output.xls' % self.toolname + self.test1HTML = '%s_test1_output.html' % self.toolname + + def makeXML(self): + """ + Create a Galaxy xml tool wrapper for the new script as a string to write out + fixme - use templating or something less fugly than this example of what we produce + + <tool id="reverse" name="reverse" version="0.01"> + <description>a tabular file</description> + <command interpreter="python"> + reverse.py --script_path "$runMe" --interpreter "python" + --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file" + </command> + <inputs> + <param name="input1" type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/> + + </inputs> + <outputs> + <data format="tabular" name="tab_file" label="${job_name}"/> + + </outputs> + <help> + +**What it Does** + +Reverse the columns in a tabular file + + </help> + <configfiles> + <configfile name="runMe"> + +# reverse order of columns in a tabular file +import sys +inp = sys.argv[1] +outp = sys.argv[2] +i = open(inp,'r') +o = open(outp,'w') +for row in i: + rs = row.rstrip().split('\t') + rs.reverse() + o.write('\t'.join(rs)) + o.write('\n') +i.close() +o.close() + + + </configfile> + </configfiles> + </tool> + + """ + newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s"> + %(tooldesc)s + %(command)s + <inputs> + %(inputs)s + </inputs> + <outputs> + %(outputs)s + </outputs> + <configfiles> + <configfile name="runMe"> + %(script)s + </configfile> + </configfiles> + %(tooltests)s + <help> + %(help)s + </help> + </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto + + newCommand="""<command 
interpreter="python"> + %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" + --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s + </command>""" # may NOT be an input or htmlout + tooltestsTabOnly = """<tests><test> + <param name="input1" value="%(test1Input)s" ftype="tabular"/> + <param name="job_name" value="test1"/> + <param name="runMe" value="$runMe"/> + <output name="tab_file" file="%(test1Output)s" ftype="tabular"/> + </test></tests>""" + tooltestsHTMLOnly = """<tests><test> + <param name="input1" value="%(test1Input)s" ftype="tabular"/> + <param name="job_name" value="test1"/> + <param name="runMe" value="$runMe"/> + <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/> + </test></tests>""" + tooltestsBoth = """<tests><test> + <param name="input1" value="%(test1Input)s" ftype="tabular"/> + <param name="job_name" value="test1"/> + <param name="runMe" value="$runMe"/> + <output name="tab_file" file="%(test1Output)s" ftype="tabular" /> + <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/> + </test></tests>""" + xdict = {} + xdict['tool_version'] = self.opts.tool_version + xdict['test1Input'] = self.test1Input + xdict['test1HTML'] = self.test1HTML + xdict['test1Output'] = self.test1Output + if self.opts.make_HTML and self.opts.output_tab <> 'None': + xdict['tooltests'] = tooltestsBoth % xdict + elif self.opts.make_HTML: + xdict['tooltests'] = tooltestsHTMLOnly % xdict + else: + xdict['tooltests'] = tooltestsTabOnly % xdict + xdict['script'] = self.escapedScript + # configfile is least painful way to embed script to avoid external dependencies + # but requires escaping of <, > and $ to avoid Mako parsing + if self.opts.help_text: + xdict['help'] = open(self.opts.help_text,'r').read() + else: + xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation' + coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::'] + coda.append(self.indentedScript) + coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.' % (self.opts.user_email,timenow())) + coda.append('See %s for details of that project' % (toolFactoryURL)) + coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. 
') + coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573') + xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda)) + if self.opts.tool_desc: + xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc + else: + xdict['tooldesc'] = '' + xdict['command_outputs'] = '' + xdict['outputs'] = '' + if self.opts.input_tab <> 'None': + xdict['command_inputs'] = '--input_tab "$input1" ' # the space may matter a lot if we append something + xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats + else: + xdict['command_inputs'] = '' # assume no input - eg a random data generator + xdict['inputs'] = '' + xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname + xdict['toolname'] = self.toolname + xdict['toolid'] = self.toolid + xdict['interpreter'] = self.opts.interpreter + xdict['scriptname'] = self.sfile + if self.opts.make_HTML: + xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" ' + xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n' + if self.opts.output_tab <> 'None': + xdict['command_outputs'] += ' --output_tab "$tab_file"' + xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats + xdict['command'] = newCommand % xdict + xmls = newXML % xdict + xf = open(self.xmlfile,'w') + xf.write(xmls) + xf.write('\n') + xf.close() + # ready for the tarball + + + def makeTooltar(self): + """ + a tool is a gz tarball with eg + /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ... + """ + retval = self.run() + if retval: + print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry' + sys.exit(1) + self.makeXML() + tdir = self.toolname + os.mkdir(tdir) + if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe.. + testdir = os.path.join(tdir,'test-data') + os.mkdir(testdir) # make tests directory + shutil.copyfile(self.opts.input_tab,os.path.join(testdir,self.test1Input)) + if self.opts.output_tab <> 'None': + shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output)) + if self.opts.make_HTML: + shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML)) + if self.opts.output_dir: + shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log')) + op = '%s.py' % self.toolname # new name + outpiname = os.path.join(tdir,op) # path for the tool tarball + pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM) + notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op,pyin,pyin),] + notes.append('# to make a new Galaxy tool called %s\n' % self.toolname) + notes.append('# User %s at %s\n' % (self.opts.user_email,timenow())) + pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) 
- first Galaxy worm + notes += pi + outpi = open(outpiname,'w') + outpi.write(''.join(notes)) + outpi.write('\n') + outpi.close() + stname = os.path.join(tdir,self.sfile) + if not os.path.exists(stname): + shutil.copyfile(self.sfile, stname) + xtname = os.path.join(tdir,self.xmlfile) + if not os.path.exists(xtname): + shutil.copyfile(self.xmlfile,xtname) + tarpath = "%s.gz" % self.toolname + tar = tarfile.open(tarpath, "w:gz") + tar.add(tdir,arcname=self.toolname) + tar.close() + shutil.copyfile(tarpath,self.opts.new_tool) + shutil.rmtree(tdir) + ## TODO: replace with optional direct upload to local toolshed? + return retval + + + def compressPDF(self,inpdf=None,thumbformat='png'): + """need absolute path to pdf + """ + assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName) + hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname) + sto = open(hlog,'w') + outpdf = '%s_compressed' % inpdf + cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf] + x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + retval1 = x.wait() + if retval1 == 0: + os.unlink(inpdf) + shutil.move(outpdf,inpdf) + outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat) + if self.useGM: + cl2 = ['gm convert', inpdf, outpng] + else: # assume imagemagick + cl2 = ['convert', inpdf, outpng] + x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + retval2 = x.wait() + sto.close() + retval = retval1 or retval2 + return retval + + + def getfSize(self,fpath,outpath): + """ + format a nice file size string + """ + size = '' + fp = os.path.join(outpath,fpath) + if os.path.isfile(fp): + size = '0 B' + n = float(os.path.getsize(fp)) + if n > 2**20: + size = '%1.1f MB' % (n/2**20) + elif n > 2**10: + size = '%1.1f KB' % (n/2**10) + elif n > 0: + size = '%d B' % (int(n)) + return size + + def makeHtml(self): + """ Create an HTML file content to list all the artifacts found in the output_dir + """ + + galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> + <title></title> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + </head> + <body> + <div class="toolFormBody"> + """ + galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>""" + galhtmlpostfix = """</div></body></html>\n""" + + flist = os.listdir(self.opts.output_dir) + flist = [x for x in flist if x <> 'Rplots.pdf'] + flist.sort() + html = [] + html.append(galhtmlprefix % progname) + html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow())) + fhtml = [] + if len(flist) > 0: + logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections + logfiles.sort() + logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)] + logfiles.append(os.path.abspath(self.tlog)) # make it the last one + pdflist = [] + npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']) + for rownum,fname in enumerate(flist): + dname,e = os.path.splitext(fname) 
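compressPDF above drives ghostscript to shrink each PDF in place and then ImageMagick (or GraphicsMagick) to build the thumbnail used in the image grid further on. A condensed sketch of that recipe follows; compress_pdf and its arguments are illustrative rather than the changeset's API, and note that with shell=False the GraphicsMagick branch needs ["gm", "convert", ...] as separate argv entries rather than the single "gm convert" string used in the hunk.

import os
import shutil
import subprocess

def compress_pdf(pdf_path, thumb_format="png", log_path=os.devnull):
    # Shrink a PDF in place with ghostscript, then write a thumbnail beside it.
    # Returns 0 when both steps succeed, mirroring the retval logic above.
    squeezed = pdf_path + "_compressed"
    with open(log_path, "w") as log:
        gs_cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH",
                 "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % squeezed, pdf_path]
        gs_rc = subprocess.call(gs_cl, stdout=log, stderr=log)
        if gs_rc == 0:
            shutil.move(squeezed, pdf_path)  # replace the original with the smaller copy
        thumb = "%s.%s" % (os.path.splitext(pdf_path)[0], thumb_format)
        im_cl = ["convert", pdf_path, thumb]  # GraphicsMagick equivalent: ["gm", "convert", pdf_path, thumb]
        im_rc = subprocess.call(im_cl, stdout=log, stderr=log)
    return gs_rc or im_rc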
+ sfsize = self.getfSize(fname,self.opts.output_dir) + if e.lower() == '.pdf' : # compress and make a thumbnail + thumb = '%s.%s' % (dname,self.thumbformat) + pdff = os.path.join(self.opts.output_dir,fname) + retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat) + if retval == 0: + pdflist.append((fname,thumb)) + if (rownum+1) % 2 == 0: + fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize)) + else: + fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize)) + for logfname in logfiles: # expect at least tlog - if more + if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later + sectionname = 'All tool run' + if (len(logfiles) > 1): + sectionname = 'Other' + ourpdfs = pdflist + else: + realname = os.path.basename(logfname) + sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log + ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname] + pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove + nacross = 1 + npdf = len(ourpdfs) + + if npdf > 0: + nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2))) + if int(nacross)**2 != npdf: + nacross += 1 + nacross = int(nacross) + width = min(400,int(1200/nacross)) + html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname) + html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>') + ntogo = nacross # counter for table row padding with empty cells + html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>') + for i,paths in enumerate(ourpdfs): + fname,thumb = paths + s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" + alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname) + if ((i+1) % nacross == 0): + s += '</tr>\n' + ntogo = 0 + if i < (npdf - 1): # more to come + s += '<tr>' + ntogo = nacross + else: + ntogo -= 1 + html.append(s) + if html[-1].strip().endswith('</tr>'): + html.append('</table></div>\n') + else: + if ntogo > 0: # pad + html.append('<td> </td>'*ntogo) + html.append('</tr></table></div>\n') + logt = open(logfname,'r').readlines() + logtext = [x for x in logt if x.strip() > ''] + html.append('<div class="toolFormTitle">%s log output</div>' % sectionname) + if len(logtext) > 1: + html.append('\n<pre>\n') + html += logtext + html.append('\n</pre>\n') + else: + html.append('%s is empty<br/>' % logfname) + if len(fhtml) > 0: + fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n') + fhtml.append('</table></div><br/>') + html.append('<div class="toolFormTitle">All output files available for downloading</div>\n') + html += fhtml # add all non-pdf files to the end of the display + else: + html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter) + html.append(galhtmlpostfix) + htmlf = file(self.opts.output_html,'w') + htmlf.write('\n'.join(html)) + htmlf.write('\n') + htmlf.close() + self.html = html + + + def run(self): + """ + scripts must be small enough not to fill the pipe! 
+ """ + if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']: + retval = self.runBash() + else: + if self.opts.output_dir: + ste = open(self.elog,'w') + sto = open(self.tlog,'w') + sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl)) + sto.flush() + p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir) + else: + p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE) + p.stdin.write(self.script) + p.stdin.close() + retval = p.wait() + if self.opts.output_dir: + sto.close() + ste.close() + err = open(self.elog,'r').readlines() + if retval <> 0 and err: # problem + print >> sys.stderr,err + if self.opts.make_HTML: + self.makeHtml() + return retval + + def runBash(self): + """ + cannot use - for bash so use self.sfile + """ + if self.opts.output_dir: + s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl) + sto = open(self.tlog,'w') + sto.write(s) + sto.flush() + p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + else: + p = subprocess.Popen(self.cl,shell=False) + retval = p.wait() + if self.opts.output_dir: + sto.close() + if self.opts.make_HTML: + self.makeHtml() + return retval + + +def main(): + u = """ + This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as: + <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript" + </command> + """ + op = optparse.OptionParser() + a = op.add_option + a('--script_path',default=None) + a('--tool_name',default=None) + a('--interpreter',default=None) + a('--output_dir',default=None) + a('--output_html',default=None) + a('--input_tab',default="None") + a('--output_tab',default="None") + a('--user_email',default='Unknown') + a('--bad_user',default=None) + a('--make_Tool',default=None) + a('--make_HTML',default=None) + a('--help_text',default=None) + a('--tool_desc',default=None) + a('--new_tool',default=None) + a('--tool_version',default=None) + opts, args = op.parse_args() + assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user) + assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq' + assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript' + assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R' + if opts.output_dir: + try: + os.makedirs(opts.output_dir) + except: + pass + r = ScriptRunner(opts) + if opts.make_Tool: + retcode = r.makeTooltar() + else: + retcode = r.run() + os.unlink(r.sfile) + if retcode: + sys.exit(retcode) # indicate failure to job runner + + +if __name__ == "__main__": + main() + + diff -r 1092b0d54bf690b9d148276bcb14baae80f429e2 -r f0cdc6bebfaa8757bb093d74508332515d5eec38 tools/rgedgeR/rgedgeRpaired.xml --- /dev/null +++ b/tools/rgedgeR/rgedgeRpaired.xml @@ -0,0 +1,1079 @@ +<tool id="rgDifferentialCount" name="Differential_Count" version="0.20"> + <description>models using BioConductor packages</description> + <requirements> + <requirement type="package" version="2.12">biocbasics</requirement> + <requirement type="package" version="3.0.1">r3</requirement> + <requirement type="package" version="1.3.18">graphicsmagick</requirement> + <requirement type="package" version="9.07">ghostscript</requirement> + </requirements> + + <command 
interpreter="python"> + rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" + --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" + </command> + <inputs> + <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample" + help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/> + <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs" + help="Supply a meaningful name here to remind you what the outputs contain"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/> + <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" + multiple="true" use_header_names="true" size="120" display="checkboxes"> + <validator type="no_options" message="Please select at least one column."/> + </param> + <param name="control_name" type="text" value="Control" size="50" label="Control Name"/> + <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True" + multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true"> + </param> + <param name="subjectids" type="text" optional="true" size="120" value = "" + label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input" + help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'"> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="," /></valid> + </sanitizer> + </param> + <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs" + help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/> + <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1" + label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples" + help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/> + + <conditional name="edgeR"> + <param name="doedgeR" type="select" + label="Run this model using edgeR" + help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates"> + <option value="F">Do not run edgeR</option> + <option value="T" selected="true">Run edgeR</option> + </param> + <when value="T"> + <param name="edgeR_priordf" type="integer" value="20" size="3" + label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df" + help="0 = Use edgeR default. Use a small value to 'smooth' small samples. 
See edgeR docs and note below"/> + </when> + <when value="F"></when> + </conditional> + <conditional name="DESeq2"> + <param name="doDESeq2" type="select" + label="Run the same model with DESeq2 and compare findings" + help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR"> + <option value="F" selected="true">Do not run DESeq2</option> + <option value="T">Run DESeq2</option> + </param> + <when value="T"> + <param name="DESeq_fitType" type="select"> + <option value="parametric" selected="true">Parametric (default) fit for dispersions</option> + <option value="local">Local fit - this will automagically be used if parametric fit fails</option> + <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option> + </param> + </when> + <when value="F"></when> + </conditional> + <param name="doVoom" type="select" + label="Run the same model with Voom/limma and compare findings" + help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma"> + <option value="F" selected="true">Do not run VOOM</option> + <option value="T">Run VOOM</option> + </param> + <conditional name="camera"> + <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets" + help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history"> + <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option> + <option value="T">Run GSEA tests with the Camera algorithm</option> + </param> + <when value="T"> + <conditional name="gmtSource"> + <param name="refgmtSource" type="select" + label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set"> + <option value="indexed" selected="true">Use a built-in gene set</option> + <option value="history">Use a gene set from my history</option> + <option value="both">Add a gene set from my history to a built in gene set</option> + </param> + <when value="indexed"> + <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis"> + <options from_data_table="gseaGMT_3.1"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No GMT v3.1 files are available - please install them"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" /> + </when> + <when value="both"> + <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" /> + <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis"> + <options from_data_table="gseaGMT_4"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/> + </options> + </param> + </when> + </conditional> + </when> + <when value="F"> + </when> + </conditional> + <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control" + help="Conventional default value of 0.05 recommended"/> + <param name="fdrtype" type="select" label="FDR (Type II error) control method" + help="Use fdr or bh typically to control for the number of tests in a reliable way"> + <option value="fdr" selected="true">fdr</option> + 
<option value="BH">Benjamini Hochberg</option> + <option value="BY">Benjamini Yukateli</option> + <option value="bonferroni">Bonferroni</option> + <option value="hochberg">Hochberg</option> + <option value="holm">Holm</option> + <option value="hommel">Hommel</option> + <option value="none">no control for multiple tests</option> + </param> + </inputs> + <outputs> + <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls"> + <filter>edgeR['doedgeR'] == "T"</filter> + </data> + <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls"> + <filter>DESeq2['doDESeq2'] == "T"</filter> + </data> + <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls"> + <filter>doVoom == "T"</filter> + </data> + <data format="html" name="html_file" label="${title}.html"/> + </outputs> + <stdio> + <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" /> + </stdio> + <tests> +<test> +<param name='input1' value='test_bams2mx.xls' ftype='tabular' /> + <param name='treatment_name' value='case' /> + <param name='title' value='edgeRtest' /> + <param name='useNDF' value='' /> + <param name='doedgeR' value='T' /> + <param name='doVoom' value='T' /> + <param name='doDESeq2' value='T' /> + <param name='fdrtype' value='fdr' /> + <param name='edgeR_priordf' value="8" /> + <param name='fdrthresh' value="0.05" /> + <param name='control_name' value='control' /> + <param name='subjectids' value='' /> + <param name='Treat_cols' value='3,4,5,9' /> + <param name='Control_cols' value='2,6,7,8' /> + <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' /> + <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' /> +</test> +</tests> + +<configfiles> +<configfile name="runme"> +<![CDATA[ +# +# edgeR.Rscript +# updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross +# Performs DGE on a count table containing n replicates of two conditions +# +# Parameters +# +# 1 - Output Dir + +# Original edgeR code by: S.Lunke and A.Kaspi +reallybig = log10(.Machine\$double.xmax) +reallysmall = log10(.Machine\$double.xmin) +library('stringr') +library('gplots') +library('edgeR') +hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') +{ +# Perform clustering for significant pvalues after controlling FWER + samples = colnames(cmat) + gu = unique(group) + gn = rownames(cmat) + if (length(gu) == 2) { + col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"} + pcols = unlist(lapply(group,col.map)) + } else { + colours = rainbow(length(gu),start=0,end=4/6) + pcols = colours[match(group,gu)] } + dm = cmat[(! 
is.na(gn)),] + # remove unlabelled hm rows + nprobes = nrow(dm) + # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance') + if (nprobes > nsamp) { + dm =dm[1:nsamp,] + #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total') + } + newcolnames = substr(colnames(dm),1,20) + colnames(dm) = newcolnames + pdf(outpdfname) + heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none', + Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5) + dev.off() +} + +hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here") +{ + # for 2 groups only was + #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"} + #pcols = unlist(lapply(group,col.map)) + gu = unique(group) + colours = rainbow(length(gu),start=0.3,end=0.6) + pcols = colours[match(group,gu)] + nrows = nrow(cmat) + mtitle = paste(myTitle,'Heatmap: n contigs =',nrows) + if (nrows > nsamp) { + cmat = cmat[c(1:nsamp),] + mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='') + } + newcolnames = substr(colnames(cmat),1,20) + colnames(cmat) = newcolnames + pdf(outpdfname) + heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols) + dev.off() +} + +qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...) +# stolen from https://gist.github.com/703512 +{ + o = -log10(sort(pvector,decreasing=F)) + e = -log10( 1:length(o)/length(o) ) + o[o==-Inf] = reallysmall + o[o==Inf] = reallybig + maint = descr + pdf(outpdf) + plot(e,o,pch=19,cex=1, main=maint, ..., + xlab=expression(Expected~~-log[10](italic(p))), + ylab=expression(Observed~~-log[10](italic(p))), + xlim=c(0,max(e)), ylim=c(0,max(o))) + lines(e,e,col="red") + grid(col = "lightgray", lty = "dotted") + dev.off() +} + +smearPlot = function(DGEList,deTags, outSmear, outMain) + { + pdf(outSmear) + plotSmear(DGEList,de.tags=deTags,main=outMain) + grid(col="lightgray", lty="dotted") + dev.off() + } + +boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) +{ # + nc = ncol(rawrs) + for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA} + fullnames = colnames(rawrs) + newcolnames = substr(colnames(rawrs),1,20) + colnames(rawrs) = newcolnames + newcolnames = substr(colnames(cleanrs),1,20) + colnames(cleanrs) = newcolnames + defpar = par(no.readonly=T) + print.noquote('raw contig counts by sample:') + print.noquote(summary(rawrs)) + print.noquote('normalised contig counts by sample:') + print.noquote(summary(cleanrs)) + pdf(pdfname) + par(mfrow=c(1,2)) + boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint)) + grid(col="lightgray",lty="dotted") + boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint)) + grid(col="lightgray",lty="dotted") + dev.off() + pdfname = "sample_counts_histogram.pdf" + nc = ncol(rawrs) + print.noquote(paste('Using ncol rawrs=',nc)) + ncroot = round(sqrt(nc)) + if (ncroot*ncroot < nc) { ncroot = ncroot + 1 } + m = c() + for (i in c(1:nc)) { + rhist = hist(rawrs[,i],breaks=100,plot=F) + m = append(m,max(rhist\$counts)) + } + ymax = max(m) + ncols = length(fullnames) + if (ncols > 20) + { + scale = 7*ncols/20 + pdf(pdfname,width=scale,height=scale) + } else { + pdf(pdfname) + } + par(mfrow=c(ncroot,ncroot)) + for (i in c(1:nc)) { + hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw 
count', col="maroon", + breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax)) + } + dev.off() + par(defpar) + +} + +cumPlot = function(rawrs,cleanrs,maint,myTitle) +{ # updated to use ecdf + pdfname = "Filtering_rowsum_bar_charts.pdf" + defpar = par(no.readonly=T) + lrs = log(rawrs,10) + lim = max(lrs) + pdf(pdfname) + par(mfrow=c(2,1)) + hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)", + ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1) + grid(col="lightgray", lty="dotted") + lrs = log(cleanrs,10) + hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)", + ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1) + grid(col="lightgray", lty="dotted") + dev.off() + par(defpar) +} + +cumPlot1 = function(rawrs,cleanrs,maint,myTitle) +{ # updated to use ecdf + pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_') + pdf(pdfname) + par(mfrow=c(2,1)) + lastx = max(rawrs) + rawe = knots(ecdf(rawrs)) + cleane = knots(ecdf(cleanrs)) + cy = 1:length(cleane)/length(cleane) + ry = 1:length(rawe)/length(rawe) + plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads", + ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) + grid(col="blue") + plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads", + ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) + grid(col="blue") + dev.off() +} + + + +doGSEAold = function(y=NULL,design=NULL,histgmt="", + bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", + ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") +{ + sink('Camera.log') + genesets = c() + if (bigmt > "") + { + bigenesets = readLines(bigmt) + genesets = bigenesets + } + if (histgmt > "") + { + hgenesets = readLines(histgmt) + if (bigmt > "") { + genesets = rbind(genesets,hgenesets) + } else { + genesets = hgenesets + } # use only history if no bi + } + print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) + genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n + outf = outfname + head=paste(myTitle,'edgeR GSEA') + write(head,file=outfname,append=F) + ntest=length(genesets) + urownames = toupper(rownames(y)) + upcam = c() + downcam = c() + for (i in 1:ntest) { + gs = unlist(genesets[i]) + g = gs[1] # geneset_id + u = gs[2] + if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") } + glist = gs[3:length(gs)] # member gene symbols + glist = toupper(glist) + inglist = urownames %in% glist + nin = sum(inglist) + if ((nin > minnin) && (nin < maxnin)) { + ### print(paste('@@found',sum(inglist),'genes in glist')) + camres = camera(y=y,index=inglist,design=design) + if (! 
is.null(camres)) { + rownames(camres) = g # gene set name + camres = cbind(GeneSet=g,URL=u,camres) + if (camres\$Direction == "Up") + { + upcam = rbind(upcam,camres) } else { + downcam = rbind(downcam,camres) + } + } + } + } + uscam = upcam[order(upcam\$PValue),] + unadjp = uscam\$PValue + uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + nup = max(10,sum((uscam\$adjPValue < fdrthresh))) + dscam = downcam[order(downcam\$PValue),] + unadjp = dscam\$PValue + dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) + write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) + write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) + write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) + write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) + sink() +} + + + + +doGSEA = function(y=NULL,design=NULL,histgmt="", + bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", + ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") +{ + sink('Camera.log') + genesets = c() + if (bigmt > "") + { + bigenesets = readLines(bigmt) + genesets = bigenesets + } + if (histgmt > "") + { + hgenesets = readLines(histgmt) + if (bigmt > "") { + genesets = rbind(genesets,hgenesets) + } else { + genesets = hgenesets + } # use only history if no bi + } + print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) + genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n + outf = outfname + head=paste(myTitle,'edgeR GSEA') + write(head,file=outfname,append=F) + ntest=length(genesets) + urownames = toupper(rownames(y)) + upcam = c() + downcam = c() + incam = c() + urls = c() + gsids = c() + for (i in 1:ntest) { + gs = unlist(genesets[i]) + gsid = gs[1] # geneset_id + url = gs[2] + if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") } + glist = gs[3:length(gs)] # member gene symbols + glist = toupper(glist) + inglist = urownames %in% glist + nin = sum(inglist) + if ((nin > minnin) && (nin < maxnin)) { + incam = c(incam,inglist) + gsids = c(gsids,gsid) + urls = c(urls,url) + } + } + incam = as.list(incam) + names(incam) = gsids + allcam = camera(y=y,index=incam,design=design) + allcamres = cbind(geneset=gsids,allcam,URL=urls) + for (i in 1:ntest) { + camres = allcamres[i] + res = try(test = (camres\$Direction == "Up")) + if ("try-error" %in% class(res)) { + cat("test failed, camres = :") + print.noquote(camres) + } else { if (camres\$Direction == "Up") + { upcam = rbind(upcam,camres) + } else { downcam = rbind(downcam,camres) + } + + } + } + uscam = upcam[order(upcam\$PValue),] + unadjp = uscam\$PValue + uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + nup = max(10,sum((uscam\$adjPValue < fdrthresh))) + dscam = downcam[order(downcam\$PValue),] + unadjp = dscam\$PValue + dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) + write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) + write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) + write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ 
Camera down top',ndown,'gene sets:')) + write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) + sink() + } + + +edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, + fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, + filterquantile=0.2, subjects=c(),mydesign=NULL, + doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', + histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", + doCook=F,DESeq_fitType="parameteric") +{ + # Error handling + if (length(unique(group))!=2){ + print("Number of conditions identified in experiment does not equal 2") + q() + } + require(edgeR) + options(width = 512) + mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") + allN = nrow(Count_Matrix) + nscut = round(ncol(Count_Matrix)/2) + colTotmillionreads = colSums(Count_Matrix)/1e6 + counts.dataframe = as.data.frame(c()) + rawrs = rowSums(Count_Matrix) + nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes + nzN = nrow(nonzerod) + nzrs = rowSums(nonzerod) + zN = allN - nzN + print('# Quantiles for non-zero row counts:',quote=F) + print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F) + if (useNDF == T) + { + gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut + lo = colSums(Count_Matrix[!gt1rpin3,]) + workCM = Count_Matrix[gt1rpin3,] + cleanrs = rowSums(workCM) + cleanN = length(cleanrs) + meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="") + print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F) + maint = paste('Filter >=1/million reads in >=',nscut,'samples') + } else { + useme = (nzrs > quantile(nzrs,filterquantile)) + workCM = nonzerod[useme,] + lo = colSums(nonzerod[!useme,]) + cleanrs = rowSums(workCM) + cleanN = length(cleanrs) + meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="") + print(paste('Read',allN,"contigs. 
Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) + maint = paste('Filter below',filterquantile,'quantile') + } + cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) + allgenes = rownames(workCM) + reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" + genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" + ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') + testreg = str_match(allgenes,reg) + if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string + { + print("@@ using ucsc substitution for urls") + contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") + } else { + print("@@ using genecards substitution for urls") + contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") + } + print.noquote("# urls") + print.noquote(head(contigurls)) + print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) + cmrowsums = rowSums(workCM) + TName=unique(group)[1] + CName=unique(group)[2] + if (is.null(mydesign)) { + if (length(subjects) == 0) + { + mydesign = model.matrix(~group) + } + else { + subjf = factor(subjects) + mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it + } + } + print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) + print.noquote('Using design matrix:') + print.noquote(mydesign) + if (doedgeR) { + sink('edgeR.log') + #### Setup DGEList object + DGEList = DGEList(counts=workCM, group = group) + DGEList = calcNormFactors(DGEList) + + DGEList = estimateGLMCommonDisp(DGEList,mydesign) + comdisp = DGEList\$common.dispersion + DGEList = estimateGLMTrendedDisp(DGEList,mydesign) + if (edgeR_priordf > 0) { + print.noquote(paste("prior.df =",edgeR_priordf)) + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf) + } else { + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) + } + DGLM = glmFit(DGEList,design=mydesign) + DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed + efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors + normData = (1e+06*DGEList\$counts/efflib) + uoutput = cbind( + Name=as.character(rownames(DGEList\$counts)), + DE\$table, + adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), + Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData, + DGEList\$counts + ) + soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable + goodness = gof(DGLM, pcutoff=fdrthresh) + if (sum(goodness\$outlier) > 0) { + print.noquote('GLM outliers:') + print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) + } else { + print('No GLM fit outlier genes found\n') + } + z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) + pdf("edgeR_GoodnessofFit.pdf") + qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") + abline(0,1,lwd=3) + points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") + dev.off() + estpriorn = getPriorN(DGEList) + print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) + efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors + normData = (1e+06*DGEList\$counts/efflib) + uniqueg = unique(group) + #### Plot MDS + sample_colors = match(group,levels(group)) + sampleTypes = levels(factor(group)) + print.noquote(sampleTypes) + pdf("edgeR_MDSplot.pdf") + plotMDS.DGEList(DGEList,main=paste("edgeR 
MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) + legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) + grid(col="blue") + dev.off() + colnames(normData) = paste( colnames(normData),'N',sep="_") + print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) + nzd = data.frame(log(nonzerod + 1e-2,10)) + try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") ) + write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) + tt = cbind( + Name=as.character(rownames(DGEList\$counts)), + DE\$table, + adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), + Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums + ) + print.noquote("# edgeR Top tags\n") + tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely + tt = tt[order(DE\$table\$PValue),] + print.noquote(tt[1:50,]) + deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) + nsig = length(deTags) + print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F) + deColours = ifelse(deTags,'red','black') + pdf("edgeR_BCV_vs_abundance.pdf") + plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance") + dev.off() + dg = DGEList[order(DE\$table\$PValue),] + #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg))) + efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors + normData = (1e+06*dg\$counts/efflib) + outpdfname="edgeR_top_100_heatmap.pdf" + hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle)) + outSmear = "edgeR_smearplot.pdf" + outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') + smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) + qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf') + norm.factor = DGEList\$samples\$norm.factors + topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] + edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) + edgeRcounts = rep(0, length(allgenes)) + edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits + sink() + } ### doedgeR + if (doDESeq2 == T) + { + sink("DESeq2.log") + # DESeq2 + require('DESeq2') + library('RColorBrewer') + pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) + if (length(subjects) == 0) + { + deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) + } else { + deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) + } + #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) + #rDESeq = results(DESeq2) + #newCountDataSet(workCM, group) + deSeqDatsizefac = estimateSizeFactors(deSEQds) + deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) + resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype) + rDESeq = as.data.frame(results(resDESeq)) + rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) + srDESeq = rDESeq[order(rDESeq\$pvalue),] + qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf') + cat("# DESeq top 50\n") + print.noquote(srDESeq[1:50,]) + write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) + topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] + DESeqcountsindex = which(allgenes 
%in% rownames(topresults.DESeq)) + DESeqcounts = rep(0, length(allgenes)) + DESeqcounts[DESeqcountsindex] = 1 + pdf("DESeq2_dispersion_estimates.pdf") + plotDispEsts(resDESeq) + dev.off() + ysmall = abs(min(rDESeq\$log2FoldChange)) + ybig = abs(max(rDESeq\$log2FoldChange)) + ylimit = min(4,ysmall,ybig) + pdf("DESeq2_MA_plot.pdf") + plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) + dev.off() + rlogres = rlogTransformation(resDESeq) + sampledists = dist( t( assay(rlogres) ) ) + sdmat = as.matrix(sampledists) + pdf("DESeq2_sample_distance_plot.pdf") + heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), + col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) + dev.off() + ###outpdfname="DESeq2_top50_heatmap.pdf" + ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle)) + sink() + result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) + if ("try-error" %in% class(result)) { + print.noquote('DESeq2 plotPCA failed.') + } else { + pdf("DESeq2_PCA_plot.pdf") + #### wtf - print? Seems needed to get this to work + print(ppca) + dev.off() + } + } + + if (doVoom == T) { + sink('VOOM.log') + if (doedgeR == F) { + #### Setup DGEList object + DGEList = DGEList(counts=workCM, group = group) + DGEList = calcNormFactors(DGEList) + DGEList = estimateGLMCommonDisp(DGEList,mydesign) + DGEList = estimateGLMTrendedDisp(DGEList,mydesign) + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) + norm.factor = DGEList\$samples\$norm.factors + } + pdf("VOOM_mean_variance_plot.pdf") + dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor) + dev.off() + # Use limma to fit data + fit = lmFit(dat.voomed, mydesign) + fit = eBayes(fit) + rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") + qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf') + rownames(rvoom) = rownames(workCM) + rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) + srvoom = rvoom[order(rvoom\$P.Value),] + cat("# VOOM top 50\n") + print(srvoom[1:50,]) + write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F) + # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma + topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] + voomcountsindex = which(allgenes %in% topresults.voom\$ID) + voomcounts = rep(0, length(allgenes)) + voomcounts[voomcountsindex] = 1 + sink() + } + + if (doCamera) { + doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, + outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) + } + + if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { + if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { + vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh) + counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, + VOOM_limma = voomcounts, row.names = allgenes) + } else if ((doDESeq2==T) && (doedgeR==T)) { + vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh) + counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes) + } else if ((doVoom==T) && (doedgeR==T)) { + vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh) + counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, 
row.names = allgenes) + } + + if (nrow(counts.dataframe > 1)) { + counts.venn = vennCounts(counts.dataframe) + vennf = "Venn_significant_genes_overlap.pdf" + pdf(vennf) + vennDiagram(counts.venn,main=vennmain,col="maroon") + dev.off() + } + } #### doDESeq2 or doVoom + +} +#### Done + +###sink(stdout(),append=T,type="message") +builtin_gmt = "" +history_gmt = "" +history_gmt_name = "" +out_edgeR = F +out_DESeq2 = F +out_VOOM = "$out_VOOM" +doDESeq2 = $DESeq2.doDESeq2 # make these T or F +doVoom = $doVoom +doCamera = $camera.doCamera +doedgeR = $edgeR.doedgeR +edgeR_priordf = 0 + + +#if $doVoom == "T": + out_VOOM = "$out_VOOM" +#end if + +#if $DESeq2.doDESeq2 == "T": + out_DESeq2 = "$out_DESeq2" + DESeq_fitType = "$DESeq2.DESeq_fitType" +#end if + +#if $edgeR.doedgeR == "T": + out_edgeR = "$out_edgeR" + edgeR_priordf = $edgeR.edgeR_priordf +#end if + +#if $camera.doCamera == 'T' + #if $camera.gmtSource.refgmtSource == "indexed" or $camera.gmtSource.refgmtSource == "both": + builtin_gmt = "${camera.gmtSource.builtinGMT.fields.path}" + #end if + #if $camera.gmtSource.refgmtSource == "history" or $camera.gmtSource.refgmtSource == "both": + history_gmt = "${camera.gmtSource.ownGMT}" + history_gmt_name = "${camera.gmtSource.ownGMT.name}" + #end if +#end if + + +if (sum(c(doedgeR,doVoom,doDESeq2)) == 0) +{ +write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr()) +quit(save="no",status=2) +} + +Out_Dir = "$html_file.files_path" +Input = "$input1" +TreatmentName = "$treatment_name" +TreatmentCols = "$Treat_cols" +ControlName = "$control_name" +ControlCols= "$Control_cols" +org = "$input1.dbkey" +if (org == "") { org = "hg19"} +fdrtype = "$fdrtype" +fdrthresh = $fdrthresh +useNDF = $useNDF +fQ = $fQ # non-differential centile cutoff +myTitle = "$title" +sids = strsplit("$subjectids",',') +subjects = unlist(sids) +nsubj = length(subjects) +TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 +CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 +cat('Got TCols=') +cat(TCols) +cat('; CCols=') +cat(CCols) +cat('\n') +useCols = c(TCols,CCols) +if (file.exists(Out_Dir) == F) dir.create(Out_Dir) +Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header +snames = colnames(Count_Matrix) +nsamples = length(snames) +if (nsubj > 0 & nsubj != nsamples) { +options("show.error.messages"=T) +mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','), + 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=',')) +write(mess, stderr()) +quit(save="no",status=4) +} +if (length(subjects) != 0) {subjects = subjects[useCols]} +Count_Matrix = Count_Matrix[,useCols] ### reorder columns +rn = rownames(Count_Matrix) +islib = rn %in% c('librarySize','NotInBedRegions') +LibSizes = Count_Matrix[subset(rn,islib),][1] # take first +Count_Matrix = Count_Matrix[subset(rn,! 
islib),] + group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor + group = factor(group, levels=c(ControlName,TreatmentName)) + colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relabel columns + results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2, + fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.', + myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects, + doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org, + histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType) +sessionInfo() +]]> +</configfile> +</configfiles> +<help> + +**What it does** + +Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes. +Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design. + +**Input** + +Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper +and your favourite gene model to generate inputs. Each row is a genomic feature (e.g. a gene or exon) and each column the +non-negative integer count of reads from one sample overlapping the feature. +The matrix must have a header row uniquely identifying the source samples, and unique row names in +the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods. + +**Specifying comparisons** + +This is basically dumbed down for two factors - case vs control. + +More complex interfaces are possible but painful at present. +Probably need to specify a phenotype file to do this better. +Work in progress. Send code. + +If you have (e.g.) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples), +supply a comma separated list of integer indicators, one for every sample (whether modelled or not!), giving (e.g.) the subject number for each sample, +or leave the list empty if samples are all independent. +If not empty, there must be exactly as many integers in the supplied integer list as there are columns (samples) in the count matrix. +Integers for samples that are not in the analysis *must* be present in the string as filler even if not used. + +So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones. +For example, if you had 6 samples with the first two independent and the second and third pairs each coming from a single subject, you might use +8,9,1,1,2,2 +as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6. + +**Methods available** + +You can run 3 popular Bioconductor packages available for count data. + +edgeR - see edgeR_ for details + +VOOM/limma - see limma_VOOM_ for details + +DESeq2 - see DESeq2_ for details + +and optionally camera in edgeR which works better if MSigDB is installed. + +**Outputs** + +Some helpful plots and analysis results. Note that most of these are produced using R code +suggested by the excellent documentation and vignettes for the Bioconductor +packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy. + +**Note on Voom** + +The voom help in R from limma version 3.16.6 includes this from the authors - but you should read the paper to interpret this method.
+ +This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma. + +voom is an acronym for mean-variance modelling at the observational level. +The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation. +Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend. +This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance. +The weights are then used in the linear modelling process to adjust for heteroscedasticity. + +In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess. +The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag. +The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag. +Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays. +Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation. +This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data. + + +Author(s) + +Charity Law and Gordon Smyth + +References + +Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia. + +Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts. +Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia. +http://www.statsci.org/smyth/pubs/VoomPreprint.pdf + +See Also + +A voom case study is given in the edgeR User's Guide. + +vooma is a similar function but for microarrays instead of RNA-seq. + + +***old rant on changes to Bioconductor package variable names between versions*** + +The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue), +breaking this and all other code that assumed the old name for this variable, +between edgeR 2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing). +This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing +to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly +when their old scripts break. This tool currently works with 2.4.6. + +**Note on prior.N** + +http://seqanswers.com/forums/showthread.php?t=5591 says: + +*prior.n* + +The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion. +You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood +in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your +tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the +common likelihood the weight of one observation.
+ +In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, +or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that +you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation +(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? +What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. +If you have more samples, then the tagwise dispersion estimates will be more reliable, +so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. + + +From Bioconductor Digest, Vol 118, Issue 5, Gordon writes: + +Dear Dorota, + +The important settings are prior.df and trend. + +prior.n and prior.df are related through prior.df = prior.n * residual.df, +and your experiment has residual.df = 36 - 12 = 24. So the old setting of +prior.n=10 is equivalent for your data to prior.df = 240, a very large +value. Going the other way, the new setting of prior.df=10 is equivalent +to prior.n=10/24. + +To recover old results with the current software you would use + + estimateTagwiseDisp(object, prior.df=240, trend="none") + +To get the new default from old software you would use + + estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE) + +Actually the old trend method is equivalent to trend="loess" in the new +software. You should use plotBCV(object) to see whether a trend is +required. + +Note you could also use + + prior.n = getPriorN(object, prior.df=10) + +to map between prior.df and prior.n. + +---- + +**Attributions** + +edgeR - edgeR_ + +VOOM/limma - limma_VOOM_ + +DESeq2 - DESeq2_ for details + +See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package. + +Galaxy_ (that's what you are using right now!) for gluing everything together + +Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is +licensed to you under the LGPL_ like other rgenetics artefacts + +.. _LGPL: http://www.gnu.org/copyleft/lesser.html +.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html +.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html +.. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html +.. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html +.. _Galaxy: http://getgalaxy.org +</help> + +</tool> + + This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/95b2c165412d/ Changeset: 95b2c165412d User: ross...@gmail.com Date: 2013-08-05 02:27:13 Summary: fixing on_string again. 
Add job title to tophat2 Affected #: 4 files diff -r f0cdc6bebfaa8757bb093d74508332515d5eec38 -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -106,6 +106,7 @@ self.allow_user_creation = string_as_bool( kwargs.get( "allow_user_creation", "True" ) ) self.allow_user_deletion = string_as_bool( kwargs.get( "allow_user_deletion", "False" ) ) self.allow_user_dataset_purge = string_as_bool( kwargs.get( "allow_user_dataset_purge", "False" ) ) + self.use_data_id_on_string = string_as_bool( kwargs.get( "use_data_id_on_string", "False" ) ) self.allow_user_impersonation = string_as_bool( kwargs.get( "allow_user_impersonation", "False" ) ) self.new_user_dataset_access_role_default_private = string_as_bool( kwargs.get( "new_user_dataset_access_role_default_private", "False" ) ) self.collect_outputs_from = [ x.strip() for x in kwargs.get( 'collect_outputs_from', 'new_file_path,job_working_directory' ).lower().split(',') ] diff -r f0cdc6bebfaa8757bb093d74508332515d5eec38 -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -191,14 +191,20 @@ data = data.to_history_dataset_association( None ) inp_data[name] = data - else: # HDA - if data.hid: - input_names.append( 'data %s' % data.hid ) +# else: # HDA +# if data.hid: +# input_names.append( 'data %s' % data.hid ) input_ext = data.ext if data.dbkey not in [None, '?']: input_dbkey = data.dbkey - + data_name_sane = re.sub('[^a-zA-Z0-9_]+', '', data.name) + if trans.app.config.use_data_id_on_string: + # we want names in our on_strings not numbers + input_names.append(data_name_sane) + else: + if data.hid: + input_names.append('data %s' % data.hid) # Collect chromInfo dataset and add as parameters to incoming db_datasets = {} db_dataset = trans.db_dataset_for( input_dbkey ) @@ -232,11 +238,14 @@ if len( input_names ) == 1: on_text = input_names[0] elif len( input_names ) == 2: - on_text = '%s and %s' % tuple(input_names[0:2]) + #on_text = '%s and %s' % tuple(input_names[0:2]) + on_text = '%s_%s' % tuple(input_names[0:2]) elif len( input_names ) == 3: - on_text = '%s, %s, and %s' % tuple(input_names[0:3]) + #on_text = '%s, %s, and %s' % tuple(input_names[0:3]) + on_text = '%s_%s_%s' % tuple(input_names[0:3]) elif len( input_names ) > 3: - on_text = '%s, %s, and others' % tuple(input_names[0:2]) + #on_text = '%s, %s, and others' % tuple(input_names[0:2]) + on_text = '%s_%s_and_others' % tuple(input_names[0:2]) else: on_text = "" # Add the dbkey to the incoming parameters diff -r f0cdc6bebfaa8757bb093d74508332515d5eec38 -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e tools/ngs_rna/tophat2_wrapper.xml --- a/tools/ngs_rna/tophat2_wrapper.xml +++ b/tools/ngs_rna/tophat2_wrapper.xml @@ -126,6 +126,14 @@ </command><inputs> + <param name="jobname" type="text" value="Tophat2" size="80" label="Job title for outputs" + help="Output name to remind you what this was for"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + </valid> + </sanitizer> + </param><conditional name="singlePaired"><param name="sPaired" type="select" label="Is this library mate-paired?"><option value="single">Single-end</option> @@ -273,19 +281,19 @@ </stdio><outputs> - <data format="tabular" name="fusions" label="${on_string}_tophat2fus.xls" from_work_dir="tophat_out/fusions.out"> + <data format="tabular" name="fusions" label="${on_string}_${jobname}_fusions.xls" 
from_work_dir="tophat_out/fusions.out"><filter>(params['settingsType'] == 'full' and params['fusion_search']['do_search'] == 'Yes')</filter></data> - <data format="bed" name="insertions" label="${on_string}_tophat2ins.bed" from_work_dir="tophat_out/insertions.bed"> + <data format="bed" name="insertions" label="${on_string}_${jobname}_ins.bed" from_work_dir="tophat_out/insertions.bed"><expand macro="dbKeyActions" /></data> - <data format="bed" name="deletions" label="${on_string}_tophhat2del.bed" from_work_dir="tophat_out/deletions.bed"> + <data format="bed" name="deletions" label="${on_string}_${jobname}_del.bed" from_work_dir="tophat_out/deletions.bed"><expand macro="dbKeyActions" /></data> - <data format="bed" name="junctions" label="${on_string}tophat2sj.bed" from_work_dir="tophat_out/junctions.bed"> + <data format="bed" name="junctions" label="${on_string}_${jobname}_splicejunc.bed" from_work_dir="tophat_out/junctions.bed"><expand macro="dbKeyActions" /></data> - <data format="bam" name="accepted_hits" label="${on_string}tophat2hits.bam" from_work_dir="tophat_out/accepted_hits.bam"> + <data format="bam" name="accepted_hits" label="${on_string}_${jobname}_hits.bam" from_work_dir="tophat_out/accepted_hits.bam"><expand macro="dbKeyActions" /></data></outputs> diff -r f0cdc6bebfaa8757bb093d74508332515d5eec38 -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e tools/sr_mapping/bowtie2_wrapper.xml --- a/tools/sr_mapping/bowtie2_wrapper.xml +++ b/tools/sr_mapping/bowtie2_wrapper.xml @@ -95,15 +95,14 @@ #end if ## view/sort and output file - | samtools view -Su - | samtools sort -o - - > $output; + | samtools view -Su - | samtools sort -o - - > $output ## rename unaligned sequence files #if $library.type == "paired" and $output_unaligned_reads_l and $output_unaligned_reads_r: #set left = str($output_unaligned_reads_l).replace( '.dat', '.1.dat' ) #set right = str($output_unaligned_reads_l).replace( '.dat', '.2.dat' ) - - mv $left $output_unaligned_reads_l; - mv $right $output_unaligned_reads_r; + ;mv $left $output_unaligned_reads_l; + mv $right $output_unaligned_reads_r #end if </command> @@ -218,7 +217,7 @@ <!-- define outputs --><outputs> - <data format="fastqsanger" name="output_unaligned_reads_l" label="${on_string}_{jobtitle}_unaligL.fastq" > + <data format="fastqsanger" name="output_unaligned_reads_l" label="${on_string}_${jobtitle}_unaligL.fastq" ><filter>unaligned_file is True</filter><actions><action type="format"> @@ -226,7 +225,7 @@ </action></actions></data> - <data format="fastqsanger" name="output_unaligned_reads_r" label="${on_string}_{jobtitle}_unaligR.fastq)"> + <data format="fastqsanger" name="output_unaligned_reads_r" label="${on_string}_${jobtitle}_unaligR.fastq)"><filter>library['type'] == "paired" and unaligned_file is True</filter><actions><action type="format"> @@ -234,7 +233,7 @@ </action></actions></data> - <data format="bam" name="output" label="${tool.name} on ${on_string}_{jobtitle}_aligned.bam"> + <data format="bam" name="output" label="${tool.name} on ${on_string}_${jobtitle}_aligned.bam"><actions><conditional name="reference_genome.source"><when value="indexed"> https://bitbucket.org/galaxy/galaxy-central/commits/fcfd3af76225/ Changeset: fcfd3af76225 User: ross...@gmail.com Date: 2013-08-07 01:45:35 Summary: monkeypatches to output.py Affected #: 3 files diff -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e -r fcfd3af762255910c5959c8c8147af90ab4038bc lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ 
-1,5 +1,6 @@ import os import galaxy.tools +import re from galaxy.exceptions import ObjectInvalid from galaxy.model import LibraryDatasetDatasetAssociation diff -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e -r fcfd3af762255910c5959c8c8147af90ab4038bc lib/galaxy/tools/parameters/output.py --- a/lib/galaxy/tools/parameters/output.py +++ b/lib/galaxy/tools/parameters/output.py @@ -215,9 +215,13 @@ self.offset = elem.get( 'offset', -1 ) self.offset = int( self.offset ) else: + self.options = [] self.missing_tool_data_table_name = self.name def get_value( self, other_values ): - options = self.options + if self.options: + options = self.options + else: + options = [] for filter in self.filters: options = filter.filter_options( options, other_values ) try: diff -r 95b2c165412d1a7c55c3f6173d6c0c0285dd788e -r fcfd3af762255910c5959c8c8147af90ab4038bc tool-data/bowtie2_indices.loc.sample --- a/tool-data/bowtie2_indices.loc.sample +++ b/tool-data/bowtie2_indices.loc.sample @@ -1,37 +1,37 @@ -# bowtie2_indices.loc.sample -# This is a *.loc.sample file distributed with Galaxy that enables tools -# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. -# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup -# First create these data files and save them in your own data directory structure. -# Then, create a bowtie_indices.loc file to use those indexes with tools. -# Copy this file, save it with the same name (minus the .sample), -# follow the format examples, and store the result in this directory. -# The file should include an one line entry for each index set. -# The path points to the "basename" for the set, not a specific file. -# It has four text columns seperated by TABS. +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie2 indexed sequences data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie2_indices.loc +#file has this format (longer white space characters are TAB characters): # -# <unique_build_id><dbkey><display_name><file_base_path> +#<unique_build_id><dbkey><display_name><file_base_path> # -# So, for example, if you had hg18 indexes stored in: +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/bowtie2/hg18/, +#then the bowtie2_indices.loc entry would look like this: # -# /depot/data2/galaxy/hg19/bowtie2/ +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie2/hg18/hg18 # -# containing hg19 genome and hg19.*.bt2 files, such as: -# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa -# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 -# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 -# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 -# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 -# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 -# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +#and your /depot/data2/galaxy/bowtie2/hg18/ directory +#would contain hg18.*.ebwt files: # -# then the bowtie2_indices.loc entry could look like this: +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... 
# -#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +#Your bowtie2_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: # -#More examples: +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie2/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie2/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie2/hg19/hg19 +#...etc... # -#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 -#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/mm10/bowtie2/dm3 +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. # -# https://bitbucket.org/galaxy/galaxy-central/commits/da94b2539602/ Changeset: da94b2539602 User: ross...@gmail.com Date: 2013-08-07 01:53:35 Summary: branch merge Affected #: 79 files diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b .hgignore --- a/.hgignore +++ b/.hgignore @@ -60,7 +60,7 @@ job_conf.xml data_manager_conf.xml shed_data_manager_conf.xml -config/visualizations/*.xml +config/* static/welcome.html.* static/welcome.html diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/plugins/visualizations/README.txt --- /dev/null +++ b/config/plugins/visualizations/README.txt @@ -0,0 +1,34 @@ +Custom visualization plugins +---------------------------- + +Visualizations can be added to your Galaxy instance by creating +sub-directories, templates, and static files here. + +Properly configured and written visualizations will be accessible to +the user when they click the 'visualizations' icon for a dataset +in their history panel. + +The framework must be enabled in your 'universe_wsgi.ini' file by +uncommenting (and having a valid path for) the +'visualizations_plugin_directory' entry. + +For more information, see http://wiki.galaxyproject.org/VisualizationsRegistry + + +Sub-directory structure +----------------------- + +In general, sub-directories should follow the pattern: + + my_visualization/ + config/ + my_visualization.xml + static/ + ... any static files the visualization needs (if any) + templates/ + ... any Mako templates the visualization needs + +The XML config file for a visualization plugin can be validated on the command +line using (from your plugin directory): + + xmllint my_visualization/config/my_visualization.xml --valid --noout diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/plugins/visualizations/visualization.dtd --- /dev/null +++ b/config/plugins/visualizations/visualization.dtd @@ -0,0 +1,130 @@ +<!-- each visualization must have a template (all other elements are optional) --> +<!ELEMENT visualization (data_sources*,params*,template_root*,template,link_text*,render_location*)> +<!-- visualization name (e.g. 'trackster', 'scatterplot', etc.) is required --> +<!ATTLIST visualization + name CDATA #REQUIRED +> + +<!ELEMENT data_sources (data_source*)> +<!-- data sources are elements that describe what objects (HDAs, LDDAs, Job, User, etc.) + are applicable to a visualization. Often these are used to fetch applicable links + to the visualizations that use them. 
+--> + <!ELEMENT data_source (model_class,(test|to_param)*)> + <!ELEMENT model_class (#PCDATA)> + <!-- model_class is currently the class name of the object you want to make a visualization + applicable to (e.g. HistoryDatasetAssociation). Currently only classes in galaxy.model + can be used. + REQUIRED and currently limited to: 'HistoryDatasetAssociation', 'LibraryDatasetDatasetAssociation' + --> + <!ELEMENT test (#PCDATA)> + <!-- tests help define what conditions the visualization can be applied to the model_class/target. + Currently, all tests are OR'd and there is no logical grouping. Tests are run in order. + (text): the text of this element is what the given target will be compared to (REQUIRED) + type: what type of test to run (e.g. when the target is an HDA the test will often be of type 'isinstance' + and test whether the HDA's datatype isinstace of a class) + DEFAULT: string comparison. + test_attr: what attribute of the target object should be used in the test. For instance, 'datatype' + will attempt to get the HDA.datatype from a target HDA. If the given object doesn't have + that attribute the test will fail (with no error). test_attr can be dot separated attributes, + looking up each in turn. For example, if the target was a history, one could access the + history.user.email by setting test_attr to 'user.email' (why you would want that, I don't know) + DEFAULT: to comparing the object itself (and not any of it's attributes) + result_type: if the result (the text of the element mentioned above) needs to be parsed into + something other than a string, result_type will tell the registry how to do this. E.g. + if result_type is 'datatype' the registry will assume the text is a datatype class name + and parse it into the proper class before the test (often 'isinstance') is run. + DEFAULT: no parsing (result should be a string) + --> + <!ATTLIST test + type CDATA #IMPLIED + test_attr CDATA #IMPLIED + result_type CDATA #IMPLIED + > + + <!ELEMENT to_param (#PCDATA)> + <!-- to_param tells the registry how to parse the data_source into a query string param. + For example, HDA data_sources can set param_to text to 'dataset_id' and param_attr to 'id' and the + the target HDA (if it passes the tests) will be passed as "dataset_id=HDA.id" + (text): the query string param key this source will be parsed into (e.g. dataset_id) + REQUIRED + param_attr: the attribute of the data_source object to use as the value in the query string param. + E.g. param_attr='id' for an HDA data_source would use the (encoded) id. + NOTE: a to_param MUST have either a param_attr or assign + assign: you can use this to directly assign a value to a query string's param. E.g. if the + data_source is a LDDA we can set 'hda_or_ldda=ldda' using assign='ldda'. + NOTE: a to_param MUST have either a param_attr or assign + --> + <!ATTLIST to_param + param_attr CDATA #IMPLIED + assign CDATA #IMPLIED + > + +<!ELEMENT params ((param|param_modifier)*)> +<!-- params describe what data will be sent to a visualization template and + how to convert them from a query string in a URL into variables usable in a template. + For example, + param_modifiers are a special class of parameters that modify other params + (e.g. hda_ldda can be 'hda' or 'ldda' and modifies/informs dataset_id to fetch an HDA or LDDA) +--> + <!ELEMENT param (#PCDATA)> + <!-- param tells the registry how to parse the query string param back into a resource/data_source. 
+ For example, if a query string has "dataset_id=NNN" and the type is 'dataset', the registry + will attempt to fetch the hda with id of NNN from the database and pass it to the template. + (text): the query string param key this source will be parsed from (e.g. dataset_id) + REQUIRED + type: the type of the resource. + Can be: str (DEFAULT), bool, int, float, json, visualization, dbkey, dataset, or hda_ldda. + default: if a param is not passed on the query string (and is not required) OR the given param + fails to parse, this value is used instead. + DEFAULT: None + required: set this to true if the param is required for the template. Rendering will with an error + if the param hasn't been sent. + DEFAULT: false + csv: set this to true if the param is a comma separated list. The registry will attempt to + parse each value as the given type and send the result as a list to the template. + DEFAULT: false + constrain_to: (currently unused) constain a param to a set of values, error if not valid. + DEFAULT: don't constrain + var_name_in_template: a new name for the resource/variable to use in the template. E.g. an initial + query string param key might be 'dataset_id' in the URL, the registry parses it into an HDA, + and if var_name_in_template is set to 'hda', the template will be able to access the HDA + with the variable name 'hda' (as in hda.title). + DEFAULT: keep the original query string name + --> + <!ATTLIST param + type CDATA #IMPLIED + default CDATA #IMPLIED + required CDATA #IMPLIED + csv CDATA #IMPLIED + constrain_to CDATA #IMPLIED + var_name_in_template CDATA #IMPLIED + > + <!-- param_modifiers are the same as param but have a REQUIRED 'modifies' attribute. + 'modifies' must point to the param name (the text part of param element) that it will modify. + E.g. <param_modifier modifies="dataset_id">hda_ldda</param_modifier> + --> + <!ELEMENT param_modifier (#PCDATA)> + <!ATTLIST param_modifier + modifies CDATA #REQUIRED + type CDATA #IMPLIED + default CDATA #IMPLIED + required CDATA #IMPLIED + csv CDATA #IMPLIED + constrain_to CDATA #IMPLIED + var_name_in_template CDATA #IMPLIED + > + +<!-- template_root: the directory to search for the template relative to templates/webapps/galaxy + (optional) DEFAULT: visualizations +--> +<!ELEMENT template_root (#PCDATA)> +<!-- template: the template used to render the visualization. REQUIRED --> +<!ELEMENT template (#PCDATA)> +<!-- link_text: the text component of an html anchor displayed when the registry builds the link information --> +<!ELEMENT link_text (#PCDATA)> +<!-- render_location: used as the target attribute of the link to the visualization. + Can be 'galaxy_main', '_top', '_blank'. 
DEFAULT: 'galaxy_main' +--> +<!-- TODO: rename -> render_target --> +<!ELEMENT render_location (#PCDATA)> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/plugins/visualizations/visualization_base.mako --- /dev/null +++ b/config/plugins/visualizations/visualization_base.mako @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +<% _=n_ %> + +%if embedded: + ${self.as_embedded()} +%else: + ${self.as_page()} +%endif + +## render this inside another page or via ajax +<%def name="as_embedded()"> + ${self.stylesheets()} + ${self.javascripts()} + ${self.get_body()} +</%def> + +## render this as it's own page +<%def name="as_page()"> +<!DOCTYPE HTML> +<html> + <head> + <title>${self.title()}</title> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + ${self.metas()} + ${self.stylesheets()} + ${self.javascripts()} + </head> + <body> + ${self.get_body()} + </body> +</html> +</%def> +##TODO: late_javascripts + +## Default body +<%def name="get_body()"></%def> + +## Default title +<%def name="title()">${visualization_name}</%def> + +## Additional metas can be defined by templates inheriting from this one. +<%def name="metas()"></%def> + +## Default stylesheets +<%def name="stylesheets()"> +${h.css('base')} +</%def> + +## Default javascripts +<%def name="javascripts()"> +${h.js( + "libs/jquery/jquery", + "libs/jquery/jquery.migrate" +)} + +<script type="text/javascript"> + // console protection + window.console = window.console || { + log : function(){}, + debug : function(){}, + info : function(){}, + warn : function(){}, + error : function(){}, + assert : function(){} + }; +</script> +</%def> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/visualizations/circster.xml.sample --- a/config/visualizations/circster.xml.sample +++ /dev/null @@ -1,26 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE visualization SYSTEM "visualization.dtd"> -<visualization name="circster"> - <data_sources> - <data_source> - <model_class>HistoryDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> - <to_param param_attr="id">dataset_id</to_param> - <to_param assign="hda">hda_ldda</to_param> - </data_source> - <data_source> - <model_class>LibraryDatasetDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> - <to_param param_attr="id">dataset_id</to_param> - <to_param assign="ldda">hda_ldda</to_param> - </data_source> - </data_sources> - <params> - <param type="visualization">id</param> - <param type="hda_or_ldda">dataset_id</param> - <param_modifier type="string" modifies="dataset_id">hda_ldda</param_modifier> - <param type="dbkey">dbkey</param> - </params> - <template>circster.mako</template> - <render_location>_top</render_location> -</visualization> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/visualizations/phyloviz.xml.sample --- a/config/visualizations/phyloviz.xml.sample +++ /dev/null @@ -1,18 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE visualization SYSTEM "visualization.dtd"> -<visualization name="phyloviz"> - <data_sources> - <data_source> - <model_class>HistoryDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Newick</test> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Nexus</test> - <to_param 
param_attr="id">dataset_id</to_param> - </data_source> - </data_sources> - <params> - <param type="dataset" var_name_in_template="hda" required="true">dataset_id</param> - <param type="integer" default="0">tree_index</param> - </params> - <template>phyloviz.mako</template> - <render_location>_top</render_location> -</visualization> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/visualizations/scatterplot.xml.sample --- a/config/visualizations/scatterplot.xml.sample +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE visualization SYSTEM "visualization.dtd"> -<visualization name="scatterplot"> - <data_sources> - <data_source> - <model_class>HistoryDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">tabular.Tabular</test> - <to_param param_attr="id">dataset_id</to_param> - </data_source> - </data_sources> - <params> - <param type="dataset" var_name_in_template="hda" required="true">dataset_id</param> - </params> - <template>scatterplot.mako</template> -</visualization> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/visualizations/sweepster.xml.sample --- a/config/visualizations/sweepster.xml.sample +++ /dev/null @@ -1,25 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE visualization SYSTEM "visualization.dtd"> -<visualization name="sweepster"> - <data_sources> - <data_source> - <model_class>HistoryDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> - <to_param param_attr="id">dataset_id</to_param> - <to_param assign="hda">hda_ldda</to_param> - </data_source> - <data_source> - <model_class>LibraryDatasetDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> - <to_param param_attr="id">dataset_id</to_param> - <to_param assign="ldda">hda_ldda</to_param> - </data_source> - </data_sources> - <params> - <param type="visualization" var_name_in_template="viz">visualization</param> - <param type="hda_or_ldda" var_name_in_template="dataset">dataset_id</param> - <param_modifier type="string" modifies="dataset_id">hda_ldda</param_modifier> - </params> - <template>sweepster.mako</template> - <render_location>_top</render_location> -</visualization> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/visualizations/trackster.xml.sample --- a/config/visualizations/trackster.xml.sample +++ /dev/null @@ -1,29 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE visualization SYSTEM "visualization.dtd"> -<visualization name="trackster"> - <!--not tested yet --> - <data_sources> - <data_source> - <model_class>HistoryDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> - <to_param param_attr="id">dataset_id</to_param> - <to_param assign="hda">hda_ldda</to_param> - <to_param param_attr="dbkey">dbkey</to_param> - </data_source> - <data_source> - <model_class>LibraryDatasetDatasetAssociation</model_class> - <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> - <to_param param_attr="id">dataset_id</to_param> - <to_param assign="ldda">hda_ldda</to_param> - </data_source> - </data_sources> - <params> - <param type="visualization">id</param> - <param type="dataset">dataset_id</param> - <param type="genome_region">genome_region</param> - <param 
type="dbkey">dbkey</param> - </params> - <template_root>tracks</template_root> - <template>browser.mako</template> - <render_location>_top</render_location> -</visualization> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b config/visualizations/visualization.dtd --- a/config/visualizations/visualization.dtd +++ /dev/null @@ -1,132 +0,0 @@ -<!-- runnable on NIX with xmllint --> - -<!-- each visualization must have a template (all other elements are optional) --> -<!ELEMENT visualization (data_sources*,params*,template_root*,template,link_text*,render_location*)> -<!-- visualization name (e.g. 'trackster', 'scatterplot', etc.) is required --> -<!ATTLIST visualization - name CDATA #REQUIRED -> - -<!ELEMENT data_sources (data_source*)> -<!-- data sources are elements that describe what objects (HDAs, LDDAs, Job, User, etc.) - are applicable to a visualization. Often these are used to fetch applicable links - to the visualizations that use them. ---> - <!ELEMENT data_source (model_class,(test|to_param)*)> - <!ELEMENT model_class (#PCDATA)> - <!-- model_class is currently the class name of the object you want to make a visualization - applicable to (e.g. HistoryDatasetAssociation). Currently only classes in galaxy.model - can be used. - REQUIRED and currently limited to: 'HistoryDatasetAssociation', 'LibraryDatasetDatasetAssociation' - --> - <!ELEMENT test (#PCDATA)> - <!-- tests help define what conditions the visualization can be applied to the model_class/target. - Currently, all tests are OR'd and there is no logical grouping. Tests are run in order. - (text): the text of this element is what the given target will be compared to (REQUIRED) - type: what type of test to run (e.g. when the target is an HDA the test will often be of type 'isinstance' - and test whether the HDA's datatype isinstace of a class) - DEFAULT: string comparison. - test_attr: what attribute of the target object should be used in the test. For instance, 'datatype' - will attempt to get the HDA.datatype from a target HDA. If the given object doesn't have - that attribute the test will fail (with no error). test_attr can be dot separated attributes, - looking up each in turn. For example, if the target was a history, one could access the - history.user.email by setting test_attr to 'user.email' (why you would want that, I don't know) - DEFAULT: to comparing the object itself (and not any of it's attributes) - result_type: if the result (the text of the element mentioned above) needs to be parsed into - something other than a string, result_type will tell the registry how to do this. E.g. - if result_type is 'datatype' the registry will assume the text is a datatype class name - and parse it into the proper class before the test (often 'isinstance') is run. - DEFAULT: no parsing (result should be a string) - --> - <!ATTLIST test - type CDATA #IMPLIED - test_attr CDATA #IMPLIED - result_type CDATA #IMPLIED - > - - <!ELEMENT to_param (#PCDATA)> - <!-- to_param tells the registry how to parse the data_source into a query string param. - For example, HDA data_sources can set param_to text to 'dataset_id' and param_attr to 'id' and the - the target HDA (if it passes the tests) will be passed as "dataset_id=HDA.id" - (text): the query string param key this source will be parsed into (e.g. dataset_id) - REQUIRED - param_attr: the attribute of the data_source object to use as the value in the query string param. - E.g. param_attr='id' for an HDA data_source would use the (encoded) id. 
- NOTE: a to_param MUST have either a param_attr or assign - assign: you can use this to directly assign a value to a query string's param. E.g. if the - data_source is a LDDA we can set 'hda_or_ldda=ldda' using assign='ldda'. - NOTE: a to_param MUST have either a param_attr or assign - --> - <!ATTLIST to_param - param_attr CDATA #IMPLIED - assign CDATA #IMPLIED - > - -<!ELEMENT params ((param|param_modifier)*)> -<!-- params describe what data will be sent to a visualization template and - how to convert them from a query string in a URL into variables usable in a template. - For example, - param_modifiers are a special class of parameters that modify other params - (e.g. hda_ldda can be 'hda' or 'ldda' and modifies/informs dataset_id to fetch an HDA or LDDA) ---> - <!ELEMENT param (#PCDATA)> - <!-- param tells the registry how to parse the query string param back into a resource/data_source. - For example, if a query string has "dataset_id=NNN" and the type is 'dataset', the registry - will attempt to fetch the hda with id of NNN from the database and pass it to the template. - (text): the query string param key this source will be parsed from (e.g. dataset_id) - REQUIRED - type: the type of the resource. - Can be: str (DEFAULT), bool, int, float, json, visualization, dbkey, dataset, or hda_ldda. - default: if a param is not passed on the query string (and is not required) OR the given param - fails to parse, this value is used instead. - DEFAULT: None - required: set this to true if the param is required for the template. Rendering will with an error - if the param hasn't been sent. - DEFAULT: false - csv: set this to true if the param is a comma separated list. The registry will attempt to - parse each value as the given type and send the result as a list to the template. - DEFAULT: false - constrain_to: (currently unused) constain a param to a set of values, error if not valid. - DEFAULT: don't constrain - var_name_in_template: a new name for the resource/variable to use in the template. E.g. an initial - query string param key might be 'dataset_id' in the URL, the registry parses it into an HDA, - and if var_name_in_template is set to 'hda', the template will be able to access the HDA - with the variable name 'hda' (as in hda.title). - DEFAULT: keep the original query string name - --> - <!ATTLIST param - type CDATA #IMPLIED - default CDATA #IMPLIED - required CDATA #IMPLIED - csv CDATA #IMPLIED - constrain_to CDATA #IMPLIED - var_name_in_template CDATA #IMPLIED - > - <!-- param_modifiers are the same as param but have a REQUIRED 'modifies' attribute. - 'modifies' must point to the param name (the text part of param element) that it will modify. - E.g. <param_modifier modifies="dataset_id">hda_ldda</param_modifier> - --> - <!ELEMENT param_modifier (#PCDATA)> - <!ATTLIST param_modifier - modifies CDATA #REQUIRED - type CDATA #IMPLIED - default CDATA #IMPLIED - required CDATA #IMPLIED - csv CDATA #IMPLIED - constrain_to CDATA #IMPLIED - var_name_in_template CDATA #IMPLIED - > - -<!-- template_root: the directory to search for the template relative to templates/webapps/galaxy - (optional) DEFAULT: visualizations ---> -<!ELEMENT template_root (#PCDATA)> -<!-- template: the template used to render the visualization. REQUIRED --> -<!ELEMENT template (#PCDATA)> -<!-- link_text: the text component of an html anchor displayed when the registry builds the link information --> -<!ELEMENT link_text (#PCDATA)> -<!-- render_location: used as the target attribute of the link to the visualization. 
- Can be 'galaxy_main', '_top', '_blank'. DEFAULT: 'galaxy_main' ---> -<!-- TODO: rename -> render_target --> -<!ELEMENT render_location (#PCDATA)> diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -54,7 +54,7 @@ from tool_shed.galaxy_install.migrate.check import verify_tools verify_tools( self, db_url, kwargs.get( 'global_conf', {} ).get( '__file__', None ), self.config.database_engine_options ) # Object store manager - self.object_store = build_object_store_from_config(self.config) + self.object_store = build_object_store_from_config(self.config, fsmon=True) # Setup the database engine and ORM from galaxy.model import mapping self.model = mapping.init( self.config.file_path, @@ -123,10 +123,8 @@ # Load genome indexer tool. load_genome_index_tools( self.toolbox ) # visualizations registry: associates resources with visualizations, controls how to render - self.visualizations_registry = None - if self.config.visualizations_config_directory: - self.visualizations_registry = VisualizationsRegistry( self.config.root, - self.config.visualizations_config_directory ) + self.visualizations_registry = VisualizationsRegistry.from_config( + self.config.visualizations_plugins_directory, self.config ) # Load security policy. self.security_agent = self.model.security_agent self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions ) diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -76,21 +76,24 @@ self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root ) self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root ) self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) ) + self.running_functional_tests = string_as_bool( kwargs.get( 'running_functional_tests', False ) ) self.hours_between_check = kwargs.get( 'hours_between_check', 12 ) try: - hbc_test = int( self.hours_between_check ) - self.hours_between_check = hbc_test - if self.hours_between_check < 1 or self.hours_between_check > 24: + if isinstance( self.hours_between_check, int ): + if self.hours_between_check < 1 or self.hours_between_check > 24: + self.hours_between_check = 12 + elif isinstance( self.hours_between_check, float ): + # If we're running functional tests, the minimum hours between check should be reduced to 0.001, or 3.6 seconds. + if self.running_functional_tests: + if self.hours_between_check < 0.001 or self.hours_between_check > 24.0: + self.hours_between_check = 12.0 + else: + if self.hours_between_check < 1.0 or self.hours_between_check > 24.0: + self.hours_between_check = 12.0 + else: self.hours_between_check = 12 except: - try: - # Float values are supported for functional tests. 
- hbc_test = float( self.hours_between_check ) - self.hours_between_check = hbc_test - if self.hours_between_check < 0.001 or self.hours_between_check > 24.0: - self.hours_between_check = 12.0 - except: - self.hours_between_check = 12 + self.hours_between_check = 12 self.update_integrated_tool_panel = kwargs.get( "update_integrated_tool_panel", True ) self.enable_data_manager_user_view = string_as_bool( kwargs.get( "enable_data_manager_user_view", "False" ) ) self.data_manager_config_file = resolve_path( kwargs.get('data_manager_config_file', 'data_manager_conf.xml' ), self.root ) @@ -277,7 +280,6 @@ self.biostar_url = kwargs.get( 'biostar_url', None ) self.biostar_key_name = kwargs.get( 'biostar_key_name', None ) self.biostar_key = kwargs.get( 'biostar_key', None ) - self.running_functional_tests = string_as_bool( kwargs.get( 'running_functional_tests', False ) ) # Experimental: This will not be enabled by default and will hide # nonproduction code. # The api_folders refers to whether the API exposes the /folders section. @@ -290,8 +292,10 @@ self.fluent_log = string_as_bool( kwargs.get( 'fluent_log', False ) ) self.fluent_host = kwargs.get( 'fluent_host', 'localhost' ) self.fluent_port = int( kwargs.get( 'fluent_port', 24224 ) ) - # visualization registries config directory - self.visualizations_config_directory = kwargs.get( 'visualizations_config_directory', None ) + # PLUGINS: + self.plugin_frameworks = [] + # visualization framework + self.visualizations_plugins_directory = kwargs.get( 'visualizations_plugins_directory', None ) @property def sentry_dsn_public( self ): diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/jobs/runners/condor.py --- a/lib/galaxy/jobs/runners/condor.py +++ b/lib/galaxy/jobs/runners/condor.py @@ -246,12 +246,12 @@ if job_id is None: self.put( job_wrapper ) return - cjs = CondorJobState() + cjs = CondorJobState( job_wrapper=job_wrapper, files_dir=self.app.config.cluster_files_directory ) cjs.job_id = str( job_id ) cjs.command_line = job.get_command_line() cjs.job_wrapper = job_wrapper cjs.job_destination = job_wrapper.job_destination - cjs.user_log = os.path.join( self.app.config.cluster_files_directory, '%s.condor.log' % galaxy_id_tag ) + cjs.user_log = os.path.join( self.app.config.cluster_files_directory, 'galaxy_%s.condor.log' % galaxy_id_tag ) cjs.register_cleanup_file_attribute( 'user_log' ) self.__old_state_paths( cjs ) if job.state == model.Job.states.RUNNING: @@ -268,9 +268,9 @@ files in the AsychronousJobState object """ if cjs.job_wrapper is not None: - user_log = "%s/%s.condor.log" % (self.app.config.cluster_files_directory, job_wrapper.job_id) - if not os.path.exists( cjs.job_file ) and os.path.exists( job_file ): - cjs.output_file = "%s/%s.o" % (self.app.config.cluster_files_directory, job_wrapper.job_id) - cjs.error_file = "%s/%s.e" % (self.app.config.cluster_files_directory, job_wrapper.job_id) - cjs.job_file = "%s/galaxy_%s.sh" % (self.app.config.cluster_files_directory, job_wrapper.job_id) + user_log = "%s/%s.condor.log" % (self.app.config.cluster_files_directory, cjs.job_wrapper.job_id) + if not os.path.exists( cjs.user_log ) and os.path.exists( user_log ): + cjs.output_file = "%s/%s.o" % (self.app.config.cluster_files_directory, cjs.job_wrapper.job_id) + cjs.error_file = "%s/%s.e" % (self.app.config.cluster_files_directory, cjs.job_wrapper.job_id) + cjs.job_file = "%s/galaxy_%s.sh" % (self.app.config.cluster_files_directory, cjs.job_wrapper.job_id) cjs.user_log = user_log diff -r 
fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/jobs/runners/pbs.py --- a/lib/galaxy/jobs/runners/pbs.py +++ b/lib/galaxy/jobs/runners/pbs.py @@ -588,8 +588,8 @@ def stop_job( self, job ): """Attempts to delete a job from the PBS queue""" - job_tag = ( "(%s/%s)" - % ( job.get_id_tag(), job.get_job_runner_external_id() ) ) + job_id = job.get_job_runner_external_id().encode('utf-8') + job_tag = "(%s/%s)" % ( job.get_id_tag(), job_id ) log.debug( "%s Stopping PBS job" % job_tag ) # Declare the connection handle c so that it can be cleaned up: @@ -606,7 +606,7 @@ log.debug("(%s) Connection to PBS server for job delete failed" % job_tag ) return - pbs.pbs_deljob( c, job.get_job_runner_external_id(), '' ) + pbs.pbs_deljob( c, job_id, '' ) log.debug( "%s Removed from PBS queue before job completion" % job_tag ) except: diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -666,10 +666,11 @@ api_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' ) api_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged' ) + default_name = 'Unnamed history' def __init__( self, id=None, name=None, user=None ): self.id = id - self.name = name or "Unnamed history" + self.name = name or History.default_name self.deleted = False self.purged = False self.importing = False diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/objectstore/__init__.py --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -864,7 +864,7 @@ store selected randomly, but with weighting. """ - def __init__(self, config): + def __init__(self, config, fsmon=False): super(DistributedObjectStore, self).__init__() self.distributed_config = config.distributed_object_store_config_file assert self.distributed_config is not None, "distributed object store ('object_store = distributed') " \ @@ -881,9 +881,10 @@ self.__parse_distributed_config(config) self.sleeper = None - if self.global_max_percent_full or filter(lambda x: x != 0.0, self.max_percent_full.values()): + if fsmon and ( self.global_max_percent_full or filter( lambda x: x != 0.0, self.max_percent_full.values() ) ): self.sleeper = Sleeper() self.filesystem_monitor_thread = threading.Thread(target=self.__filesystem_monitor) + self.filesystem_monitor_thread.setDaemon( True ) self.filesystem_monitor_thread.start() log.info("Filesystem space monitor started") @@ -1020,7 +1021,7 @@ def __init__(self, backends=[]): super(HierarchicalObjectStore, self).__init__() -def build_object_store_from_config(config): +def build_object_store_from_config(config, fsmon=False): """ Depending on the configuration setting, invoke the appropriate object store """ store = config.object_store @@ -1029,7 +1030,7 @@ elif store == 's3' or store == 'swift': return S3ObjectStore(config=config) elif store == 'distributed': - return DistributedObjectStore(config=config) + return DistributedObjectStore(config=config, fsmon=fsmon) elif store == 'hierarchical': return HierarchicalObjectStore() else: diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/tools/filters/__init__.py --- a/lib/galaxy/tools/filters/__init__.py +++ b/lib/galaxy/tools/filters/__init__.py @@ -12,7 +12,7 @@ # Prepopulate dict containing filters that are always checked, # other filters that get 
checked depending on context (e.g. coming from # trackster or no user found are added in build filters). - self.default_filters = dict( tool=[ _not_hidden ], section=[], label=[] ) + self.default_filters = dict( tool=[ _not_hidden, _handle_requires_login ], section=[], label=[] ) # Add dynamic filters to these default filters. config = toolbox.app.config self.__init_filters( "tool", config.tool_filters ) @@ -25,9 +25,6 @@ """ filters = self.default_filters.copy() - if not trans.user: - filters[ "tool" ].append( _requires_login ) - if kwds.get( "trackster", False ): filters[ "tool" ].append( _has_trackster_conf ) @@ -59,8 +56,8 @@ return not tool.hidden -def _requires_login( context, tool ): - return not tool.require_login +def _handle_requires_login( context, tool ): + return not tool.require_login or context.trans.user def _has_trackster_conf( context, tool ): diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/util/__init__.py --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -38,6 +38,9 @@ from inflection import Inflector, English inflector = Inflector(English) +pkg_resources.require( "simplejson" ) +import simplejson + def is_multi_byte( chars ): for char in chars: try: @@ -282,6 +285,11 @@ value = "%s%s%s" % ( value[:left_index], join_by, value[-right_index:] ) return value +def pretty_print_json(json_data, is_json_string=False): + if is_json_string: + json_data = simplejson.loads(json_data) + return simplejson.dumps(json_data, sort_keys=True, indent=4 * ' ') + # characters that are valid valid_chars = set(string.letters + string.digits + " -=_.()/+*^,:?!") diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/visualization/registry.py --- a/lib/galaxy/visualization/registry.py +++ b/lib/galaxy/visualization/registry.py @@ -12,6 +12,8 @@ import galaxy.model from galaxy.web import url_for +from galaxy.web.base import pluginframework + import logging log = logging.getLogger( __name__ ) @@ -28,17 +30,16 @@ tests: anding, grouping, not has_dataprovider + user is admin data_sources: lists of add description element to visualization. -TESTS to add: - has dataprovider - user is admin +user_pref for ordering/ex/inclusion of particular visualizations """ # ------------------------------------------------------------------- the registry -class VisualizationsRegistry( object ): +class VisualizationsRegistry( pluginframework.PluginFramework ): """ Main responsibilities are: - testing if an object has a visualization that can be applied to it @@ -47,6 +48,8 @@ - validating and parsing params into resources (based on a context) used in the visualization template """ + #: name of this plugin + #: any built in visualizations that have their own render method in ctrls/visualization # these should be handled somewhat differently - and be passed onto their resp. 
methods in ctrl.visualization #TODO: change/remove if/when they can be updated to use this system BUILT_IN_VISUALIZATIONS = [ @@ -55,57 +58,34 @@ 'sweepster', 'phyloviz' ] - # where to search for visualiztion templates (relative to templates/webapps/galaxy) - # this can be overridden individually in the config entries - TEMPLATE_ROOT = 'visualization' + #: directories under plugin_directory that aren't plugins + non_plugin_directories = [] def __str__( self ): - listings_keys_str = ','.join( self.listings.keys() ) if self.listings else '' - return 'VisualizationsRegistry(%s)' %( listings_keys_str ) + return 'VisualizationsRegistry(%s)' %( self.plugin_directory ) - def __init__( self, galaxy_root, configuration_filepath ): - # load the registry from the xml files located in configuration_filepath using the given parser - configuration_filepath = os.path.join( galaxy_root, configuration_filepath ) - self.configuration_filepath = self.check_conf_filepath( configuration_filepath ) - self.move_sample_conf_files() - self.load() + def __init__( self, registry_filepath, template_cache_dir ): + super( VisualizationsRegistry, self ).__init__( registry_filepath, 'visualizations', template_cache_dir ) # what to use to parse query strings into resources/vars for the template self.resource_parser = ResourceParser() + log.debug( '%s loaded', str( self ) ) - def check_conf_filepath( self, configuration_filepath ): + def load_configuration( self ): """ - Checks for the existence of the given filepath. - :param configurarion_filepath: full filepath to the visualization config directory - :raises IOError: if the given directory doesn't exist + Builds the registry by parsing the `config/*.xml` files for every plugin + in ``get_plugin_directories`` and stores the results in ``self.listings``. + + ..note:: + This could be used to re-load a new configuration without restarting + the instance. """ - if not os.path.exists( configuration_filepath ): - raise IOError( 'visualization configuration directory (%s) not found' %( configuration_filepath ) ) - return configuration_filepath + try: + self.listings = VisualizationsConfigParser.parse( self.get_plugin_directories() ) - def move_sample_conf_files( self ): - """ - Copies any `*.xml.sample` files in `configuration_filepath` to - `.xml` files of the same names if no file with that name already exists. - - :returns: a list of the files moved - """ - files_moved = [] - for sample_file in glob.glob( os.path.join( self.configuration_filepath, '*.sample' ) ): - new_name = os.path.splitext( sample_file )[0] - if not os.path.exists( new_name ): - shutil.copy2( sample_file, new_name ) - files_moved.append( new_name ) - - def load( self ): - """ - Builds the registry by parsing the xml in `self.configuration_filepath` - and stores the results in `self.listings`. - - Provided as separate method from `__init__` in order to re-load a - new configuration without restarting the instance. - """ - self.listings = VisualizationsConfigParser.parse( self.configuration_filepath ) + except Exception, exc: + log.exception( 'Error parsing visualizations plugins %s', self.plugin_directory ) + raise def get_visualization( self, trans, visualization_name, target_object ): """ @@ -283,11 +263,11 @@ VALID_RENDER_LOCATIONS = [ 'galaxy_main', '_top', '_blank' ] @classmethod - def parse( cls, config_dir, debug=True ): + def parse( cls, plugin_directories, debug=False ): """ Static class interface. 
""" - return cls( debug ).parse_files( config_dir ) + return cls( debug ).parse_plugins( plugin_directories ) def __init__( self, debug=False ): self.debug = debug @@ -297,33 +277,45 @@ self.param_parser = ParamParser() self.param_modifier_parser = ParamModifierParser() - def parse_files( self, config_dir ): + def parse_plugins( self, plugin_directories ): """ - Parse each XML file in `config_dir` for visualizations config data. + Parses the config files for each plugin sub-dir in `base_path`. + + :param plugin_directories: a list of paths to enabled plugins. + + :returns: registry data in dictionary form + """ + returned = {} + for plugin_path in plugin_directories: + returned.update( self.parse_plugin( plugin_path ) ) + return returned + + def parse_plugin( self, plugin_path ): + """ + Parses any XML files in ``<plugin_path>/config``. If an error occurs while parsing a visualizations entry, it is skipped. :returns: registry data in dictionary form + ..note:: + assumes config files are in a 'config' sub-dir of each plugin """ returned = {} - try: - for xml_filepath in glob.glob( os.path.join( config_dir, '*.xml' ) ): - try: - visualization_name, visualization = self.parse_file( xml_filepath ) - # skip vis' with parsing errors - don't shutdown the startup - except ParsingException, parse_exc: - log.error( 'Skipped visualization config "%s" due to parsing errors: %s', - xml_filepath, str( parse_exc ), exc_info=self.debug ) - if visualization: - returned[ visualization_name ] = visualization - log.debug( 'Visualization config loaded for: %s', visualization_name ) + plugin_config_path = os.path.join( plugin_path, 'config' ) + if not os.path.isdir( plugin_config_path ): + return returned - except Exception, exc: - log.error( 'Error parsing visualizations configuration directory %s: %s', - config_dir, str( exc ), exc_info=( not self.debug ) ) - #TODO: change when this framework is on by default - if self.debug: - raise + for xml_filepath in glob.glob( os.path.join( plugin_config_path, '*.xml' ) ): + try: + visualization_name, visualization = self.parse_file( xml_filepath ) + # skip vis' with parsing errors - don't shutdown the startup + except ParsingException, parse_exc: + log.error( 'Skipped visualization config "%s" due to parsing errors: %s', + xml_filepath, str( parse_exc ), exc_info=self.debug ) + + if visualization: + returned[ visualization_name ] = visualization + log.debug( 'Visualization config loaded for: %s', visualization_name ) return returned @@ -457,6 +449,7 @@ returned[ 'tests' ] = tests # to_params (optional, 0 or more) - tells the registry to set certain params based on the model_clas, tests + returned[ 'to_params' ] = {} to_params = self.parse_to_params( xml_tree.findall( 'to_param' ) ) if to_params: returned[ 'to_params' ] = to_params @@ -522,6 +515,7 @@ # test_attr can be a dot separated chain of object attributes (e.g. dataset.datatype) - convert to list #TODO: too dangerous - constrain these to some allowed list + #TODO: does this err if no test_attr - it should... 
test_attr = test_elem.get( 'test_attr' ) test_attr = test_attr.split( self.ATTRIBUTE_SPLIT_CHAR ) if isinstance( test_attr, str ) else [] # build a lambda function that gets the desired attribute to test @@ -531,11 +525,17 @@ test_result_type = test_elem.get( 'result_type' ) or 'string' # test functions should be sent an object to test, and the parsed result expected from the test - #TODO: currently, isinstance and string equivalance are the only test types supported - if test_type == 'isinstance': + # is test_attr attribute an instance of result + if test_type == 'isinstance': #TODO: wish we could take this further but it would mean passing in the datatypes_registry test_fn = lambda o, result: isinstance( getter( o ), result ) + #TODO: needs cleanup - robustiosity-nessness + # does the object itself have a datatype attr and does that datatype have the given dataprovider + elif test_type == 'has_dataprovider': + test_fn = lambda o, result: ( hasattr( o, 'datatype' ) + and o.datatype.has_dataprovider( result ) ) + # default to simple (string) equilavance (coercing the test_attr to a string) else: test_fn = lambda o, result: str( getter( o ) ) == result diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/web/base/controller.py --- a/lib/galaxy/web/base/controller.py +++ b/lib/galaxy/web/base/controller.py @@ -36,6 +36,7 @@ from galaxy.datatypes.interval import ChromatinInteractions from galaxy.datatypes.data import Text +from galaxy.model import ExtendedMetadata, ExtendedMetadataIndex, LibraryDatasetDatasetAssociation from galaxy.visualization.genome.visual_analytics import get_tool_def from galaxy.datatypes.display_applications import util as da_util @@ -536,7 +537,9 @@ hda = None try: hda = self.get_dataset( trans, id, - check_ownership=check_ownership, check_accesible=check_accesible, check_state=check_state ) + check_ownership=check_ownership, + check_accessible=check_accessible, + check_state=check_state ) except Exception, exception: pass hdas.append( hda ) @@ -2281,6 +2284,80 @@ log.debug( "In get_item_tag_assoc with tagged_item %s" % tagged_item ) return self.get_tag_handler( trans )._get_item_tag_assoc( user, tagged_item, tag_name ) + + +class UsesExtendedMetadataMixin( SharableItemSecurityMixin ): + """ Mixin for getting and setting item extended metadata. """ + + def get_item_extended_metadata_obj( self, trans, item ): + """ + Given an item object (such as a LibraryDatasetDatasetAssociation), find the object + of the associated extended metadata + """ + if item.extended_metadata: + return item.extended_metadata + return None + + def set_item_extended_metadata_obj( self, trans, item, extmeta_obj, check_writable=False): + print "setting", extmeta_obj.data + if item.__class__ == LibraryDatasetDatasetAssociation: + if not check_writable or trans.app.security_agent.can_modify_library_item( trans.get_current_user_roles(), item, trans.user ): + item.extended_metadata = extmeta_obj + trans.sa_session.flush() + + def unset_item_extended_metadata_obj( self, trans, item, check_writable=False): + if item.__class__ == LibraryDatasetDatasetAssociation: + if not check_writable or trans.app.security_agent.can_modify_library_item( trans.get_current_user_roles(), item, trans.user ): + item.extended_metadata = None + trans.sa_session.flush() + + def create_extended_metadata(self, trans, extmeta): + """ + Create/index an extended metadata object. 
The returned object is + not associated with any items + """ + ex_meta = ExtendedMetadata(extmeta) + trans.sa_session.add( ex_meta ) + trans.sa_session.flush() + for path, value in self._scan_json_block(extmeta): + meta_i = ExtendedMetadataIndex(ex_meta, path, value) + trans.sa_session.add(meta_i) + trans.sa_session.flush() + return ex_meta + + def delete_extended_metadata( self, trans, item): + if item.__class__ == ExtendedMetadata: + trans.sa_session.delete( item ) + trans.sa_session.flush() + + def _scan_json_block(self, meta, prefix=""): + """ + Scan a json style data structure, and emit all fields and their values. + Example paths + + Data + { "data" : [ 1, 2, 3 ] } + + Path: + /data == [1,2,3] + + /data/[0] == 1 + + """ + if isinstance(meta, dict): + for a in meta: + for path, value in self._scan_json_block(meta[a], prefix + "/" + a): + yield path, value + elif isinstance(meta, list): + for i, a in enumerate(meta): + for path, value in self._scan_json_block(a, prefix + "[%d]" % (i)): + yield path, value + else: + #BUG: Everything is cast to string, which can lead to false positives + #for cross type comparisions, ie "True" == True + yield prefix, ("%s" % (meta)).encode("utf8", errors='replace') + + """ Deprecated: `BaseController` used to be available under the name `Root` """ diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/web/base/pluginframework.py --- /dev/null +++ b/lib/galaxy/web/base/pluginframework.py @@ -0,0 +1,233 @@ +""" +Base class for plugins - frameworks or systems that may: + * serve static content + * serve templated html + * have some configuration at startup +""" + +import os.path +import glob +import sys + +import pkg_resources +pkg_resources.require( 'MarkupSafe' ) +pkg_resources.require( 'Mako' ) +import mako + +import logging +log = logging.getLogger( __name__ ) + +# ============================================================================= exceptions +class PluginFrameworkException( Exception ): + """Base exception for plugin frameworks. + """ + pass +class PluginFrameworkConfigException( PluginFrameworkException ): + """Exception for plugin framework configuration errors. + """ + pass +class PluginFrameworkStaticException( PluginFrameworkException ): + """Exception for plugin framework static directory set up errors. + """ + pass +class PluginFrameworkTemplateException( PluginFrameworkException ): + """Exception for plugin framework template directory + and template rendering errors. + """ + pass + + +# ============================================================================= base +class PluginFramework( object ): + """ + Plugins are files/directories living outside the Galaxy ``lib`` directory + that serve static files (css, js, images, etc.), use and serve mako templates, + and have some configuration to control the rendering. + + A plugin framework sets up all the above components. + """ + #: does the class need a config file(s) to be parsed? + has_config = True + #: does the class need static files served? + serves_static = True + #: does the class need template files served? 
+ serves_templates = True + #TODO: allow plugin mako inheritance from existing ``/templates`` files + #uses_galaxy_templates = True + #TODO: possibly better as instance var (or a combo) + #: the directories in ``plugin_directory`` with basenames listed here will + #: be ignored for config, static, and templates + non_plugin_directories = [] + + # ------------------------------------------------------------------------- setup + @classmethod + def from_config( cls, config_plugin_directory, config ): + """ + Set up the framework based on data from some config object by: + * constructing it's absolute plugin_directory filepath + * getting a template_cache + * and appending itself to the config object's ``plugin_frameworks`` list + + .. note:: + precondition: config obj should have attributes: + root, template_cache, and (list) plugin_frameworks + """ + # currently called from (base) app.py - defined here to allow override if needed + if not config_plugin_directory: + return None + try: + # create the plugin path and if plugin dir begins with '/' assume absolute path + full_plugin_filepath = os.path.join( config.root, config_plugin_directory ) + if config_plugin_directory.startswith( os.path.sep ): + full_plugin_filepath = config_plugin_directory + if not os.path.exists( full_plugin_filepath ): + raise PluginFrameworkException( 'Plugin path not found: %s' %( full_plugin_filepath ) ) + + template_cache = config.template_cache if cls.serves_static else None + plugin = cls( full_plugin_filepath, template_cache ) + + config.plugin_frameworks.append( plugin ) + return plugin + + except PluginFrameworkException, plugin_exc: + log.exception( "Error loading framework %s. Skipping...", cls.__class__.__name__ ) + return None + + def __str__( self ): + return '%s(%s)' %( self.__class__.__name__, self.plugin_directory ) + + def __init__( self, plugin_directory, name=None, template_cache_dir=None, debug=False ): + """ + :type plugin_directory: string + :param plugin_directory: the base directory where plugin code is kept + :type name: (optional) string (default: None) + :param name: the name of this plugin + (that will appear in url pathing, etc.) + :type template_cache_dir: (optional) string (default: None) + :param template_cache_dir: the cache directory to store compiled mako + """ + if not os.path.isdir( plugin_directory ): + raise PluginFrameworkException( 'Framework plugin directory not found: %s, %s' + %( self.__class__.__name__, plugin_directory ) ) + self.plugin_directory = plugin_directory + #TODO: or pass in from config + self.name = name or os.path.basename( self.plugin_directory ) + + if self.has_config: + self.load_configuration() + # set_up_static_urls will be called during the static middleware creation (if serves_static) + if self.serves_templates: + self.set_up_templates( template_cache_dir ) + + def get_plugin_directories( self ): + """ + Return the plugin directory paths for this plugin. + + Gets any directories within ``plugin_directory`` that are directories + themselves and whose ``basename`` is not in ``plugin_directory``. 
+ """ + # could instead explicitly list on/off in master config file + for plugin_path in glob.glob( os.path.join( self.plugin_directory, '*' ) ): + if not os.path.isdir( plugin_path ): + continue + + if os.path.basename( plugin_path ) in self.non_plugin_directories: + continue + + yield plugin_path + + # ------------------------------------------------------------------------- config + def load_configuration( self ): + """ + Override to load some framework/plugin specifc configuration. + """ + # Abstract method + return True + + # ------------------------------------------------------------------------- serving static files + def get_static_urls_and_paths( self ): + """ + For each plugin, return a 2-tuple where the first element is a url path + to the plugin's static files and the second is a filesystem path to those + same files. + + Meant to be passed to a Static url map. + """ + url_and_paths = [] + # called during the static middleware creation (buildapp.py, wrap_in_static) + + # NOTE: this only searches for static dirs two levels deep (i.e. <plugin_directory>/<plugin-name>/static) + for plugin_path in self.get_plugin_directories(): + # that path is a plugin, search for subdirs named static in THAT dir + plugin_static_path = os.path.join( plugin_path, 'static' ) + if not os.path.isdir( plugin_static_path ): + continue + + # build a url for that static subdir and create a Static urlmap entry for it + plugin_name = os.path.splitext( os.path.basename( plugin_path ) )[0] + plugin_url = self.name + '/' + plugin_name + '/static' + url_and_paths.append( ( plugin_url, plugin_static_path ) ) + + return url_and_paths + + # ------------------------------------------------------------------------- templates + def set_up_templates( self, template_cache_dir ): + """ + Add a ``template_lookup`` attribute to the framework that can be passed + to the mako renderer to find templates. + """ + if not template_cache_dir: + raise PluginFrameworkTemplateException( 'Plugins that serve templates require a template_cache_dir' ) + self.template_lookup = self._create_mako_template_lookup( template_cache_dir, self._get_template_paths() ) + return self.template_lookup + + def _get_template_paths( self ): + """ + Get the paths that will be searched for templates. + """ + return [ self.plugin_directory ] + + def _create_mako_template_lookup( self, cache_dir, paths, collection_size=500, output_encoding='utf-8' ): + """ + Create a ``TemplateLookup`` with defaults. + """ + return mako.lookup.TemplateLookup( + directories = paths, + module_directory = cache_dir, + collection_size = collection_size, + output_encoding = output_encoding ) + + #TODO: do we want to remove trans and app from the plugin template context? + def fill_template( self, trans, template_filename, **kwargs ): + """ + Pass control over to trans and render the ``template_filename``. + """ + # defined here to be overridden + return trans.fill_template( template_filename, template_lookup=self.template_lookup, **kwargs ) + + def fill_template_with_plugin_imports( self, trans, template_filename, **kwargs ): + """ + Returns a rendered plugin template but allows importing modules from inside + the plugin directory within the template. + + ..example:: I.e. given this layout for a plugin: + bler/ + template/ + bler.mako + static/ + conifg/ + my_script.py + this version of `fill_template` allows `bler.mako` to call `import my_script`. 
+ """ + try: + plugin_base_path = os.path.split( os.path.dirname( template_filename ) )[0] + plugin_path = os.path.join( self.plugin_directory, plugin_base_path ) + sys.path.append( plugin_path ) + filled_template = self.fill_template( trans, template_filename, **kwargs ) + + finally: + sys.path.remove( plugin_path ) + + return filled_template + + #TODO: could add plugin template helpers here diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/web/framework/__init__.py --- a/lib/galaxy/web/framework/__init__.py +++ b/lib/galaxy/web/framework/__init__.py @@ -832,6 +832,39 @@ history = property( get_history, set_history ) + def get_or_create_default_history( self ): + """ + Gets or creates a default history and associates it with the current + session. + """ + + # There must be a user to fetch a default history. + if not self.galaxy_session.user: + return self.new_history() + + # Look for default history that (a) has default name + is not deleted and + # (b) has no datasets. If suitable history found, use it; otherwise, create + # new history. + unnamed_histories = self.sa_session.query( self.app.model.History ).filter_by( + user=self.galaxy_session.user, + name=self.app.model.History.default_name, + deleted=False ) + default_history = None + for history in unnamed_histories: + if len( history.datasets ) == 0: + # Found suitable default history. + default_history = history + break + + # Set or create hsitory. + if default_history: + history = default_history + self.set_history( history ) + else: + history = self.new_history() + + return history + def new_history( self, name=None ): """ Create a new history and associate it with the current session and @@ -960,10 +993,13 @@ searchList=[kwargs, self.template_context, dict(caller=self, t=self, h=helpers, util=util, request=self.request, response=self.response, app=self.app)] ) return str( template ) - def fill_template_mako( self, filename, **kwargs ): - template = self.webapp.mako_template_lookup.get_template( filename ) + def fill_template_mako( self, filename, template_lookup=None, **kwargs ): + template_lookup = template_lookup or self.webapp.mako_template_lookup + template = template_lookup.get_template( filename ) template.output_encoding = 'utf-8' - data = dict( caller=self, t=self, trans=self, h=helpers, util=util, request=self.request, response=self.response, app=self.app ) + + data = dict( caller=self, t=self, trans=self, h=helpers, util=util, + request=self.request, response=self.response, app=self.app ) data.update( self.template_context ) data.update( kwargs ) return template.render( **data ) diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/web/framework/middleware/static.py --- a/lib/galaxy/web/framework/middleware/static.py +++ b/lib/galaxy/web/framework/middleware/static.py @@ -12,9 +12,11 @@ from paste.urlparser import StaticURLParser class CacheableStaticURLParser( StaticURLParser ): + def __init__( self, directory, cache_seconds=None ): StaticURLParser.__init__( self, directory ) self.cache_seconds = cache_seconds + def __call__( self, environ, start_response ): path_info = environ.get('PATH_INFO', '') if not path_info: diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/api/extended_metadata.py --- /dev/null +++ b/lib/galaxy/webapps/galaxy/api/extended_metadata.py @@ -0,0 +1,59 @@ +""" +API operations on annotations. 
+""" +import logging +from galaxy import web +from galaxy.web.base.controller import BaseAPIController, UsesHistoryMixin, UsesLibraryMixinItems, UsesHistoryDatasetAssociationMixin, UsesStoredWorkflowMixin, UsesExtendedMetadataMixin, HTTPNotImplemented + +log = logging.getLogger( __name__ ) + +class BaseExtendedMetadataController( BaseAPIController, UsesExtendedMetadataMixin, UsesHistoryMixin, UsesLibraryMixinItems, UsesHistoryDatasetAssociationMixin, UsesStoredWorkflowMixin ): + + @web.expose_api + def index( self, trans, **kwd ): + idnum = kwd[self.exmeta_item_id] + item = self._get_item_from_id(trans, idnum, check_writable=False) + if item is not None: + ex_meta = self.get_item_extended_metadata_obj( trans, item ) + if ex_meta is not None: + return ex_meta.data + + @web.expose_api + def create( self, trans, payload, **kwd ): + idnum = kwd[self.exmeta_item_id] + item = self._get_item_from_id(trans, idnum, check_writable=True) + if item is not None: + ex_obj = self.get_item_extended_metadata_obj(trans, item) + if ex_obj is not None: + self.unset_item_extended_metadata_obj(trans, item) + self.delete_extended_metadata(trans, ex_obj) + ex_obj = self.create_extended_metadata(trans, payload) + self.set_item_extended_metadata_obj(trans, item, ex_obj) + + @web.expose_api + def delete( self, trans, **kwd ): + idnum = kwd[self.tagged_item_id] + item = self._get_item_from_id(trans, idnum, check_writable=True) + if item is not None: + ex_obj = self.get_item_extended_metadata_obj(trans, item) + if ex_obj is not None: + self.unset_item_extended_metadata_obj(trans, item) + self.delete_extended_metadata(trans, ex_obj) + + @web.expose_api + def undelete( self, trans, **kwd ): + raise HTTPNotImplemented() + +class LibraryDatasetExtendMetadataController(BaseExtendedMetadataController): + controller_name = "library_dataset_extended_metadata" + exmeta_item_id = "library_content_id" + def _get_item_from_id(self, trans, idstr, check_writable=True): + if check_writable: + item = self.get_library_dataset_dataset_association( trans, idstr) + if trans.app.security_agent.can_modify_library_item( trans.get_current_user_roles(), item ): + return item + else: + item = self.get_library_dataset_dataset_association( trans, idstr) + if trans.app.security_agent.can_access_library_item( trans.get_current_user_roles(), item, trans.user ): + return item + return None diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/api/permissions.py --- a/lib/galaxy/webapps/galaxy/api/permissions.py +++ b/lib/galaxy/webapps/galaxy/api/permissions.py @@ -3,7 +3,8 @@ """ import logging, os, string, shutil, urllib, re, socket from cgi import escape, FieldStorage -from galaxy import util, datatypes, jobs, web, util +import galaxy.util +from galaxy import util, datatypes, jobs, web from galaxy.web.base.controller import * from galaxy.util.sanitize_html import sanitize_html from galaxy.model.orm import * @@ -23,7 +24,7 @@ trans.response.status = 403 return "You are not authorized to update library permissions." 
- params = util.Params( payload ) + params = galaxy.util.Params( payload ) try: decoded_library_id = trans.security.decode_id( library_id ) except TypeError: @@ -38,7 +39,7 @@ permissions = {} for k, v in trans.app.model.Library.permitted_actions.items(): role_params = params.get( k + '_in', [] ) - in_roles = [ trans.sa_session.query( trans.app.model.Role ).get( trans.security.decode_id( x ) ) for x in util.listify( role_params ) ] + in_roles = [ trans.sa_session.query( trans.app.model.Role ).get( trans.security.decode_id( x ) ) for x in galaxy.util.listify( role_params ) ] permissions[ trans.app.security_agent.get_action( v.action ) ] = in_roles trans.app.security_agent.set_all_library_permissions( trans, library, permissions ) trans.sa_session.refresh( library ) diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/buildapp.py --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -2,22 +2,26 @@ Provides factory methods to assemble the Galaxy web application """ -import logging, atexit -import os, os.path -import sys, warnings - -from galaxy.util import asbool +import sys +import os +import os.path +import atexit +import warnings +import glob from paste import httpexceptions import pkg_resources -log = logging.getLogger( __name__ ) - -from galaxy import util import galaxy.model import galaxy.model.mapping import galaxy.datatypes.registry import galaxy.web.framework +from galaxy import util +from galaxy.util import asbool + +import logging +log = logging.getLogger( __name__ ) + class GalaxyWebApplication( galaxy.web.framework.WebApplication ): pass @@ -118,6 +122,10 @@ name_prefix="workflow_", path_prefix='/api/workflows/:workflow_id' ) + _add_item_extended_metadata_controller( webapp, + name_prefix="library_dataset_", + path_prefix='/api/libraries/:library_id/contents/:library_content_id' ) + _add_item_annotation_controller( webapp, name_prefix="history_content_", path_prefix='/api/histories/:history_id/contents/:history_content_id' ) @@ -181,7 +189,7 @@ if kwargs.get( 'middleware', True ): webapp = wrap_in_middleware( webapp, global_conf, **kwargs ) if asbool( kwargs.get( 'static_enabled', True ) ): - webapp = wrap_in_static( webapp, global_conf, **kwargs ) + webapp = wrap_in_static( webapp, global_conf, plugin_frameworks=app.config.plugin_frameworks, **kwargs ) if asbool(kwargs.get('pack_scripts', False)): pack_scripts() # Close any pooled database connections before forking @@ -233,6 +241,10 @@ conditions=dict(method=["GET"])) +def _add_item_extended_metadata_controller( webapp, name_prefix, path_prefix, **kwd ): + controller = "%sextended_metadata" % name_prefix + name = "%sextended_metadata" % name_prefix + webapp.mapper.resource(name, "extended_metadata", path_prefix=path_prefix, controller=controller) def _add_item_annotation_controller( webapp, name_prefix, path_prefix, **kwd ): controller = "%sannotations" % name_prefix @@ -323,7 +335,7 @@ log.debug( "Enabling 'Request ID' middleware" ) return app -def wrap_in_static( app, global_conf, **local_conf ): +def wrap_in_static( app, global_conf, plugin_frameworks=None, **local_conf ): from paste.urlmap import URLMap from galaxy.web.framework.middleware.static import CacheableStaticURLParser as Static urlmap = URLMap() @@ -343,6 +355,16 @@ urlmap["/static/style"] = Static( conf.get( "static_style_dir" ), cache_time ) urlmap["/favicon.ico"] = Static( conf.get( "static_favicon_dir" ), cache_time ) urlmap["/robots.txt"] = Static( conf.get( 
"static_robots_txt", 'static/robots.txt'), cache_time ) + + # wrap any static dirs for plugins + plugin_frameworks = plugin_frameworks or [] + for static_serving_framework in ( framework for framework in plugin_frameworks if framework.serves_static ): + # invert control to each plugin for finding their own static dirs + for plugin_url, plugin_static_path in static_serving_framework.get_static_urls_and_paths(): + plugin_url = '/plugins/' + plugin_url + urlmap[( plugin_url )] = Static( plugin_static_path, cache_time ) + log.debug( 'added url, path to static middleware: %s, %s', plugin_url, plugin_static_path ) + # URL mapper becomes the root webapp return urlmap diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py --- a/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py +++ b/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py @@ -1054,18 +1054,19 @@ no_changes_checked=no_changes_checked, tool_panel_section=tool_panel_section, new_tool_panel_section=new_tool_panel_section ) - # The repository's status must be updated from 'Uninstall' to 'New' when initiating reinstall so the repository_installation_updater will function. - tool_shed_repository = suc.create_or_update_tool_shed_repository( trans.app, - tool_shed_repository.name, - tool_shed_repository.description, - tool_shed_repository.installed_changeset_revision, - tool_shed_repository.ctx_rev, - repository_clone_url, - metadata, - trans.model.ToolShedRepository.installation_status.NEW, - tool_shed_repository.installed_changeset_revision, - tool_shed_repository.owner, - tool_shed_repository.dist_to_shed ) + if tool_shed_repository.status == trans.model.ToolShedRepository.installation_status.UNINSTALLED: + # The repository's status must be updated from 'Uninstalled' to 'New' when initiating reinstall so the repository_installation_updater will function. + tool_shed_repository = suc.create_or_update_tool_shed_repository( trans.app, + tool_shed_repository.name, + tool_shed_repository.description, + tool_shed_repository.installed_changeset_revision, + tool_shed_repository.ctx_rev, + repository_clone_url, + metadata, + trans.model.ToolShedRepository.installation_status.NEW, + tool_shed_repository.installed_changeset_revision, + tool_shed_repository.owner, + tool_shed_repository.dist_to_shed ) ctx_rev = suc.get_ctx_rev( trans.app, tool_shed_url, tool_shed_repository.name, @@ -1096,7 +1097,8 @@ repository_metadata=None, tool_dependencies=tool_dependencies, repository_dependencies=repository_dependencies ) - repo_info_dicts.append( repo_info_dict ) + if repo_info_dict not in repo_info_dicts: + repo_info_dicts.append( repo_info_dict ) # Make sure all tool_shed_repository records exist. created_or_updated_tool_shed_repositories, tool_panel_section_keys, repo_info_dicts, filtered_repo_info_dicts = \ repository_dependency_util.create_repository_dependency_objects( trans=trans, @@ -1107,7 +1109,7 @@ install_repository_dependencies=install_repository_dependencies, no_changes_checked=no_changes_checked, tool_panel_section=tool_panel_section, - new_tool_panel_section=new_tool_panel_section ) + new_tool_panel_section=new_tool_panel_section ) # Default the selected tool panel location for loading tools included in each newly installed required tool shed repository to the location # selected for the repository selected for reinstallation. 
for index, tps_key in enumerate( tool_panel_section_keys ): diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/controllers/history.py --- a/lib/galaxy/webapps/galaxy/controllers/history.py +++ b/lib/galaxy/webapps/galaxy/controllers/history.py @@ -309,7 +309,7 @@ # If deleting the current history, make a new current. if history == trans.get_history(): deleted_current = True - trans.new_history() + trans.get_or_create_default_history() trans.log_event( "History (%s) marked as deleted" % history.name ) n_deleted += 1 if purge and trans.app.config.allow_user_dataset_purge: @@ -571,8 +571,8 @@ # No need to check other outputs since the job's parent history is this history job.mark_deleted( trans.app.config.track_jobs_in_database ) trans.app.job_manager.job_stop_queue.put( job.id ) - # Regardless of whether it was previously deleted, we make a new history active - trans.new_history() + # Regardless of whether it was previously deleted, get or create default history. + trans.get_or_create_default_history() return trans.show_ok_message( "History deleted, a new history is active", refresh_frames=['history'] ) @web.expose diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/controllers/library_common.py --- a/lib/galaxy/webapps/galaxy/controllers/library_common.py +++ b/lib/galaxy/webapps/galaxy/controllers/library_common.py @@ -16,9 +16,9 @@ from galaxy.security import Action from galaxy.tools.actions import upload_common from galaxy.util import inflector -from galaxy.util.json import to_json_string +from galaxy.util.json import to_json_string, from_json_string from galaxy.util.streamball import StreamBall -from galaxy.web.base.controller import BaseUIController, UsesFormDefinitionsMixin +from galaxy.web.base.controller import BaseUIController, UsesFormDefinitionsMixin, UsesExtendedMetadataMixin from galaxy.web.form_builder import AddressField, CheckboxField, SelectField, build_select_field from galaxy.model.orm import and_, eagerload_all @@ -73,7 +73,7 @@ pass os.rmdir( tmpd ) -class LibraryCommon( BaseUIController, UsesFormDefinitionsMixin ): +class LibraryCommon( BaseUIController, UsesFormDefinitionsMixin, UsesExtendedMetadataMixin ): @web.json def library_item_updates( self, trans, ids=None, states=None ): # Avoid caching @@ -536,6 +536,39 @@ trans.sa_session.flush() message = "Information updated for library dataset '%s'." % ldda.name status = 'done' + elif params.get( 'change_extended_metadata', False): + em_string = util.restore_text( params.get("extended_metadata", "") ) + if len(em_string): + payload = None + try: + payload = from_json_string(em_string) + except Exception, e: + message = 'Invalid JSON input' + status = 'error' + if payload is not None: + if ldda is not None: + ex_obj = self.get_item_extended_metadata_obj(trans, ldda) + if ex_obj is not None: + self.unset_item_extended_metadata_obj(trans, ldda) + self.delete_extended_metadata(trans, ex_obj) + ex_obj = self.create_extended_metadata(trans, payload) + self.set_item_extended_metadata_obj(trans, ldda, ex_obj) + message = "Updated Extended metadata '%s'." % ldda.name + status = 'done' + else: + message = "LDDA not found" + status = 'error' + else: + if ldda is not None: + ex_obj = self.get_item_extended_metadata_obj(trans, ldda) + if ex_obj is not None: + self.unset_item_extended_metadata_obj(trans, ldda) + self.delete_extended_metadata(trans, ex_obj) + message = "Deleted Extended metadata '%s'." 
% ldda.name + status = 'done' + + + if "dbkey" in ldda.datatype.metadata_spec and not ldda.metadata.dbkey: # Copy dbkey into metadata, for backwards compatability # This looks like it does nothing, but getting the dbkey diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/galaxy/controllers/visualization.py --- a/lib/galaxy/webapps/galaxy/controllers/visualization.py +++ b/lib/galaxy/webapps/galaxy/controllers/visualization.py @@ -708,7 +708,7 @@ # validate name vs. registry registry = trans.app.visualizations_registry if not registry: - raise HTTPNotFound( 'No visualization registry (possibly disabled in universe_wsgi.ini)') + raise HTTPNotFound( 'No visualization registry (possibly disabled in universe_wsgi.ini)' ) if visualization_name not in registry.listings: raise HTTPNotFound( 'Unknown or invalid visualization: ' + visualization_name ) # or redirect to list? @@ -722,16 +722,15 @@ resources = registry.query_dict_to_resources( trans, self, visualization_name, kwargs ) # look up template and render - template_root = registry_listing.get( 'template_root', registry.TEMPLATE_ROOT ) - template = registry_listing[ 'template' ] - template_path = os.path.join( template_root, template ) + template_path = registry_listing[ 'template' ] + returned = registry.fill_template( trans, template_path, + visualization_name=visualization_name, query_args=kwargs, + embedded=embedded, shared_vars={}, **resources ) #NOTE: passing *unparsed* kwargs as query_args #NOTE: shared_vars is a dictionary for shared data in the template # this feels hacky to me but it's what mako recommends: # http://docs.makotemplates.org/en/latest/runtime.html #TODO: embedded - returned = trans.fill_template( template_path, visualization_name=visualization_name, - embedded=embedded, query_args=kwargs, shared_vars={}, **resources ) except Exception, exception: log.exception( 'error rendering visualization (%s): %s', visualization_name, str( exception ) ) @@ -745,47 +744,58 @@ @web.expose @web.require_login() - def trackster(self, trans, id=None, **kwargs): + def trackster(self, trans, **kwargs): """ Display browser for the visualization denoted by id and add the datasets listed in `dataset_ids`. """ - # Get dataset to add. + # define app configuration + app = { 'jscript' : "viz/trackster" } + + # get dataset to add + id = kwargs.get( "id", None ) + + # get dataset to add new_dataset_id = kwargs.get( "dataset_id", None ) - # Check for gene region - gene_region = GenomeRegion.from_str(kwargs.get("gene_region", "")) - - # Set up new browser if no id provided. + # set up new browser if no id provided if not id: - # Use dbkey from dataset to be added or from incoming parameter. + # use dbkey from dataset to be added or from incoming parameter dbkey = None if new_dataset_id: dbkey = self.get_dataset( trans, new_dataset_id ).dbkey if dbkey == '?': dbkey = kwargs.get( "dbkey", None ) - - # fill template - return trans.fill_template( "tracks/browser.mako", viewport_config=gene_region.__dict__, add_dataset=new_dataset_id, default_dbkey=dbkey ) + + # save database key + app['default_dbkey'] = dbkey + + # add url + app['new_browser'] = web.url_for( controller='visualization', action='new_browser', default_dbkey=dbkey ) + else: + # load saved visualization + vis = self.get_visualization( trans, id, check_ownership=False, check_accessible=True ) + app['viz_config'] = self.get_visualization_config( trans, vis ) + + # backup id + app['id'] = id; - # Display saved visualization. 
- vis = self.get_visualization( trans, id, check_ownership=False, check_accessible=True ) - viz_config = self.get_visualization_config( trans, vis ) - - # Update gene region of saved visualization if user parses a new gene region in the url + # add dataset id + app['add_dataset'] = new_dataset_id + + # check for gene region + gene_region = GenomeRegion.from_str(kwargs.get("gene_region", "")) + + # update gene region of saved visualization if user parses a new gene region in the url if gene_region.chrom is not None: - viz_config['viewport']['chrom'] = gene_region.chrom - viz_config['viewport']['start'] = gene_region.start - viz_config['viewport']['end'] = gene_region.end - - ''' - FIXME: - if new_dataset is not None: - if trans.security.decode_id(new_dataset) in [ d["dataset_id"] for d in viz_config.get("tracks") ]: - new_dataset = None # Already in browser, so don't add - ''' + app['gene_region'] = { + 'chrom' : gene_region.chrom, + 'start' : gene_region.start, + 'end' : gene_region.end + } + # fill template - return trans.fill_template( 'tracks/browser.mako', config=viz_config, add_dataset=new_dataset_id ) + return trans.fill_template('galaxy.panels.mako', config = {'right_panel' : True, 'app' : app}) @web.expose def circster( self, trans, id=None, hda_ldda=None, dataset_id=None, dbkey=None ): @@ -839,7 +849,15 @@ if not isinstance( genome_data, str ): track[ 'preloaded_data' ] = genome_data - return trans.fill_template( 'visualization/circster.mako', viz_config=viz_config, genome=genome ) + # define app configuration for generic mako template + app = { + 'jscript' : "viz/circster", + 'viz_config' : viz_config, + 'genome' : genome + } + + # fill template + return trans.fill_template('galaxy.panels.mako', config = {'app' : app}) @web.expose def sweepster( self, trans, id=None, hda_ldda=None, dataset_id=None, regions=None ): diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/tool_shed/api/repository_revisions.py --- a/lib/galaxy/webapps/tool_shed/api/repository_revisions.py +++ b/lib/galaxy/webapps/tool_shed/api/repository_revisions.py @@ -54,7 +54,6 @@ try: # We'll currently support only gzip-compressed tar archives. file_type = 'gz' - file_type_str = suc.get_file_type_str( changeset_revision, file_type ) export_repository_dependencies = util.string_as_bool( export_repository_dependencies ) # Get the repository information. repository = suc.get_repository_by_name_and_owner( trans.app, name, owner ) diff -r fcfd3af762255910c5959c8c8147af90ab4038bc -r da94b25396024b6af382242f446d2daf117b514b lib/galaxy/webapps/tool_shed/util/container_util.py --- a/lib/galaxy/webapps/tool_shed/util/container_util.py +++ b/lib/galaxy/webapps/tool_shed/util/container_util.py @@ -257,13 +257,12 @@ class ToolDependency( object ): """Tool dependency object""" - def __init__( self, id=None, name=None, version=None, type=None, install_dir=None, readme=None, installation_status=None, repository_id=None, + def __init__( self, id=None, name=None, version=None, type=None, readme=None, installation_status=None, repository_id=None, tool_dependency_id=None, is_orphan=None ): self.id = id self.name = name self.version = version self.type = type - self.install_dir = install_dir self.readme = readme self.installation_status = installation_status self.repository_id = repository_id @@ -948,12 +947,10 @@ # Insert a header row. tool_dependency_id += 1 if trans.webapp.name == 'galaxy': - # Include the installation directory. 
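# Both the trackster and circster handlers above now build a plain 'app' dictionary
# and render the shared galaxy.panels.mako template instead of a visualization-specific
# template. Illustrative sketch of that configuration for the circster case (the values
# here are placeholders; the real handler fills in the saved viz_config and genome data):
app = {
    'jscript': 'viz/circster',   # client-side module the generic panel should load
    'viz_config': {},            # saved visualization configuration (placeholder)
    'genome': None,              # genome/chromosome information (placeholder)
}
config = {'app': app}            # trackster additionally sets 'right_panel': True
# rendered with: trans.fill_template('galaxy.panels.mako', config=config)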
tool_dependency = ToolDependency( id=tool_dependency_id, name='Name', version='Version', type='Type', - install_dir='Install directory', readme=None, installation_status='Installation status', repository_id=None, @@ -964,7 +961,6 @@ name='Name', version='Version', type='Type', - install_dir=None, readme=None, installation_status=None, repository_id=None, @@ -995,7 +991,6 @@ name=name, version=None, type=type, - install_dir=None, readme=None, installation_status=installation_status, repository_id=repository_id, @@ -1013,7 +1008,6 @@ name = requirements_dict[ 'name' ] version = requirements_dict[ 'version' ] type = requirements_dict[ 'type' ] - install_dir = requirements_dict.get( 'install_dir', None ) repository_id = requirements_dict.get( 'repository_id', None ) td_id = requirements_dict.get( 'tool_dependency_id', None ) if trans.webapp.name == 'galaxy': @@ -1024,7 +1018,6 @@ name=name, version=version, type=type, - install_dir=install_dir, readme=None, installation_status=installation_status, repository_id=repository_id, This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/b076688903e1/ Changeset: b076688903e1 User: ross...@gmail.com Date: 2013-08-08 07:21:58 Summary: adding rgenetics tools missing from baker Affected #: 2 files diff -r da94b25396024b6af382242f446d2daf117b514b -r b076688903e1f96fb0453c61d0ea38b5d92cd89c tools/rgenetics/htseqsams2mx.py --- a/tools/rgenetics/htseqsams2mx.py +++ b/tools/rgenetics/htseqsams2mx.py @@ -60,7 +60,7 @@ self.msg = msg -def htseqMX(gff_filename,sam_filenames,colnames,sam_exts,opts): +def htseqMX(gff_filename,sam_filenames,colnames,sam_exts,sam_bais,opts): """ Code taken from count.py in Simon Anders HTSeq distribution Wrapped in a loop to accept multiple bam/sam files and their names from galaxy to @@ -90,6 +90,7 @@ features = HTSeq.GenomicArrayOfSets( "auto", opts.stranded != "no" ) mapqMin = int(opts.mapqMin) counts = {} + nreads = 0 empty = 0 ambiguous = 0 notaligned = 0 @@ -123,9 +124,19 @@ for sami,sam_filename in enumerate(sam_filenames): colname = colnames[sami] isbam = sam_exts[sami] == 'bam' + hasbai = sam_bais[sami] > '' + if hasbai: + tempname = os.path.splitext(os.path.basename(sam_filename))[0] + tempbam = '%s.bam' % tempname + tempbai = '%s.bai' % tempname + os.link(sam_filename,tempbam) + os.link(sam_bais[sami],tempbai) try: if isbam: - read_seq = HTSeq.BAM_Reader( sam_filename ) + if hasbai: + read_seq = HTSeq.BAM_Reader ( tempbam ) + else: + read_seq = HTSeq.BAM_Reader( sam_filename ) else: read_seq = HTSeq.SAM_Reader( sam_filename ) first_read = iter(read_seq).next() @@ -142,6 +153,7 @@ read_seq_pe_file = read_seq read_seq = HTSeq.pair_SAM_alignments( read_seq ) for seqi,r in enumerate(read_seq): + nreads += 1 if not pe_mode: if not r.aligned: notaligned += 1 @@ -240,13 +252,13 @@ raise if not opts.quiet: - sys.stdout.write( "%d sam %s processed.\n" % ( seqi, "lines " if not pe_mode else "line pairs" ) ) - return counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered + sys.stdout.write( "%d sam %s processed for %s.\n" % ( seqi, "lines " if not pe_mode else "line pairs", colname ) ) + return counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered,nreads warnings.showwarning = my_showwarning assert os.path.isfile(gff_filename),'## unable to open supplied gff file %s' % gff_filename try: - counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered = count_reads_in_features( sam_filenames, colnames, gff_filename,opts) + 
counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered,nreads = count_reads_in_features( sam_filenames, colnames, gff_filename,opts) except: sys.stderr.write( "Error: %s\n" % str( sys.exc_info()[1] ) ) sys.stderr.write( "[Exception type: %s, raised in %s:%d]\n" % @@ -254,7 +266,7 @@ os.path.basename(traceback.extract_tb( sys.exc_info()[2] )[-1][0]), traceback.extract_tb( sys.exc_info()[2] )[-1][1] ) ) sys.exit( 1 ) - return counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered + return counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered,nreads def usage(): @@ -330,6 +342,7 @@ assert len(samfnames) == len(scolnames), '##ERROR sams2mx: Count of sam/cname not consistent - %d/%d' % (len(samfnames),len(scolnames)) sam_exts = [x[2] for x in samsets] assert len(samfnames) == len(sam_exts), '##ERROR sams2mx: Count of extensions not consistent - %d/%d' % (len(samfnames),len(sam_exts)) + sam_bais = [x[3] for x in samsets] # these only exist for bams and need to be finessed with a symlink so pysam will just work for i,b in enumerate(samfnames): assert os.path.isfile(b),'## Supplied input sam file "%s" not found' % b sam_filenames.append(b) @@ -339,7 +352,7 @@ sampName = sampName.replace(')','') # for R sampName = sampName.replace(' ','_') # for R colnames.append(sampName) - counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered = htseqMX(gff_file, sam_filenames,colnames,sam_exts,opts) + counts,empty,ambiguous,lowqual,notaligned,nonunique,filtered,nreads = htseqMX(gff_file, sam_filenames,colnames,sam_exts,sam_bais,opts) heads = '\t'.join(['Contig',] + colnames) res = [heads,] contigs = counts.keys() @@ -359,9 +372,10 @@ outf.write('\n') outf.close() walltime = int(time.time() - starttime) - accumulatornames = ('walltimeseconds','totreadscounted','ncontigs','emptyreads','ambiguousreads','lowqualreads', - 'notalignedreads','nonuniquereads','extra_filtered_reads','emptycontigs') - accums = (walltime,totalc,len(contigs),empty,ambiguous,lowqual,notaligned,nonunique,filtered,emptycontigs) - notes = ['%s=%d' % (accumulatornames[i],x) for i,x in enumerate(accums)] - print >> sys.stdout, ','.join(notes) + accumulatornames = ('walltime (seconds)','total reads read','total reads counted','number of contigs','total empty reads','total ambiguous reads','total low quality reads', + 'total not aligned reads','total not unique mapping reads','extra filtered reads','empty contigs') + accums = (walltime,nreads,totalc,len(contigs),empty,ambiguous,lowqual,notaligned,nonunique,filtered,emptycontigs) + fracs = (1.0,1.0,float(totalc)/nreads,1.0,float(empty)/nreads,float(ambiguous)/nreads,float(lowqual)/nreads,float(notaligned)/nreads,float(nonunique)/nreads,float(filtered)/nreads,float(emptycontigs)/len(contigs)) + notes = ['%s = %d (%2.3f)' % (accumulatornames[i],x,100.0*fracs[i]) for i,x in enumerate(accums)] + print >> sys.stdout, '\n'.join(notes) sys.exit(0) diff -r da94b25396024b6af382242f446d2daf117b514b -r b076688903e1f96fb0453c61d0ea38b5d92cd89c tools/rgenetics/htseqsams2mx.xml --- a/tools/rgenetics/htseqsams2mx.xml +++ b/tools/rgenetics/htseqsams2mx.xml @@ -1,24 +1,25 @@ -<tool id="htseqsams2mxlocal" name="SAM/BAM to count matrix" version="0.4"> - <description>local using HTSeq code</description> +<tool id="htseqsams2mx" name="SAM/BAM to count matrix" version="0.4"> + <description>using HTSeq code</description><stdio><exit_code range="666" level="warning" description="Exit code 666 encountered" /></stdio><requirements><requirement type="package" version="1.7.1">numpy</requirement> + 
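# Illustrative sketch (not from the changeset) of the BAM-index trick htseqsams2mx.py
# above relies on: Galaxy stores a BAM dataset and its .bai index as separately named
# files, so the script hard-links both into the working directory under a shared
# basename before handing the BAM to HTSeq/pysam, which looks for a matching index
# file next to the BAM. The helper name and workdir parameter are assumptions.
import os

def link_bam_with_index(bam_path, bai_path, workdir='.'):
    base = os.path.splitext(os.path.basename(bam_path))[0]
    tempbam = os.path.join(workdir, '%s.bam' % base)
    tempbai = os.path.join(workdir, '%s.bai' % base)
    os.link(bam_path, tempbam)  # hard links, as in the committed code
    os.link(bai_path, tempbai)
    return tempbam

# read_seq = HTSeq.BAM_Reader(link_bam_with_index(sam_filename, sam_bais[i]))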
<requirement type="package" version="0.7.5">pysam</requirement><requirement type="package" version="2.4.11">freetype</requirement><requirement type="package" version="1.2.1">matplotliblite</requirement><requirement type="package" version="0.5.4p3">htseq</requirement></requirements><command interpreter="python"> htseqsams2mx.py -g "$gfffile" -o "$outfile" -m "$model" --id_attribute "$id_attr" --feature_type "$feature_type" - --mapqMin $mapqMin --samf "'${firstsamf}','${firstsamf.name}','${firstsamf.ext}'" + --mapqMin $mapqMin --samf "'${firstsamf}','${firstsamf.name}','${firstsamf.ext}','${firstsamf.metadata.bam_index}'" #if $secondsamf.ext != 'data': - --samf "'${secondsamf}','${secondsamf.name}','${secondsamf.ext}'" + --samf "'${secondsamf}','${secondsamf.name}','${secondsamf.ext}','${secondsamf.metadata.bam_index}'" #end if #for $s in $samfiles: #if $s.samf.ext != 'data': - --samf "'${s.samf}','${s.samf.name}','${s.samf.ext}'" + --samf "'${s.samf}','${s.samf.name}','${s.samf.ext}','${s.samf.metadata.bam_index}'" #end if #end for #if $filter_extras: https://bitbucket.org/galaxy/galaxy-central/commits/c7d0b32602fb/ Changeset: c7d0b32602fb User: ross...@gmail.com Date: 2013-08-08 07:38:10 Summary: branch merge with central Affected #: 111 files diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/circster/config/circster.xml --- /dev/null +++ b/config/plugins/visualizations/circster/config/circster.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE visualization SYSTEM "../../visualization.dtd"> +<visualization name="circster"> + <data_sources> + <data_source> + <model_class>HistoryDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> + <to_param param_attr="id">dataset_id</to_param> + <to_param assign="hda">hda_ldda</to_param> + </data_source> + <data_source> + <model_class>LibraryDatasetDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> + <to_param param_attr="id">dataset_id</to_param> + <to_param assign="ldda">hda_ldda</to_param> + </data_source> + </data_sources> + <params> + <param type="visualization">id</param> + <param type="hda_or_ldda">dataset_id</param> + <param_modifier type="string" modifies="dataset_id">hda_ldda</param_modifier> + <param type="dbkey">dbkey</param> + </params> + <!-- template_root and template are currently ignored for the 'built-in' visualizations --> + <template_root>webapps/galaxy/visualization</template_root> + <template>circster.mako</template> + <render_location>_top</render_location> +</visualization> diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/phyloviz/config/phyloviz.xml --- /dev/null +++ b/config/plugins/visualizations/phyloviz/config/phyloviz.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE visualization SYSTEM "../../visualization.dtd"> +<visualization name="phyloviz"> + <data_sources> + <data_source> + <model_class>HistoryDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Newick</test> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Nexus</test> + <to_param param_attr="id">dataset_id</to_param> + </data_source> + </data_sources> + <params> + <param type="dataset" var_name_in_template="hda" required="true">dataset_id</param> + <param 
type="integer" default="0">tree_index</param> + </params> + <!-- template_root and template are currently ignored for the 'built-in' visualizations --> + <template_root>webapps/galaxy/visualization</template_root> + <template>phyloviz.mako</template> + <render_location>_top</render_location> +</visualization> diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/Gruntfile.js --- /dev/null +++ b/config/plugins/visualizations/scatterplot/Gruntfile.js @@ -0,0 +1,56 @@ +// NOTE: use 'sudo npm install .', then 'grunt' to use this file + +module.exports = function(grunt) { + + grunt.initConfig({ + pkg: grunt.file.readJSON( 'package.json' ), + + handlebars: { + compile: { + options: { + namespace: 'Templates', + processName : function( filepath ){ + return filepath.match( /\w*\.handlebars/ )[0].replace( '.handlebars', '' ); + } + }, + files: { + "build/compiled-templates.js" : "src/handlebars/*.handlebars" + } + } + }, + + concat: { + options: { + separator: ';\n' + }, + dist: { + //NOTE: mvc references templates - templates must be cat'd first + src : [ 'build/compiled-templates.js', 'src/**/*.js' ], + dest: 'build/scatterplot-concat.js' + } + }, + + uglify: { + options: { + }, + dist: { + src : 'build/scatterplot-concat.js', + // uglify directly into static dir + dest: 'static/scatterplot.js' + } + }, + + watch: { + files: [ 'src/**.js', 'src/handlebars/*.handlebars' ], + tasks: [ 'default' ] + } + }); + + grunt.loadNpmTasks( 'grunt-contrib-handlebars' ); + grunt.loadNpmTasks( 'grunt-contrib-concat' ); + grunt.loadNpmTasks( 'grunt-contrib-uglify' ); + grunt.loadNpmTasks( 'grunt-contrib-watch' ); + + grunt.registerTask( 'default', [ 'handlebars', 'concat', 'uglify' ]); + grunt.registerTask( 'watch', [ 'handlebars', 'concat', 'uglify', 'watch' ]); +}; diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/config/scatterplot.xml --- /dev/null +++ b/config/plugins/visualizations/scatterplot/config/scatterplot.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE visualization SYSTEM "../../visualization.dtd"> +<visualization name="scatterplot"> + <data_sources> + <data_source> + <model_class>HistoryDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">tabular.Tabular</test> + <to_param param_attr="id">dataset_id</to_param> + </data_source> + </data_sources> + <params> + <param type="dataset" var_name_in_template="hda" required="true">dataset_id</param> + </params> + <template>scatterplot/templates/scatterplot.mako</template> +</visualization> diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/package.json --- /dev/null +++ b/config/plugins/visualizations/scatterplot/package.json @@ -0,0 +1,24 @@ +{ + "name": "galaxy-scatterplot", + "version": "0.0.0", + "description": "Scatterplot visualization plugin for the Galaxy informatics framework", + "main": " ", + "scripts": { + "test": "test" + }, + "keywords": [ + "galaxy", + "visualization", + "d3" + ], + "author": "Carl Eberhard", + "license": "BSD", + "devDependencies": { + "grunt": "~0.4.1", + "grunt-cli": "~0.1.9", + "grunt-contrib-handlebars": "~0.5.10", + "grunt-contrib-concat": "~0.3.0", + "grunt-contrib-uglify": "~0.2.2", + "grunt-contrib-watch": "~0.5.1" + } +} diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r 
c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/handlebars/chartControl.handlebars --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/handlebars/chartControl.handlebars @@ -0,0 +1,56 @@ +<p class="help-text"> + Use the following controls to change how the chart is displayed. + The slide controls can be moved by the mouse or, if the 'handle' is in focus, your keyboard's arrow keys. + Move the focus between controls by using the tab or shift+tab keys on your keyboard. + Use the 'Draw' button to render (or re-render) the chart with the current settings. + </p> + + <div id="datapointSize" class="form-input numeric-slider-input"> + <label for="datapointSize">Size of data point: </label> + <div class="slider-output">{{datapointSize}}</div> + <div class="slider"></div> + <p class="form-help help-text-small"> + Size of the graphic representation of each data point + </p> + </div> + + <div id="animDuration" class="form-input checkbox-input"> + <label for="animate-chart">Animate chart transitions?: </label> + <input type="checkbox" id="animate-chart" + class="checkbox control"{{#if animDuration}} checked="true"{{/if}} /> + <p class="form-help help-text-small"> + Uncheck this to disable the animations used on the chart + </p> + </div> + + <div id="width" class="form-input numeric-slider-input"> + <label for="width">Chart width: </label> + <div class="slider-output">{{width}}</div> + <div class="slider"></div> + <p class="form-help help-text-small"> + (not including chart margins and axes) + </p> + </div> + + <div id="height" class="form-input numeric-slider-input"> + <label for="height">Chart height: </label> + <div class="slider-output">{{height}}</div> + <div class="slider"></div> + <p class="form-help help-text-small"> + (not including chart margins and axes) + </p> + </div> + + <div id="X-axis-label" class="text-input form-input"> + <label for="X-axis-label">Re-label the X axis: </label> + <input type="text" name="X-axis-label" id="X-axis-label" value="{{xLabel}}" /> + <p class="form-help help-text-small"></p> + </div> + + <div id="Y-axis-label" class="text-input form-input"> + <label for="Y-axis-label">Re-label the Y axis: </label> + <input type="text" name="Y-axis-label" id="Y-axis-label" value="{{yLabel}}" /> + <p class="form-help help-text-small"></p> + </div> + + <input id="render-button" type="button" value="Draw" /> \ No newline at end of file diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/handlebars/chartDisplay.handlebars --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/handlebars/chartDisplay.handlebars @@ -0,0 +1,1 @@ +<svg width="{{width}}" height="{{height}}"></svg> \ No newline at end of file diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/handlebars/dataControl.handlebars --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/handlebars/dataControl.handlebars @@ -0,0 +1,56 @@ +<p class="help-text"> + Use the following controls to change the data used by the chart. + Use the 'Draw' button to render (or re-render) the chart with the current settings. + </p> + + {{! 
column selector containers }} + <div class="column-select"> + <label for="X-select">Data column for X: </label> + <select name="X" id="X-select"> + {{#each numericColumns}} + <option value="{{index}}">{{name}}</option> + {{/each}} + </select> + </div> + <div class="column-select"> + <label for="Y-select">Data column for Y: </label> + <select name="Y" id="Y-select"> + {{#each numericColumns}} + <option value="{{index}}">{{name}}</option> + {{/each}} + </select> + </div> + + {{! optional id column }} + <div id="include-id"> + <label for="include-id-checkbox">Include a third column as data point IDs?</label> + <input type="checkbox" name="include-id" id="include-id-checkbox" /> + <p class="help-text-small"> + These will be displayed (along with the x and y values) when you hover over + a data point. + </p> + </div> + <div class="column-select" style="display: none"> + <label for="ID-select">Data column for IDs: </label> + <select name="ID" id="ID-select"> + {{#each allColumns}} + <option value="{{index}}">{{name}}</option> + {{/each}} + </select> + </div> + + {{! if we're using generic column selection names ('column 1') - allow the user to use the first line }} + <div id="first-line-header" style="display: none;"> + <p>Possible headers: {{ possibleHeaders }} + </p> + <label for="first-line-header-checkbox">Use the above as column headers?</label> + <input type="checkbox" name="include-id" id="first-line-header-checkbox" + {{#if usePossibleHeaders }}checked="true"{{/if}}/> + <p class="help-text-small"> + It looks like Galaxy couldn't get proper column headers for this data. + Would you like to use the column headers above as column names to select columns? + </p> + </div> + + <input id="render-button" type="button" value="Draw" /> + <div class="clear"></div> \ No newline at end of file diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/handlebars/scatterplotControlForm.handlebars --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/handlebars/scatterplotControlForm.handlebars @@ -0,0 +1,46 @@ +{{! main layout }} + +<div class="scatterplot-container chart-container tabbable tabs-left"> + {{! tab buttons/headers using Bootstrap }} + <ul class="nav nav-tabs"> + {{! start with the data controls as the displayed tab }} + <li class="active"><a href="#data-control" data-toggle="tab" class="tooltip" + title="Use this tab to change which data are used">Data Controls</a></li> + <li><a href="#chart-control" data-toggle="tab" class="tooltip" + title="Use this tab to change how the chart is drawn">Chart Controls</a></li> + <li><a href="#stats-display" data-toggle="tab" class="tooltip" + title="This tab will display overall statistics for your data">Statistics</a></li> + <li><a href="#chart-display" data-toggle="tab" class="tooltip" + title="This tab will display the chart">Chart</a> + {{! loading indicator - initially hidden }} + <div id="loading-indicator" style="display: none;"> + <img class="loading-img" src="{{loadingIndicatorImagePath}}" /> + <span class="loading-message">{{message}}</span> + </div> + </li> + </ul> + + {{! data form, chart config form, stats, and chart all get their own tab }} + <div class="tab-content"> + {{! ---------------------------- tab for data settings form }} + <div id="data-control" class="tab-pane active"> + {{! rendered separately }} + </div> + + {{! ---------------------------- tab for chart graphics control form }} + <div id="chart-control" class="tab-pane"> + {{! 
rendered separately }} + </div> + + {{! ---------------------------- tab for data statistics }} + <div id="stats-display" class="tab-pane"> + {{! rendered separately }} + </div> + + {{! ---------------------------- tab for actual chart }} + <div id="chart-display" class="tab-pane"> + {{! chart rendered separately }} + </div> + + </div>{{! end .tab-content }} +</div>{{! end .chart-control }} \ No newline at end of file diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/handlebars/statsDisplay.handlebars --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/handlebars/statsDisplay.handlebars @@ -0,0 +1,8 @@ +<p class="help-text">By column:</p> + <table id="chart-stats-table"> + <thead><th></th><th>X</th><th>Y</th></thead> + {{#each stats}} + <tr><td>{{name}}</td><td>{{xval}}</td><td>{{yval}}</td></tr> + </tr> + {{/each}} + </table> \ No newline at end of file diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/scatterplot.js --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/scatterplot.js @@ -0,0 +1,488 @@ +/* ============================================================================= +todo: + outside this: + BUG: setting width, height in plot controls doesn't re-interpolate data locations!! + BUG?: get metadata_column_names (from datatype if necessary) + BUG: single vis in popupmenu should have tooltip with that name NOT 'Visualizations' + + wire label setters, anim setter + + TwoVarScatterplot: + ??: maybe better to do this with a canvas... + save as visualization + to separate file? + remove underscore dependencies + add interface to change values (separate)? + download svg -> base64 encode + incorporate glyphs, glyph state renderers + + ScatterplotSettingsForm: + some css bug that lowers the width of settings form when plot-controls tab is open + causes chart to shift + what can be abstracted/reused for other graphs? + avoid direct manipulation of this.plot + allow option to put plot into separate tab of interface (for small multiples) + + provide callback in view to load data incrementally - for large sets + paginate + handle rerender + use endpoint (here and on the server (fileptr)) + fetch (new?) data + handle rerender + use d3.TSV? + render warning on long data (> maxDataPoints) + adjust endpoint + + selectable list of preset column comparisons (rnaseq etc.) + how to know what sort of Tabular the data is? + smarter about headers + validate columns selection (here or server) + + set stats column names by selected columns + move chart into tabbed area... 
}) + * plot.render( xColumnData, yColumnData ); + * + * depends on: d3, underscore + */ +function TwoVarScatterplot( config ){ + var TICK_LINE_AND_PADDING = 10, + GUESS_AT_SVG_CHAR_WIDTH = 7, + GUESS_AT_SVG_CHAR_HEIGHT = 10, + PADDING = 8, + X_LABEL_TOO_LONG_AT = 5; + + // set up logging + //this.debugging = true; + this.log = function(){ + if( this.debugging && console && console.debug ){ + var args = Array.prototype.slice.call( arguments ); + args.unshift( this.toString() ); + console.debug.apply( console, args ); + } + }; + this.log( 'new TwoVarScatterplot:', config ); + + // ........................................................ set up chart config + // config will default to these values when not passed in + //NOTE: called on new + this.defaults = { + id : 'TwoVarScatterplot', + containerSelector : 'body', + //TODO??: needed? + maxDataPoints : 30000, + datapointSize : 4, + animDuration : 500, + //TODO: variable effect (not always exactly # of ticks set to) + xNumTicks : 10, + yNumTicks : 10, + xAxisLabelBumpY : 40, + yAxisLabelBumpX : -40, + width : 400, + height : 400, + //TODO: anyway to make this a sub-obj? + marginTop : 50, + marginRight : 50, + marginBottom : 50, + marginLeft : 50, + + xMin : null, + xMax : null, + yMin : null, + yMax : null, + + xLabel : "X", + yLabel : "Y" + }; + this.config = _.extend( {}, this.defaults, config ); + this.log( 'initial config:', this.config ); + + this.updateConfig = function( newConfig, rerender ){ + // setter for chart config + //TODO: validate here + _.extend( this.config, newConfig ); + this.log( this + '.updateConfig:', this.config ); + //TODO: implement rerender flag + }; + + // ........................................................ helpers + this.toString = function(){ + return this.config.id; + }; + // conv. methods for svg transforms + this.translateStr = function( x, y ){ + return 'translate(' + x + ',' + y + ')'; + }; + this.rotateStr = function( d, x, y ){ + return 'rotate(' + d + ',' + x + ',' + y + ')'; + }; + + // ........................................................ initial element creation + this.adjustChartDimensions = function( top, right, bottom, left ){ + //this.log( this + '.adjustChartDimensions', arguments ); + top = top || 0; + right = right || 0; + bottom = bottom || 0; + left = left || 0; + this.svg + .attr( "width", this.config.width + ( this.config.marginRight + right ) + + ( this.config.marginLeft + left ) ) + .attr( "height", this.config.height + ( this.config.marginTop + top ) + + ( this.config.marginBottom + bottom ) ) + // initial is hidden - show it + .style( 'display', 'block' ); + + // move content group away from margins + //TODO: allow top, right axis + this.content = this.svg.select( "g.content" ) + .attr( "transform", this.translateStr( this.config.marginLeft + left, this.config.marginTop + top ) ); + }; + + // ........................................................ data and scales + this.preprocessData = function( data, min, max ){ + //this.log( this + '.preprocessData', arguments ); + //TODO: filter by min, max if set + + // set a cap on the data, limit to first n points + return ( data.length > this.config.maxDataPoints )? 
( data.slice( 0, this.config.maxDataPoints ) ): ( data ); + }; + + this.findMinMaxes = function( xCol, yCol, meta ){ + //this.log( this + '.findMinMaxes', arguments ); + // configuration takes priority, otherwise meta (from the server) if passed, last-resort: compute it here + this.xMin = this.config.xMin || ( meta )?( meta[0].min ):( d3.min( xCol ) ); + this.xMax = this.config.xMax || ( meta )?( meta[0].max ):( d3.max( xCol ) ); + this.yMin = this.config.yMin || ( meta )?( meta[1].min ):( d3.min( yCol ) ); + this.yMax = this.config.yMax || ( meta )?( meta[1].max ):( d3.max( yCol ) ); + }; + + this.setUpScales = function(){ + //this.log( this + '.setUpScales', arguments ); + // Interpolation for x, y based on data domains + this.xScale = d3.scale.linear() + .domain([ this.xMin, this.xMax ]) + .range([ 0, this.config.width ]), + this.yScale = d3.scale.linear() + .domain([ this.yMin, this.yMax ]) + .range([ this.config.height, 0 ]); + }; + + // ........................................................ axis and ticks + this.setUpXAxis = function(){ + //this.log( this + '.setUpXAxis', arguments ); + // origin: bottom, left + //TODO: incoporate top, right + this.xAxisFn = d3.svg.axis() + .scale( this.xScale ) + .ticks( this.config.xNumTicks ) + .orient( 'bottom' ); + this.xAxis// = content.select( 'g#x-axis' ) + .attr( 'transform', this.translateStr( 0, this.config.height ) ) + .call( this.xAxisFn ); + //this.log( 'xAxis:', this.xAxis ); + + //TODO: adjust ticks when tick labels are long - move odds down and extend tick line + // (for now) hide them + var xLongestTickLabel = d3.max( _.map( [ this.xMin, this.xMax ], + function( number ){ return ( String( number ) ).length; } ) ); + //this.log( 'xLongestTickLabel:', xLongestTickLabel ); + if( xLongestTickLabel >= X_LABEL_TOO_LONG_AT ){ + this.xAxis.selectAll( 'g' ).filter( ':nth-child(odd)' ).style( 'display', 'none' ); + } + + this.log( 'this.config.xLabel:', this.config.xLabel ); + this.xAxisLabel// = xAxis.select( 'text#x-axis-label' ) + .attr( 'x', this.config.width / 2 ) + .attr( 'y', this.config.xAxisLabelBumpY ) + .attr( 'text-anchor', 'middle' ) + .text( this.config.xLabel ); + this.log( 'xAxisLabel:', this.xAxisLabel ); + }; + + this.setUpYAxis = function(){ + //this.log( this + '.setUpYAxis', arguments ); + this.yAxisFn = d3.svg.axis() + .scale( this.yScale ) + .ticks( this.config.yNumTicks ) + .orient( 'left' ); + this.yAxis// = content.select( 'g#y-axis' ) + .call( this.yAxisFn ); + //this.log( 'yAxis:', this.yAxis ); + + // a too complicated section for increasing the left margin when tick labels are long + // get the tick labels for the y axis + var yTickLabels = this.yAxis.selectAll( 'text' ).filter( function( e, i ){ return i !== 0; } ); + this.log( 'yTickLabels:', yTickLabels ); + + // get the longest label length (or 0 if no labels) + this.yLongestLabel = d3.max( + //NOTE: d3 returns an nested array - use the plain array inside ([0]) + yTickLabels[0].map( function( e, i ){ + return ( d3.select( e ).text() ).length; + }) + ) || 0; + //this.log( 'yLongestLabel:', this.yLongestLabel ); + //TODO: lose the guessing if possible + var neededY = TICK_LINE_AND_PADDING + ( this.yLongestLabel * GUESS_AT_SVG_CHAR_WIDTH ) + + PADDING + GUESS_AT_SVG_CHAR_HEIGHT; + //this.log( 'neededY:', neededY ); + + // increase width for yLongerStr, increase margin for y + //TODO??: (or transform each number: 2k) + this.config.yAxisLabelBumpX = -( neededY - GUESS_AT_SVG_CHAR_HEIGHT ); + if( this.config.marginLeft < neededY ){ + var adjusting = ( neededY ) - 
this.config.marginLeft; + adjusting = ( adjusting < 0 )?( 0 ):( adjusting ); + //this.log( 'adjusting:', adjusting ); + + // update dimensions, translations + this.adjustChartDimensions( 0, 0, 0, adjusting ); + } + //this.log( 'this.config.yAxisLableBumpx, this.config.marginLeft:', + // this.config.yAxisLabelBumpX, this.config.marginLeft ); + + this.yAxisLabel// = yAxis.select( 'text#y-axis-label' ) + .attr( 'x', this.config.yAxisLabelBumpX ) + .attr( 'y', this.config.height / 2 ) + .attr( 'text-anchor', 'middle' ) + .attr( 'transform', this.rotateStr( -90, this.config.yAxisLabelBumpX, this.config.height / 2 ) ) + .text( this.config.yLabel ); + //this.log( 'yAxisLabel:', this.yAxisLabel ); + }; + + // ........................................................ grid lines + this.renderGrid = function(){ + //this.log( this + '.renderGrid', arguments ); + // VERTICAL + // select existing + this.vGridLines = this.content.selectAll( 'line.v-grid-line' ) + .data( this.xScale.ticks( this.xAxisFn.ticks()[0] ) ); + + // append any extra lines needed (more ticks) + this.vGridLines.enter().append( 'svg:line' ) + .classed( 'grid-line v-grid-line', true ); + + // update the attributes of existing and appended + this.vGridLines + .attr( 'x1', this.xScale ) + .attr( 'y1', 0 ) + .attr( 'x2', this.xScale ) + .attr( 'y2', this.config.height ); + + // remove unneeded (less ticks) + this.vGridLines.exit().remove(); + //this.log( 'vGridLines:', this.vGridLines ); + + // HORIZONTAL + this.hGridLines = this.content.selectAll( 'line.h-grid-line' ) + .data( this.yScale.ticks( this.yAxisFn.ticks()[0] ) ); + + this.hGridLines.enter().append( 'svg:line' ) + .classed( 'grid-line h-grid-line', true ); + + this.hGridLines + .attr( 'x1', 0 ) + .attr( 'y1', this.yScale ) + .attr( 'x2', this.config.width ) + .attr( 'y2', this.yScale ); + + this.hGridLines.exit().remove(); + //this.log( 'hGridLines:', this.hGridLines ); + }; + + // ........................................................ data points + this.renderDatapoints = function( xCol, yCol, ids ){ + this.log( this + '.renderDatapoints', arguments ); + var count = 0, + plot = this, + xPosFn = function( d, i ){ + //if( d ){ this.log( 'x.data:', newXCol[ i ], 'plotted:', plot.xScale( newXCol[ i ] ) ); } + return plot.xScale( xCol[ i ] ); + }, + yPosFn = function( d, i ){ + //if( d ){ this.log( 'y.data:', newYCol[ i ], 'plotted:', plot.yScale( newYCol[ i ] ) ); } + return plot.yScale( yCol[ i ] ); + }; + + //this.datapoints = this.addDatapoints( xCol, yCol, ids, ".glyph" ); + var datapoints = this.content.selectAll( '.glyph' ).data( xCol ); + + // enter - NEW data to be added as glyphs: give them a 'entry' position and style + count = 0; + datapoints.enter() + .append( 'svg:circle' ) + .each( function(){ count += 1; } ) + .classed( "glyph", true ) + .attr( "cx", 0 ) + .attr( "cy", this.config.height ) + // start all bubbles small... 
+ .attr( "r", 0 ); + this.log( count, ' new glyphs created' ); + + // for all EXISTING glyphs and those that need to be added: transition anim to final state + count = 0; + datapoints + // ...animate to final position + .transition().duration( this.config.animDuration ) + .each( function(){ count += 1; } ) + .attr( "cx", xPosFn ) + .attr( "cy", yPosFn ) + .attr( "r", plot.config.datapointSize ); + this.log( count, ' existing glyphs transitioned' ); + + // events + // glyphs that need to be removed: transition to from normal state to 'exit' state, remove from DOM + datapoints.exit() + .each( function(){ count += 1; } ) + .transition().duration( this.config.animDuration ) + .attr( "cy", this.config.height ) + .attr( "r", 0 ) + .remove(); + this.log( count, ' glyphs removed' ); + + this._addDatapointEventhandlers( datapoints, xCol, yCol, ids ); + }; + + this._addDatapointEventhandlers = function( datapoints, xCol, yCol, ids ){ + var plot = this; + datapoints + //TODO: remove magic numbers + .on( 'mouseover', function( d, i ){ + var datapoint = d3.select( this ); + datapoint + .style( 'fill', 'red' ) + .style( 'fill-opacity', 1 ); + + // create horiz, vert lines to axis + plot.content.append( 'line' ) + .attr( 'stroke', 'red' ) + .attr( 'stroke-width', 1 ) + // start not at center, but at the edge of the circle - to prevent mouseover thrashing + .attr( 'x1', datapoint.attr( 'cx' ) - plot.config.datapointSize ) + .attr( 'y1', datapoint.attr( 'cy' ) ) + .attr( 'x2', 0 ) + .attr( 'y2', datapoint.attr( 'cy' ) ) + .classed( 'hoverline', true ); + + // if the vertical hoverline + if( datapoint.attr( 'cy' ) < plot.config.height ){ + plot.content.append( 'line' ) + .attr( 'stroke', 'red' ) + .attr( 'stroke-width', 1 ) + .attr( 'x1', datapoint.attr( 'cx' ) ) + .attr( 'y1', datapoint.attr( 'cy' ) + plot.config.datapointSize ) + .attr( 'x2', datapoint.attr( 'cx' ) ) + .attr( 'y2', plot.config.height ) + .classed( 'hoverline', true ); + } + + var datapointWindowPos = $( this ).offset(); + plot.datapointInfoBox = plot.infoBox( + datapointWindowPos.top, datapointWindowPos.left, + plot.infoHtml( xCol[ i ], yCol[ i ], ( ids )?( ids[ i ] ):( undefined ) ) + ); + $( 'body' ).append( plot.datapointInfoBox ); + }) + .on( 'mouseout', function(){ + d3.select( this ) + .style( 'fill', 'black' ) + .style( 'fill-opacity', 0.2 ); + plot.content.selectAll( '.hoverline' ).remove(); + if( plot.datapointInfoBox ){ + plot.datapointInfoBox.remove(); + } + }); + }, + + this.render = function( columnData, meta ){ + this.log( this + '.render', arguments ); + this.log( '\t config:', this.config ); + + // prepare the data + //pre: columns passed are numeric + //pre: at least two columns are passed + //assume: first column is x, second column is y, any remaining aren't used + var xCol = columnData[0], + yCol = columnData[1], + ids = ( columnData.length > 2 )?( columnData[2] ):( undefined ); + //this.log( this + '.render', xCol.length, yCol.length, this.config ); + + //pre: xCol.len == yCol.len + xCol = this.preprocessData( xCol ); + yCol = this.preprocessData( yCol ); + this.log( 'xCol len', xCol.length, 'yCol len', yCol.length ); + + this.findMinMaxes( xCol, yCol, meta ); + //this.log( 'xMin, xMax, yMin, yMax:', this.xMin, this.xMax, this.yMin, this.yMax ); + this.setUpScales(); + + // find (or build if it doesn't exist) the svg dom infrastructure + if( !this.svg ){ this.svg = d3.select( 'svg' ).attr( "class", "chart" ); } + if( !this.content ){ + this.content = this.svg.append( "svg:g" ).attr( "class", "content" ).attr( 'id', 
this.config.id ); + } + //this.log( 'svg:', this.svg ); + //this.log( 'content:', this.content ); + + this.adjustChartDimensions(); + + if( !this.xAxis ){ this.xAxis = this.content.append( 'g' ).attr( 'class', 'axis' ).attr( 'id', 'x-axis' ); } + if( !this.xAxisLabel ){ + this.xAxisLabel = this.xAxis.append( 'text' ).attr( 'class', 'axis-label' ).attr( 'id', 'x-axis-label' ); + } + //this.log( 'xAxis:', this.xAxis, 'xAxisLabel:', this.xAxisLabel ); + + if( !this.yAxis ){ this.yAxis = this.content.append( 'g' ).attr( 'class', 'axis' ).attr( 'id', 'y-axis' ); } + if( !this.yAxisLabel ){ + this.yAxisLabel = this.yAxis.append( 'text' ).attr( 'class', 'axis-label' ).attr( 'id', 'y-axis-label' ); + } + //this.log( 'yAxis:', this.yAxis, 'yAxisLabel:', this.yAxisLabel ); + + this.setUpXAxis(); + this.setUpYAxis(); + + this.renderGrid(); + this.renderDatapoints( xCol, yCol, ids ); + }; + + this.infoHtml = function( x, y, id ){ + var retDiv = $( '<div/>' ); + if( id ){ + $( '<div/>' ).text( id ).css( 'font-weight', 'bold' ).appendTo( retDiv ); + } + $( '<div/>' ).text( x ).appendTo( retDiv ); + $( '<div/>' ).text( y ).appendTo( retDiv ); + return retDiv.html(); + }; + + //TODO: html for now + this.infoBox = function( top, left, html, adjTop, adjLeft ){ + adjTop = adjTop || 0; + adjLeft = adjLeft || 20; + var infoBox = $( '<div />' ) + .addClass( 'chart-info-box' ) + .css({ + 'position' : 'absolute', + 'top' : top + adjTop, + 'left' : left + adjLeft + }); + infoBox.html( html ); + return infoBox; + }; + +} + +//============================================================================== diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/scatterplotControlForm.js --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/scatterplotControlForm.js @@ -0,0 +1,631 @@ +/* ============================================================================= +todo: + I'd like to move the svg creation out of the splot constr. to: + allow adding splots to an existing canvas + allow mult. splots sharing a canvas + + + outside this: + BUG: setting width, height in plot controls doesn't re-interpolate data locations!! + BUG?: get metadata_column_names (from datatype if necessary) + BUG: single vis in popupmenu should have tooltip with that name NOT 'Visualizations' + + wire label setters, anim setter + + TwoVarScatterplot: + ??: maybe better to do this with a canvas... + save as visualization + to seperate file? + remove underscore dependencies + add interface to change values (seperate)? + download svg -> base64 encode + incorporate glyphs, glyph state renderers + + ScatterplotSettingsForm: + some css bug that lowers the width of settings form when plot-controls tab is open + causes chart to shift + what can be abstracted/reused for other graphs? + avoid direct manipulation of this.plot + allow option to put plot into seperate tab of interface (for small multiples) + + provide callback in view to load data incrementally - for large sets + paginate + handle rerender + use endpoint (here and on the server (fileptr)) + fetch (new?) data + handle rerender + use d3.TSV? + render warning on long data (> maxDataPoints) + adjust endpoint + + selectable list of preset column comparisons (rnaseq etc.) + how to know what sort of Tabular the data is? + smarter about headers + validate columns selection (here or server) + + set stats column names by selected columns + move chart into tabbed area... 
+ + Scatterplot.mako: + multiple plots on one page (small multiples) + ?? ensure svg styles thru d3 or css? + d3: configable (easily) + css: standard - better maintenance + ? override at config + +============================================================================= */ +/** + * Scatterplot control UI as a backbone view + * handles: + * getting the desired data + * configuring the plot display + * showing (general) statistics + * + * initialize attributes REQUIRES a dataset and an apiDatasetsURL + */ +var ScatterplotControlForm = BaseView.extend( LoggableMixin ).extend({ + //logger : console, + className : 'scatterplot-control-form', + + //NOTE: should include time needed to render + dataLoadDelay : 4000, + dataLoadSize : 5000, + + loadingIndicatorImage : 'loading_small_white_bg.gif', + fetchMsg : 'Fetching data...', + renderMsg : 'Rendering...', + + initialize : function( attributes ){ + this.log( this + '.initialize, attributes:', attributes ); + + this.dataset = null; + this.chartConfig = null; + this.chart = null; + this.loader = null; + + // set up refs to the four tab areas + this.$dataControl = null; + this.$chartControl = null; + this.$statsDisplay = null; + this.$chartDisplay = null; + + this.dataFetch = null; + + this.initializeFromAttributes( attributes ); + this.initializeChart( attributes ); + this.initializeDataLoader( attributes ); + }, + + initializeFromAttributes : function( attributes ){ + // required settings: ensure certain vars we need are passed in attributes + if( !attributes || !attributes.dataset ){ + throw( "ScatterplotView requires a dataset" ); + } else { + this.dataset = attributes.dataset; + } + if( jQuery.type( this.dataset.metadata_column_types ) === 'string' ){ + this.dataset.metadata_column_types = this.dataset.metadata_column_types.split( ', ' ); + } + this.log( '\t dataset:', this.dataset ); + + // attempt to get possible headers from the data's first line + if( this.dataset.comment_lines && this.dataset.comment_lines.length ){ + //TODO:?? + var firstLine = this.dataset.comment_lines[0], + possibleHeaders = firstLine.split( '\t' ); + if( possibleHeaders.length === this.dataset.metadata_column_types.length ){ + this.possibleHeaders = possibleHeaders; + } + } + + // passed from mako helper + //TODO: integrate to galaxyPaths + //TODO: ?? 
seems like data loader section would be better + if( !attributes.apiDatasetsURL ){ + throw( "ScatterplotView requires a apiDatasetsURL" ); + } else { + this.dataURL = attributes.apiDatasetsURL + '/' + this.dataset.id + '?'; + } + this.log( '\t dataURL:', this.dataURL ); + }, + + initializeChart : function( attributes ){ + // set up the basic chart infrastructure and config (if any) + this.chartConfig = attributes.chartConfig || {}; + //if( this.logger ){ this.chartConfig.debugging = true; } + this.log( '\t initial chartConfig:', this.chartConfig ); + + this.chart = new TwoVarScatterplot( this.chartConfig ); + //TODO: remove 2nd ref, use this.chart.config + this.chartConfig = this.chart.config; + }, + + initializeDataLoader : function( attributes ){ + // set up data loader + var view = this; + this.loader = new LazyDataLoader({ + //logger : ( this.logger )?( this.logger ):( null ), + // we'll generate this when columns are chosen + url : null, + start : attributes.start || 0, + //NOTE: metadata_data_lines can be null (so we won't know the total) + total : attributes.total || this.dataset.metadata_data_lines, + delay : this.dataLoadDelay, + size : this.dataLoadSize, + + buildUrl : function( start, size ){ + // currently VERY SPECIFIC to using data_providers.py start_val, max_vals params + return this.url + '&' + jQuery.param({ + start_val: start, + max_vals: size + }); + } + }); + $( this.loader ).bind( 'error', function( event, status, error ){ + view.log( 'ERROR:', status, error ); + alert( 'ERROR fetching data:\n' + status + '\n' + error ); + view.hideLoadingIndicator(); + }); + }, + + // ------------------------------------------------------------------------- CONTROLS RENDERING + render : function(){ + this.log( this + '.render' ); + + // render the tab controls, areas and loading indicator + this.$el.append( ScatterplotControlForm.templates.mainLayout({ + loadingIndicatorImagePath : '/static/images/' + this.loadingIndicatorImage, + message : '' + })); + + // render the tab content + this.$dataControl = this._render_dataControl(); + this.$chartControl = this._render_chartControl(); + this.$statsDisplay = this.$el.find( '.tab-pane#stats-display' ); + this.$chartDisplay = this._render_chartDisplay(); + + // auto render if given both x, y column choices in query for page + //TODO:?? add autoRender=1 to query maybe? + if( this.chartConfig.xColumn && this.chartConfig.yColumn ){ + this.renderChart(); + } + + // set up behaviours + this.$el.find( '.tooltip' ).tooltip(); + + // uncomment any of the following to have that tab show on initial load (for testing) + //this.$el.find( 'ul.nav' ).find( 'a[href="#data-control"]' ).tab( 'show' ); + //this.$el.find( 'ul.nav' ).find( 'a[href="#chart-control"]' ).tab( 'show' ); + //this.$el.find( 'ul.nav' ).find( 'a[href="#stats-display"]' ).tab( 'show' ); + //this.$el.find( 'ul.nav' ).find( 'a[href="#chart-display"]' ).tab( 'show' ); + return this; + }, + + _render_dataControl : function(){ + // controls for which columns are used to plot datapoints (and ids/additional info to attach if desired) + var view = this, + allColumns = [], + numericColumns = [], + usePossibleHeaders = ( this.possibleHeaders && this.$dataControl )? + ( this.$dataControl.find( '#first-line-header-checkbox' ).is( ':checked' ) ):( false ); + + // gather column indeces (from metadata_column_types) and names (from metadata_columnnames) + _.each( this.dataset.metadata_column_types, function( type, index ){ + // use a 1 based index in names/values within the form (will be dec. 
when parsed out) + var oneBasedIndex = index + 1, + // default name is 'column <index>'... + name = 'column ' + oneBasedIndex; + + // ...but label with the name if available... + if( view.dataset.metadata_column_names ){ + name = view.dataset.metadata_column_names[ index ]; + + // ...or, use the first line as headers if the user wants + } else if( usePossibleHeaders ){ + name = view.possibleHeaders[ index ]; + } + + // cache all columns here + allColumns.push({ index: oneBasedIndex, name: name }); + + // filter numeric columns to their own list + if( type === 'int' || type === 'float' ){ + numericColumns.push({ index: oneBasedIndex, name: name }); + } + }); + //TODO: other vals: max_vals, start_val, pagination (chart-settings) + + // render the html + var $dataControl = this.$el.find( '.tab-pane#data-control' ); + $dataControl.html( ScatterplotControlForm.templates.dataControl({ + allColumns : allColumns, + numericColumns : numericColumns, + possibleHeaders : ( this.possibleHeaders )?( this.possibleHeaders.join( ', ' ) ):( '' ), + usePossibleHeaders : usePossibleHeaders + })); + + if( !this.dataset.metadata_column_names && this.possibleHeaders ){ + $dataControl.find( '#first-line-header' ).show(); + } + + // preset to column selectors if they were passed in the config in the query string + $dataControl.find( '#X-select' ).val( this.chartConfig.xColumn ); + $dataControl.find( '#Y-select' ).val( this.chartConfig.yColumn ); + if( this.chartConfig.idColumn !== undefined ){ + $dataControl.find( '#include-id-checkbox' ) + .attr( 'checked', true ).trigger( 'change' ); + $dataControl.find( '#ID-select' ).val( this.chartConfig.idColumn ); + } + + return $dataControl; + }, + + _render_chartControl : function(){ + // tab content to control how the chart is rendered (data glyph size, chart size, etc.) 
+ var view = this, + $chartControl = this.$el.find( '.tab-pane#chart-control' ), + // limits for controls (by control/chartConfig id) + //TODO: move into TwoVarScatterplot + controlRanges = { + 'datapointSize' : { min: 2, max: 10, step: 1 }, + 'width' : { min: 200, max: 800, step: 20 }, + 'height' : { min: 200, max: 800, step: 20 } + }; + + // render the html + $chartControl.append( ScatterplotControlForm.templates.chartControl( this.chartConfig ) ); + + // set up behaviours, js on sliders + $chartControl.find( '.numeric-slider-input' ).each( function(){ + var $this = $( this ), + $output = $this.find( '.slider-output' ), + $slider = $this.find( '.slider' ), + id = $this.attr( 'id' ); + //chartControl.log( 'slider set up', 'this:', $this, 'slider:', $slider, 'id', id ); + + // what to do when the slider changes: update display and update chartConfig + //TODO: move out of loop + function onSliderChange(){ + var $this = $( this ), + newValue = $this.slider( 'value' ); + //chartControl.log( 'slider change', 'this:', $this, 'output:', $output, 'value', newValue ); + $output.text( newValue ); + //chartControl.chartConfig[ id ] = newValue; + } + + $slider.slider( _.extend( controlRanges[ id ], { + value : view.chartConfig[ id ], + change : onSliderChange, + slide : onSliderChange + })); + }); + + return $chartControl; + }, + + _render_chartDisplay : function(){ + // render the tab content where the chart is displayed (but not the chart itself) + var $chartDisplay = this.$el.find( '.tab-pane#chart-display' ); + $chartDisplay.append( ScatterplotControlForm.templates.chartDisplay( this.chartConfig ) ); + return $chartDisplay; + }, + + // ------------------------------------------------------------------------- EVENTS + events : { + 'change #include-id-checkbox' : 'toggleThirdColumnSelector', + 'change #first-line-header-checkbox' : 'rerenderDataControl', + 'click #data-control #render-button' : 'renderChart', + 'click #chart-control #render-button' : 'changeChartSettings' + }, + + toggleThirdColumnSelector : function(){ + // show/hide the id selector on the data settings panel + this.$el.find( 'select[name="ID"]' ).parent().toggle(); + }, + + rerenderDataControl : function(){ + this.$dataControl = this._render_dataControl(); + }, + + showLoadingIndicator : function( message, callback ){ + // display the loading indicator over the tab panels if hidden, update message (if passed) + message = message || ''; + var indicator = this.$el.find( 'div#loading-indicator' ); + messageBox = indicator.find( '.loading-message' ); + + if( indicator.is( ':visible' ) ){ + if( message ){ + messageBox.fadeOut( 'fast', function(){ + messageBox.text( message ); + messageBox.fadeIn( 'fast', callback ); + }); + } else { + callback(); + } + + } else { + if( message ){ messageBox.text( message ); } + indicator.fadeIn( 'fast', callback ); + } + }, + + hideLoadingIndicator : function( callback ){ + this.$el.find( 'div#loading-indicator' ).fadeOut( 'fast', callback ); + }, + + // ------------------------------------------------------------------------- CHART/STATS RENDERING + renderChart : function(){ + // fetch the data, (re-)render the chart + this.log( this + '.renderChart' ); + + //TODO: separate data fetch + + // this is a complete re-render, so clear the prev. 
data + this.data = null; + this.meta = null; + + // update the chartConfig (here and chart) using chart settings + //TODO: separate and improve (used in changeChartSettings too) + _.extend( this.chartConfig, this.getChartSettings() ); + this.log( '\t chartConfig:', this.chartConfig ); + this.chart.updateConfig( this.chartConfig, false ); + + // build the url with the current data settings + this.loader.url = this.dataURL + '&' + jQuery.param( this.getDataSettings() ); + this.log( '\t loader: total lines:', this.loader.total, ' url:', this.loader.url ); + + // bind the new data event to: aggregate data, update the chart and stats with new data + var view = this; + $( this.loader ).bind( 'loaded.new', function( event, response ){ + view.log( view + ' loaded.new', response ); + + // aggregate data and meta + view.postProcessDataFetchResponse( response ); + view.log( '\t postprocessed data:', view.data ); + view.log( '\t postprocessed meta:', view.meta ); + + // update the chart and stats + view.showLoadingIndicator( view.renderMsg, function(){ + view.chart.render( view.data, view.meta ); + view.renderStats( view.data, view.meta ); + view.hideLoadingIndicator(); + }); + }); + // when all data loaded - unbind (or we'll start doubling event handlers) + $( this.loader ).bind( 'complete', function( event, data ){ + view.log( view + ' complete', data ); + $( view.loader ).unbind(); + }); + + // begin loading the data, switch to the chart display tab + view.showLoadingIndicator( view.fetchMsg, function(){ + view.$el.find( 'ul.nav' ).find( 'a[href="#chart-display"]' ).tab( 'show' ); + view.loader.load(); + }); + }, + + renderStats : function(){ + this.log( this + '.renderStats' ); + // render the stats table in the stats panel + //TODO: there's a better way + this.$statsDisplay.html( ScatterplotControlForm.templates.statsDisplay({ + stats: [ + { name: 'Count', xval: this.meta[0].count, yval: this.meta[1].count }, + { name: 'Min', xval: this.meta[0].min, yval: this.meta[1].min }, + { name: 'Max', xval: this.meta[0].max, yval: this.meta[1].max }, + { name: 'Sum', xval: this.meta[0].sum, yval: this.meta[1].sum }, + { name: 'Mean', xval: this.meta[0].mean, yval: this.meta[1].mean }, + { name: 'Median', xval: this.meta[0].median, yval: this.meta[1].median } + ] + })); + }, + + changeChartSettings : function(){ + // re-render the chart with new chart settings and OLD data + var view = this; + newChartSettings = this.getChartSettings(); + + // update the chart config from the chartSettings panel controls + _.extend( this.chartConfig, newChartSettings ); + this.log( 'this.chartConfig:', this.chartConfig ); + this.chart.updateConfig( this.chartConfig, false ); + + // if there's current data, call chart.render with it (no data fetch) + if( view.data && view.meta ){ + view.showLoadingIndicator( view.renderMsg, function(){ + view.$el.find( 'ul.nav' ).find( 'a[href="#chart-display"]' ).tab( 'show' ); + view.chart.render( view.data, view.meta ); + view.hideLoadingIndicator(); + }); + + // no current data, call renderChart instead (which will fetch data) + } else { + this.renderChart(); + } + }, + + // ------------------------------------------------------------------------- DATA AGGREGATION + postProcessDataFetchResponse : function( response ){ + // the loader only returns new data - it's up to this to munge the fetches together properly + //TODO: we're now storing data in two places: loader and here + // can't we reduce incoming data into loader.data[0]? are there concurrency problems? 
+ this.postProcessData( response.data ); + this.postProcessMeta( response.meta ); + }, + + postProcessData : function( newData ){ + // stack the column data on top of each other into this.data + //this.log( this + '.postProcessData:', newData ); + var view = this; + + // if we already have data: aggregate + if( view.data ){ + _.each( newData, function( newColData, colIndex ){ + //view.log( colIndex + ' data:', newColData ); + //TODO??: time, space efficiency of this? + view.data[ colIndex ] = view.data[ colIndex ].concat( newColData ); + }); + + // otherwise: assign (first load) + } else { + view.data = newData; + } + }, + + postProcessMeta : function( newMeta ){ + // munge the meta data (stats) from the server fetches together + //pre: this.data must be preprocessed (needed for medians) + //this.log( this + '.postProcessMeta:', newMeta ); + var view = this, + colTypes = this.dataset.metadata_column_types; + + // if we already have meta: aggregate + if( view.meta ){ + _.each( newMeta, function( newColMeta, colIndex ){ + var colMeta = view.meta[ colIndex ], + colType = colTypes[ colIndex ]; + //view.log( '\t ' + colIndex + ' postprocessing meta:', newColMeta ); + //view.log( colIndex + ' old meta:', + // 'min:', colMeta.min, + // 'max:', colMeta.max, + // 'sum:', colMeta.sum, + // 'mean:', colMeta.mean, + // 'median:', colMeta.median + //); + + //!TODO: at what point are we getting int/float overflow on these?! + //??: need to be null safe? + colMeta.count += ( newColMeta.count )?( newColMeta.count ):( 0 ); + //view.log( colIndex, 'count:', colMeta.count ); + + if( ( colType === 'int' ) || ( colType === 'float' ) ){ + //view.log( colIndex + ' incoming meta:', + // 'min:', newColMeta.min, + // 'max:', newColMeta.max, + // 'sum:', newColMeta.sum, + // 'mean:', newColMeta.mean, + // 'median:', newColMeta.median + //); + + colMeta.min = Math.min( newColMeta.min, colMeta.min ); + colMeta.max = Math.max( newColMeta.max, colMeta.max ); + colMeta.sum = newColMeta.sum + colMeta.sum; + colMeta.mean = ( colMeta.count )?( colMeta.sum / colMeta.count ):( null ); + + // median's a pain bc of sorting (requires the data as well) + var sortedCol = view.data[ colIndex ].slice().sort(), + middleIndex = Math.floor( sortedCol.length / 2 ); + + if( sortedCol.length % 2 === 0 ){ + colMeta.median = ( ( sortedCol[ middleIndex ] + sortedCol[( middleIndex + 1 )] ) / 2 ); + + } else { + colMeta.median = sortedCol[ middleIndex ]; + } + + //view.log( colIndex + ' new meta:', + // 'min:', colMeta.min, + // 'max:', colMeta.max, + // 'sum:', colMeta.sum, + // 'mean:', colMeta.mean, + // 'median:', colMeta.median + //); + } + }); + + // otherwise: assign (first load) + } else { + view.meta = newMeta; + //view.log( '\t meta (first load):', view.meta ); + } + }, + + // ------------------------------------------------------------------------- GET DATA/CHART SETTINGS + getDataSettings : function(){ + // parse the column values for both indeces (for the data fetch) and names (for the chart) + var columnSelections = this.getColumnSelections(), + columns = []; + this.log( '\t columnSelections:', columnSelections ); + + //TODO: validate columns - minimally: we can assume either set by selectors or via a good query string + + // get column indices for params, include the desired ID column (if any) + //NOTE: these are presented in human-readable 1 base index (to match the data.peek) - adjust + columns = [ + columnSelections.X.colIndex - 1, + columnSelections.Y.colIndex - 1 + ]; + if( this.$dataControl.find( '#include-id-checkbox' 
).attr( 'checked' ) ){ + columns.push( columnSelections.ID.colIndex - 1 ); + } + //TODO: other vals: max, start, page + + var params = { + data_type : 'raw_data', + provider : 'column_with_stats', + columns : '[' + columns + ']' + }; + this.log( '\t data settings (url params):', params ); + return params; + }, + + getColumnSelections : function(){ + // gets the current user-selected values for which columns to fetch from the data settings panel + // returns a map: { column-select name (eg. X) : { colIndex : column-selector val, + // colName : selected option text }, ... } + var selections = {}; + this.$dataControl.find( 'div.column-select select' ).each( function(){ + var $this = $( this ), + val = $this.val(); + selections[ $this.attr( 'name' ) ] = { + colIndex : val, + colName : $this.children( '[value="' + val + '"]' ).text() + }; + }); + return selections; + }, + + getChartSettings : function(){ + // gets the user-selected chartConfig from the chart settings panel + var settings = {}, + colSelections = this.getColumnSelections(); + //this.log( 'colSelections:', colSelections ); + + //TODO: simplify with keys and loop + settings.datapointSize = this.$chartControl.find( '#datapointSize.numeric-slider-input' ) + .find( '.slider' ).slider( 'value' ); + settings.width = this.$chartControl.find( '#width.numeric-slider-input' ) + .find( '.slider' ).slider( 'value' ); + settings.height = this.$chartControl.find( '#height.numeric-slider-input' ) + .find( '.slider' ).slider( 'value' ); + + // update axes labels using chartSettings inputs (if not at defaults), otherwise the selects' colName + //TODO: a little confusing + var chartSettingsXLabel = this.$chartControl.find( 'input#X-axis-label' ).val(), + chartSettingsYLabel = this.$chartControl.find( 'input#Y-axis-label' ).val(); + settings.xLabel = ( chartSettingsXLabel === 'X' )? + ( colSelections.X.colName ):( chartSettingsXLabel ); + settings.yLabel = ( chartSettingsYLabel === 'Y' )? + ( colSelections.Y.colName ):( chartSettingsYLabel ); + + settings.animDuration = ( this.$chartControl.find( '#animate-chart' ).is( ':checked' ) )? + ( this.chart.defaults.animDuration ):( 0 ); + + this.log( '\t chartSettings:', settings ); + return settings; + }, + + toString : function(){ + return 'ScatterplotControlForm(' + (( this.dataset )?( this.dataset.id ):( '' )) + ')'; + } +}); + +ScatterplotControlForm.templates = { + mainLayout : Templates.scatterplotControlForm, + dataControl : Templates.dataControl, + chartControl : Templates.chartControl, + statsDisplay : Templates.statsDisplay, + chartDisplay : Templates.chartDisplay +}; + +//============================================================================== diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/src/visualization-templates.html --- /dev/null +++ b/config/plugins/visualizations/scatterplot/src/visualization-templates.html @@ -0,0 +1,182 @@ +<script type="text/template" class="template-visualization" id="template-visualization-scatterplotControlForm"> +{{! main layout }} + +<div class="scatterplot-container chart-container tabbable tabs-left"> + {{! tab buttons/headers using Bootstrap }} + <ul class="nav nav-tabs"> + {{! 
start with the data controls as the displayed tab }} + <li class="active"><a href="#data-control" data-toggle="tab" class="tooltip" + title="Use this tab to change which data are used">Data Controls</a></li> + <li><a href="#chart-control" data-toggle="tab" class="tooltip" + title="Use this tab to change how the chart is drawn">Chart Controls</a></li> + <li><a href="#stats-display" data-toggle="tab" class="tooltip" + title="This tab will display overall statistics for your data">Statistics</a></li> + <li><a href="#chart-display" data-toggle="tab" class="tooltip" + title="This tab will display the chart">Chart</a> + {{! loading indicator - initially hidden }} + <div id="loading-indicator" style="display: none;"> + <img class="loading-img" src="{{loadingIndicatorImagePath}}" /> + <span class="loading-message">{{message}}</span> + </div> + </li> + </ul> + + {{! data form, chart config form, stats, and chart all get their own tab }} + <div class="tab-content"> + {{! ---------------------------- tab for data settings form }} + <div id="data-control" class="tab-pane active"> + {{! rendered separately }} + </div> + + {{! ---------------------------- tab for chart graphics control form }} + <div id="chart-control" class="tab-pane"> + {{! rendered separately }} + </div> + + {{! ---------------------------- tab for data statistics }} + <div id="stats-display" class="tab-pane"> + {{! rendered separately }} + </div> + + {{! ---------------------------- tab for actual chart }} + <div id="chart-display" class="tab-pane"> + {{! chart rendered separately }} + </div> + + </div>{{! end .tab-content }} +</div>{{! end .chart-control }} +</script> + +<script type="text/template" class="template-visualization" id="template-visualization-dataControl"> + + <p class="help-text"> + Use the following controls to change the data used by the chart. + Use the 'Draw' button to render (or re-render) the chart with the current settings. + </p> + + {{! column selector containers }} + <div class="column-select"> + <label for="X-select">Data column for X: </label> + <select name="X" id="X-select"> + {{#each numericColumns}} + <option value="{{index}}">{{name}}</option> + {{/each}} + </select> + </div> + <div class="column-select"> + <label for="Y-select">Data column for Y: </label> + <select name="Y" id="Y-select"> + {{#each numericColumns}} + <option value="{{index}}">{{name}}</option> + {{/each}} + </select> + </div> + + {{! optional id column }} + <div id="include-id"> + <label for="include-id-checkbox">Include a third column as data point IDs?</label> + <input type="checkbox" name="include-id" id="include-id-checkbox" /> + <p class="help-text-small"> + These will be displayed (along with the x and y values) when you hover over + a data point. + </p> + </div> + <div class="column-select" style="display: none"> + <label for="ID-select">Data column for IDs: </label> + <select name="ID" id="ID-select"> + {{#each allColumns}} + <option value="{{index}}">{{name}}</option> + {{/each}} + </select> + </div> + + {{! if we're using generic column selection names ('column 1') - allow the user to use the first line }} + <div id="first-line-header" style="display: none;"> + <p>Possible headers: {{ possibleHeaders }} + </p> + <label for="first-line-header-checkbox">Use the above as column headers?</label> + <input type="checkbox" name="include-id" id="first-line-header-checkbox" + {{#if usePossibleHeaders }}checked="true"{{/if}}/> + <p class="help-text-small"> + It looks like Galaxy couldn't get proper column headers for this data. 
+ Would you like to use the column headers above as column names to select columns? + </p> + </div> + + <input id="render-button" type="button" value="Draw" /> + <div class="clear"></div> +</script> + +<script type="text/template" class="template-visualization" id="template-visualization-chartControl"> + <p class="help-text"> + Use the following controls to how the chart is displayed. + The slide controls can be moved by the mouse or, if the 'handle' is in focus, your keyboard's arrow keys. + Move the focus between controls by using the tab or shift+tab keys on your keyboard. + Use the 'Draw' button to render (or re-render) the chart with the current settings. + </p> + + <div id="datapointSize" class="form-input numeric-slider-input"> + <label for="datapointSize">Size of data point: </label> + <div class="slider-output">{{datapointSize}}</div> + <div class="slider"></div> + <p class="form-help help-text-small"> + Size of the graphic representation of each data point + </p> + </div> + + <div id="animDuration" class="form-input checkbox-input"> + <label for="animate-chart">Animate chart transitions?: </label> + <input type="checkbox" id="animate-chart" + class="checkbox control"{{#if animDuration}} checked="true"{{/if}} /> + <p class="form-help help-text-small"> + Uncheck this to disable the animations used on the chart + </p> + </div> + + <div id="width" class="form-input numeric-slider-input"> + <label for="width">Chart width: </label> + <div class="slider-output">{{width}}</div> + <div class="slider"></div> + <p class="form-help help-text-small"> + (not including chart margins and axes) + </p> + </div> + + <div id="height" class="form-input numeric-slider-input"> + <label for="height">Chart height: </label> + <div class="slider-output">{{height}}</div> + <div class="slider"></div> + <p class="form-help help-text-small"> + (not including chart margins and axes) + </p> + </div> + + <div id="X-axis-label"class="text-input form-input"> + <label for="X-axis-label">Re-label the X axis: </label> + <input type="text" name="X-axis-label" id="X-axis-label" value="{{xLabel}}" /> + <p class="form-help help-text-small"></p> + </div> + + <div id="Y-axis-label" class="text-input form-input"> + <label for="Y-axis-label">Re-label the Y axis: </label> + <input type="text" name="Y-axis-label" id="Y-axis-label" value="{{yLabel}}" /> + <p class="form-help help-text-small"></p> + </div> + + <input id="render-button" type="button" value="Draw" /> +</script> + +<script type="text/template" class="template-visualization" id="template-visualization-statsDisplay"> + <p class="help-text">By column:</p> + <table id="chart-stats-table"> + <thead><th></th><th>X</th><th>Y</th></thead> + {{#each stats}} + <tr><td>{{name}}</td><td>{{xval}}</td><td>{{yval}}</td></tr> + </tr> + {{/each}} + </table> +</script> + +<script type="text/template" class="template-visualization" id="template-visualization-chartDisplay"> + <svg width="{{width}}" height="{{height}}"></svg> +</script> diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/static/scatterplot.css --- /dev/null +++ b/config/plugins/visualizations/scatterplot/static/scatterplot.css @@ -0,0 +1,181 @@ +/*TODO: use/move into base.less*/ +* { margin: 0px; padding: 0px; } + +/* -------------------------------------------- general layout */ +div.tab-pane { + padding: 8px; +} + +/* -------------------------------------------- header */ +.header { + margin-bottom: 8px; +} + +#chart-header { + padding 
: 8px; + background-color: #ebd9b2; + margin-bottom: 16px; + overflow: auto; +} + +#chart-header .subtitle { + margin: -4px 0px 0px 4px; + padding : 0; + color: white; + font-size: small; +} + +/* -------------------------------------------- main layout */ +#scatterplot { + /*from width + margin of chart?*/ +} + +.scatterplot-container .tab-pane { +} + +/* -------------------------------------------- all controls */ + +#scatterplot input[type=button], +#scatterplot select { + width: 100%; + max-width: 256px; + margin-bottom: 8px; +} + +#scatterplot .help-text, +#scatterplot .help-text-small { + color: grey; +} + +#scatterplot .help-text { + padding-bottom: 16px; +} + +#scatterplot .help-text-small { + padding: 4px; + font-size: smaller; +} + +#scatterplot > * { +} + +#scatterplot input[value=Draw] { + display: block; + margin-top: 16px; +} + +#scatterplot .numeric-slider-input { + max-width: 70%; +} + +/* -------------------------------------------- data controls */ + +/* -------------------------------------------- chart controls */ +#chart-control .form-input { + /*display: table-row;*/ +} + +#chart-control label { + /*text-align: right;*/ + margin-bottom: 8px; + /*display: table-cell;*/ +} + +#chart-control .slider { + /*display: table-cell;*/ + height: 8px; + display: block; + margin: 8px 0px 0px 8px; +} + +#chart-control .slider-output { + /*display: table-cell;*/ + float: right; +} + +#chart-control input[type="text"] { + border: 1px solid lightgrey; +} + + +/* -------------------------------------------- statistics */ +#stats-display table#chart-stats-table { + width: 100%; +} + +#stats-display #chart-stats-table th { + width: 30%; + padding: 4px; + font-weight: bold; + color: grey; +} + +#stats-display #chart-stats-table td { + border: solid lightgrey; + border-width: 1px 0px 0px 1px; + padding: 4px; +} + +#stats-display #chart-stats-table td:nth-child(1) { + border-width: 1px 0px 0px 0px; + padding-right: 1em; + text-align: right; + font-weight: bold; + color: grey; +} + +/* -------------------------------------------- load indicators */ +#loading-indicator { + margin: 12px 0px 0px 8px; +} + +#scatterplot #loading-indicator .loading-message { + font-style: italic; + font-size: smaller; + color: grey; +} + +/* -------------------------------------------- chart area */ +#chart-holder { + overflow: auto; + margin-left: 8px; +} + +svg .grid-line { + fill: none; + stroke: lightgrey; + stroke-opacity: 0.5; + shape-rendering: crispEdges; + stroke-dasharray: 3, 3; +} + +svg .axis path, svg .axis line { + fill: none; + stroke: black; + shape-rendering: crispEdges; +} + +svg .axis text { + font-family: monospace; + font-size: 12px; +} + +svg #x-axis-label, svg #y-axis-label { + font-family: sans-serif; + font-size: 10px; +} + +svg .glyph { + stroke: none; + fill: black; + fill-opacity: 0.2; +} + +/* -------------------------------------------- info box */ +.chart-info-box { + border-radius: 4px; + padding: 4px; + background-color: white; + border: 1px solid black; +} + diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/static/scatterplot.js --- /dev/null +++ b/config/plugins/visualizations/scatterplot/static/scatterplot.js @@ -0,0 +1,1 @@ +function TwoVarScatterplot(a){var b=10,c=7,d=10,e=8,f=5;this.log=function(){if(this.debugging&&console&&console.debug){var a=Array.prototype.slice.call(arguments);a.unshift(this.toString()),console.debug.apply(console,a)}},this.log("new 
TwoVarScatterplot:",a),this.defaults={id:"TwoVarScatterplot",containerSelector:"body",maxDataPoints:3e4,datapointSize:4,animDuration:500,xNumTicks:10,yNumTicks:10,xAxisLabelBumpY:40,yAxisLabelBumpX:-40,width:400,height:400,marginTop:50,marginRight:50,marginBottom:50,marginLeft:50,xMin:null,xMax:null,yMin:null,yMax:null,xLabel:"X",yLabel:"Y"},this.config=_.extend({},this.defaults,a),this.log("intial config:",this.config),this.updateConfig=function(a){_.extend(this.config,a),this.log(this+".updateConfig:",this.config)},this.toString=function(){return this.config.id},this.translateStr=function(a,b){return"translate("+a+","+b+")"},this.rotateStr=function(a,b,c){return"rotate("+a+","+b+","+c+")"},this.adjustChartDimensions=function(a,b,c,d){a=a||0,b=b||0,c=c||0,d=d||0,this.svg.attr("width",this.config.width+(this.config.marginRight+b)+(this.config.marginLeft+d)).attr("height",this.config.height+(this.config.marginTop+a)+(this.config.marginBottom+c)).style("display","block"),this.content=this.svg.select("g.content").attr("transform",this.translateStr(this.config.marginLeft+d,this.config.marginTop+a))},this.preprocessData=function(a){return a.length>this.config.maxDataPoints?a.slice(0,this.config.maxDataPoints):a},this.findMinMaxes=function(a,b,c){this.xMin=this.config.xMin||c?c[0].min:d3.min(a),this.xMax=this.config.xMax||c?c[0].max:d3.max(a),this.yMin=this.config.yMin||c?c[1].min:d3.min(b),this.yMax=this.config.yMax||c?c[1].max:d3.max(b)},this.setUpScales=function(){this.xScale=d3.scale.linear().domain([this.xMin,this.xMax]).range([0,this.config.width]),this.yScale=d3.scale.linear().domain([this.yMin,this.yMax]).range([this.config.height,0])},this.setUpXAxis=function(){this.xAxisFn=d3.svg.axis().scale(this.xScale).ticks(this.config.xNumTicks).orient("bottom"),this.xAxis.attr("transform",this.translateStr(0,this.config.height)).call(this.xAxisFn);var a=d3.max(_.map([this.xMin,this.xMax],function(a){return String(a).length}));a>=f&&this.xAxis.selectAll("g").filter(":nth-child(odd)").style("display","none"),this.log("this.config.xLabel:",this.config.xLabel),this.xAxisLabel.attr("x",this.config.width/2).attr("y",this.config.xAxisLabelBumpY).attr("text-anchor","middle").text(this.config.xLabel),this.log("xAxisLabel:",this.xAxisLabel)},this.setUpYAxis=function(){this.yAxisFn=d3.svg.axis().scale(this.yScale).ticks(this.config.yNumTicks).orient("left"),this.yAxis.call(this.yAxisFn);var a=this.yAxis.selectAll("text").filter(function(a,b){return 0!==b});this.log("yTickLabels:",a),this.yLongestLabel=d3.max(a[0].map(function(a){return d3.select(a).text().length}))||0;var f=b+this.yLongestLabel*c+e+d;if(this.config.yAxisLabelBumpX=-(f-d),this.config.marginLeft<f){var g=f-this.config.marginLeft;g=0>g?0:g,this.adjustChartDimensions(0,0,0,g)}this.yAxisLabel.attr("x",this.config.yAxisLabelBumpX).attr("y",this.config.height/2).attr("text-anchor","middle").attr("transform",this.rotateStr(-90,this.config.yAxisLabelBumpX,this.config.height/2)).text(this.config.yLabel)},this.renderGrid=function(){this.vGridLines=this.content.selectAll("line.v-grid-line").data(this.xScale.ticks(this.xAxisFn.ticks()[0])),this.vGridLines.enter().append("svg:line").classed("grid-line v-grid-line",!0),this.vGridLines.attr("x1",this.xScale).attr("y1",0).attr("x2",this.xScale).attr("y2",this.config.height),this.vGridLines.exit().remove(),this.hGridLines=this.content.selectAll("line.h-grid-line").data(this.yScale.ticks(this.yAxisFn.ticks()[0])),this.hGridLines.enter().append("svg:line").classed("grid-line 
h-grid-line",!0),this.hGridLines.attr("x1",0).attr("y1",this.yScale).attr("x2",this.config.width).attr("y2",this.yScale),this.hGridLines.exit().remove()},this.renderDatapoints=function(a,b,c){this.log(this+".renderDatapoints",arguments);var d=0,e=this,f=function(b,c){return e.xScale(a[c])},g=function(a,c){return e.yScale(b[c])},h=this.content.selectAll(".glyph").data(a);d=0,h.enter().append("svg:circle").each(function(){d+=1}).classed("glyph",!0).attr("cx",0).attr("cy",this.config.height).attr("r",0),this.log(d," new glyphs created"),d=0,h.transition().duration(this.config.animDuration).each(function(){d+=1}).attr("cx",f).attr("cy",g).attr("r",e.config.datapointSize),this.log(d," existing glyphs transitioned"),h.exit().each(function(){d+=1}).transition().duration(this.config.animDuration).attr("cy",this.config.height).attr("r",0).remove(),this.log(d," glyphs removed"),this._addDatapointEventhandlers(h,a,b,c)},this._addDatapointEventhandlers=function(a,b,c,d){var e=this;a.on("mouseover",function(a,f){var g=d3.select(this);g.style("fill","red").style("fill-opacity",1),e.content.append("line").attr("stroke","red").attr("stroke-width",1).attr("x1",g.attr("cx")-e.config.datapointSize).attr("y1",g.attr("cy")).attr("x2",0).attr("y2",g.attr("cy")).classed("hoverline",!0),g.attr("cy")<e.config.height&&e.content.append("line").attr("stroke","red").attr("stroke-width",1).attr("x1",g.attr("cx")).attr("y1",g.attr("cy")+e.config.datapointSize).attr("x2",g.attr("cx")).attr("y2",e.config.height).classed("hoverline",!0);var h=$(this).offset();e.datapointInfoBox=e.infoBox(h.top,h.left,e.infoHtml(b[f],c[f],d?d[f]:void 0)),$("body").append(e.datapointInfoBox)}).on("mouseout",function(){d3.select(this).style("fill","black").style("fill-opacity",.2),e.content.selectAll(".hoverline").remove(),e.datapointInfoBox&&e.datapointInfoBox.remove()})},this.render=function(a,b){this.log(this+".render",arguments),this.log(" config:",this.config);var c=a[0],d=a[1],e=a.length>2?a[2]:void 0;c=this.preprocessData(c),d=this.preprocessData(d),this.log("xCol len",c.length,"yCol len",d.length),this.findMinMaxes(c,d,b),this.setUpScales(),this.svg||(this.svg=d3.select("svg").attr("class","chart")),this.content||(this.content=this.svg.append("svg:g").attr("class","content").attr("id",this.config.id)),this.adjustChartDimensions(),this.xAxis||(this.xAxis=this.content.append("g").attr("class","axis").attr("id","x-axis")),this.xAxisLabel||(this.xAxisLabel=this.xAxis.append("text").attr("class","axis-label").attr("id","x-axis-label")),this.yAxis||(this.yAxis=this.content.append("g").attr("class","axis").attr("id","y-axis")),this.yAxisLabel||(this.yAxisLabel=this.yAxis.append("text").attr("class","axis-label").attr("id","y-axis-label")),this.setUpXAxis(),this.setUpYAxis(),this.renderGrid(),this.renderDatapoints(c,d,e)},this.infoHtml=function(a,b,c){var d=$("<div/>");return c&&$("<div/>").text(c).css("font-weight","bold").appendTo(d),$("<div/>").text(a).appendTo(d),$("<div/>").text(b).appendTo(d),d.html()},this.infoBox=function(a,b,c,d,e){d=d||0,e=e||20;var f=$("<div />").addClass("chart-info-box").css({position:"absolute",top:a+d,left:b+e});return f.html(c),f}}this.Templates=this.Templates||{},this.Templates.chartControl=Handlebars.template(function(a,b,c,d,e){function f(){return' checked="true"'}this.compilerInfo=[4,">= 1.0.0"],c=this.merge(c,a.helpers),e=e||{};var g,h="",i="function",j=this.escapeExpression,k=this;return h+='<p class="help-text">\n Use the following controls to how the chart is displayed.\n The slide controls can be 
moved by the mouse or, if the \'handle\' is in focus, your keyboard\'s arrow keys.\n Move the focus between controls by using the tab or shift+tab keys on your keyboard.\n Use the \'Draw\' button to render (or re-render) the chart with the current settings.\n </p>\n\n <div id="datapointSize" class="form-input numeric-slider-input">\n <label for="datapointSize">Size of data point: </label>\n <div class="slider-output">',(g=c.datapointSize)?g=g.call(b,{hash:{},data:e}):(g=b.datapointSize,g=typeof g===i?g.apply(b):g),h+=j(g)+'</div>\n <div class="slider"></div>\n <p class="form-help help-text-small">\n Size of the graphic representation of each data point\n </p>\n </div>\n\n <div id="animDuration" class="form-input checkbox-input">\n <label for="animate-chart">Animate chart transitions?: </label>\n <input type="checkbox" id="animate-chart"\n class="checkbox control"',g=c["if"].call(b,b.animDuration,{hash:{},inverse:k.noop,fn:k.program(1,f,e),data:e}),(g||0===g)&&(h+=g),h+=' />\n <p class="form-help help-text-small">\n Uncheck this to disable the animations used on the chart\n </p>\n </div>\n\n <div id="width" class="form-input numeric-slider-input">\n <label for="width">Chart width: </label>\n <div class="slider-output">',(g=c.width)?g=g.call(b,{hash:{},data:e}):(g=b.width,g=typeof g===i?g.apply(b):g),h+=j(g)+'</div>\n <div class="slider"></div>\n <p class="form-help help-text-small">\n (not including chart margins and axes)\n </p>\n </div>\n\n <div id="height" class="form-input numeric-slider-input">\n <label for="height">Chart height: </label>\n <div class="slider-output">',(g=c.height)?g=g.call(b,{hash:{},data:e}):(g=b.height,g=typeof g===i?g.apply(b):g),h+=j(g)+'</div>\n <div class="slider"></div>\n <p class="form-help help-text-small">\n (not including chart margins and axes)\n </p>\n </div>\n\n <div id="X-axis-label"class="text-input form-input">\n <label for="X-axis-label">Re-label the X axis: </label>\n <input type="text" name="X-axis-label" id="X-axis-label" value="',(g=c.xLabel)?g=g.call(b,{hash:{},data:e}):(g=b.xLabel,g=typeof g===i?g.apply(b):g),h+=j(g)+'" />\n <p class="form-help help-text-small"></p>\n </div>\n\n <div id="Y-axis-label" class="text-input form-input">\n <label for="Y-axis-label">Re-label the Y axis: </label>\n <input type="text" name="Y-axis-label" id="Y-axis-label" value="',(g=c.yLabel)?g=g.call(b,{hash:{},data:e}):(g=b.yLabel,g=typeof g===i?g.apply(b):g),h+=j(g)+'" />\n <p class="form-help help-text-small"></p>\n </div>\n\n <input id="render-button" type="button" value="Draw" />'}),this.Templates.chartDisplay=Handlebars.template(function(a,b,c,d,e){this.compilerInfo=[4,">= 1.0.0"],c=this.merge(c,a.helpers),e=e||{};var f,g="",h="function",i=this.escapeExpression;return g+='<svg width="',(f=c.width)?f=f.call(b,{hash:{},data:e}):(f=b.width,f=typeof f===h?f.apply(b):f),g+=i(f)+'" height="',(f=c.height)?f=f.call(b,{hash:{},data:e}):(f=b.height,f=typeof f===h?f.apply(b):f),g+=i(f)+'"></svg>'}),this.Templates.dataControl=Handlebars.template(function(a,b,c,d,e){function f(a,b){var d,e="";return e+='\n <option value="',(d=c.index)?d=d.call(a,{hash:{},data:b}):(d=a.index,d=typeof d===j?d.apply(a):d),e+=k(d)+'">',(d=c.name)?d=d.call(a,{hash:{},data:b}):(d=a.name,d=typeof d===j?d.apply(a):d),e+=k(d)+"</option>\n "}function g(){return'checked="true"'}this.compilerInfo=[4,">= 1.0.0"],c=this.merge(c,a.helpers),e=e||{};var h,i="",j="function",k=this.escapeExpression,l=this;return i+='<p class="help-text">\n Use the following controls to change the data used by the chart.\n Use 
the \'Draw\' button to render (or re-render) the chart with the current settings.\n </p>\n\n \n <div class="column-select">\n <label for="X-select">Data column for X: </label>\n <select name="X" id="X-select">\n ',h=c.each.call(b,b.numericColumns,{hash:{},inverse:l.noop,fn:l.program(1,f,e),data:e}),(h||0===h)&&(i+=h),i+='\n </select>\n </div>\n <div class="column-select">\n <label for="Y-select">Data column for Y: </label>\n <select name="Y" id="Y-select">\n ',h=c.each.call(b,b.numericColumns,{hash:{},inverse:l.noop,fn:l.program(1,f,e),data:e}),(h||0===h)&&(i+=h),i+='\n </select>\n </div>\n\n \n <div id="include-id">\n <label for="include-id-checkbox">Include a third column as data point IDs?</label>\n <input type="checkbox" name="include-id" id="include-id-checkbox" />\n <p class="help-text-small">\n These will be displayed (along with the x and y values) when you hover over\n a data point.\n </p>\n </div>\n <div class="column-select" style="display: none">\n <label for="ID-select">Data column for IDs: </label>\n <select name="ID" id="ID-select">\n ',h=c.each.call(b,b.allColumns,{hash:{},inverse:l.noop,fn:l.program(1,f,e),data:e}),(h||0===h)&&(i+=h),i+='\n </select>\n </div>\n\n \n <div id="first-line-header" style="display: none;">\n <p>Possible headers: ',(h=c.possibleHeaders)?h=h.call(b,{hash:{},data:e}):(h=b.possibleHeaders,h=typeof h===j?h.apply(b):h),i+=k(h)+'\n </p>\n <label for="first-line-header-checkbox">Use the above as column headers?</label>\n <input type="checkbox" name="include-id" id="first-line-header-checkbox"\n ',h=c["if"].call(b,b.usePossibleHeaders,{hash:{},inverse:l.noop,fn:l.program(3,g,e),data:e}),(h||0===h)&&(i+=h),i+='/>\n <p class="help-text-small">\n It looks like Galaxy couldn\'t get proper column headers for this data.\n Would you like to use the column headers above as column names to select columns?\n </p>\n </div>\n\n <input id="render-button" type="button" value="Draw" />\n <div class="clear"></div>'}),this.Templates.scatterplotControlForm=Handlebars.template(function(a,b,c,d,e){this.compilerInfo=[4,">= 1.0.0"],c=this.merge(c,a.helpers),e=e||{};var f,g="",h="function",i=this.escapeExpression;return g+='\n\n<div class="scatterplot-container chart-container tabbable tabs-left">\n \n <ul class="nav nav-tabs">\n \n <li class="active"><a href="#data-control" data-toggle="tab" class="tooltip"\n title="Use this tab to change which data are used">Data Controls</a></li>\n <li><a href="#chart-control" data-toggle="tab" class="tooltip"\n title="Use this tab to change how the chart is drawn">Chart Controls</a></li>\n <li><a href="#stats-display" data-toggle="tab" class="tooltip"\n title="This tab will display overall statistics for your data">Statistics</a></li>\n <li><a href="#chart-display" data-toggle="tab" class="tooltip"\n title="This tab will display the chart">Chart</a>\n \n <div id="loading-indicator" style="display: none;">\n <img class="loading-img" src="',(f=c.loadingIndicatorImagePath)?f=f.call(b,{hash:{},data:e}):(f=b.loadingIndicatorImagePath,f=typeof f===h?f.apply(b):f),g+=i(f)+'" />\n <span class="loading-message">',(f=c.message)?f=f.call(b,{hash:{},data:e}):(f=b.message,f=typeof f===h?f.apply(b):f),g+=i(f)+"</span>\n </div>\n </li>\n </ul>\n\n "+'\n <div class="tab-content">\n '+'\n <div id="data-control" class="tab-pane active">\n '+"\n </div>\n \n "+'\n <div id="chart-control" class="tab-pane">\n '+"\n </div>\n\n "+'\n <div id="stats-display" class="tab-pane">\n '+"\n </div>\n\n "+'\n <div id="chart-display" class="tab-pane">\n '+"\n </div>\n\n 
</div>"+"\n</div>"}),this.Templates.statsDisplay=Handlebars.template(function(a,b,c,d,e){function f(a,b){var d,e="";return e+="\n <tr><td>",(d=c.name)?d=d.call(a,{hash:{},data:b}):(d=a.name,d=typeof d===i?d.apply(a):d),e+=j(d)+"</td><td>",(d=c.xval)?d=d.call(a,{hash:{},data:b}):(d=a.xval,d=typeof d===i?d.apply(a):d),e+=j(d)+"</td><td>",(d=c.yval)?d=d.call(a,{hash:{},data:b}):(d=a.yval,d=typeof d===i?d.apply(a):d),e+=j(d)+"</td></tr>\n </tr>\n "}this.compilerInfo=[4,">= 1.0.0"],c=this.merge(c,a.helpers),e=e||{};var g,h="",i="function",j=this.escapeExpression,k=this;return h+='<p class="help-text">By column:</p>\n <table id="chart-stats-table">\n <thead><th></th><th>X</th><th>Y</th></thead>\n ',g=c.each.call(b,b.stats,{hash:{},inverse:k.noop,fn:k.program(1,f,e),data:e}),(g||0===g)&&(h+=g),h+="\n </table>"});var ScatterplotControlForm=BaseView.extend(LoggableMixin).extend({className:"scatterplot-control-form",dataLoadDelay:4e3,dataLoadSize:5e3,loadingIndicatorImage:"loading_small_white_bg.gif",fetchMsg:"Fetching data...",renderMsg:"Rendering...",initialize:function(a){this.log(this+".initialize, attributes:",a),this.dataset=null,this.chartConfig=null,this.chart=null,this.loader=null,this.$dataControl=null,this.$chartControl=null,this.$statsDisplay=null,this.$chartDisplay=null,this.dataFetch=null,this.initializeFromAttributes(a),this.initializeChart(a),this.initializeDataLoader(a)},initializeFromAttributes:function(a){if(!a||!a.dataset)throw"ScatterplotView requires a dataset";if(this.dataset=a.dataset,"string"===jQuery.type(this.dataset.metadata_column_types)&&(this.dataset.metadata_column_types=this.dataset.metadata_column_types.split(", ")),this.log(" dataset:",this.dataset),this.dataset.comment_lines&&this.dataset.comment_lines.length){var b=this.dataset.comment_lines[0],c=b.split(" ");c.length===this.dataset.metadata_column_types.length&&(this.possibleHeaders=c)}if(!a.apiDatasetsURL)throw"ScatterplotView requires a apiDatasetsURL";this.dataURL=a.apiDatasetsURL+"/"+this.dataset.id+"?",this.log(" dataURL:",this.dataURL)},initializeChart:function(a){this.chartConfig=a.chartConfig||{},this.log(" initial chartConfig:",this.chartConfig),this.chart=new TwoVarScatterplot(this.chartConfig),this.chartConfig=this.chart.config},initializeDataLoader:function(a){var b=this;this.loader=new LazyDataLoader({url:null,start:a.start||0,total:a.total||this.dataset.metadata_data_lines,delay:this.dataLoadDelay,size:this.dataLoadSize,buildUrl:function(a,b){return this.url+"&"+jQuery.param({start_val:a,max_vals:b})}}),$(this.loader).bind("error",function(a,c,d){b.log("ERROR:",c,d),alert("ERROR fetching data:\n"+c+"\n"+d),b.hideLoadingIndicator()})},render:function(){return this.log(this+".render"),this.$el.append(ScatterplotControlForm.templates.mainLayout({loadingIndicatorImagePath:"/static/images/"+this.loadingIndicatorImage,message:""})),this.$dataControl=this._render_dataControl(),this.$chartControl=this._render_chartControl(),this.$statsDisplay=this.$el.find(".tab-pane#stats-display"),this.$chartDisplay=this._render_chartDisplay(),this.chartConfig.xColumn&&this.chartConfig.yColumn&&this.renderChart(),this.$el.find(".tooltip").tooltip(),this},_render_dataControl:function(){var a=this,b=[],c=[],d=this.possibleHeaders&&this.$dataControl?this.$dataControl.find("#first-line-header-checkbox").is(":checked"):!1;_.each(this.dataset.metadata_column_types,function(e,f){var g=f+1,h="column 
"+g;a.dataset.metadata_column_names?h=a.dataset.metadata_column_names[f]:d&&(h=a.possibleHeaders[f]),b.push({index:g,name:h}),("int"===e||"float"===e)&&c.push({index:g,name:h})});var e=this.$el.find(".tab-pane#data-control");return e.html(ScatterplotControlForm.templates.dataControl({allColumns:b,numericColumns:c,possibleHeaders:this.possibleHeaders?this.possibleHeaders.join(", "):"",usePossibleHeaders:d})),!this.dataset.metadata_column_names&&this.possibleHeaders&&e.find("#first-line-header").show(),e.find("#X-select").val(this.chartConfig.xColumn),e.find("#Y-select").val(this.chartConfig.yColumn),void 0!==this.chartConfig.idColumn&&(e.find("#include-id-checkbox").attr("checked",!0).trigger("change"),e.find("#ID-select").val(this.chartConfig.idColumn)),e},_render_chartControl:function(){var a=this,b=this.$el.find(".tab-pane#chart-control"),c={datapointSize:{min:2,max:10,step:1},width:{min:200,max:800,step:20},height:{min:200,max:800,step:20}};return b.append(ScatterplotControlForm.templates.chartControl(this.chartConfig)),b.find(".numeric-slider-input").each(function(){function b(){var a=$(this),b=a.slider("value");e.text(b)}var d=$(this),e=d.find(".slider-output"),f=d.find(".slider"),g=d.attr("id");f.slider(_.extend(c[g],{value:a.chartConfig[g],change:b,slide:b}))}),b},_render_chartDisplay:function(){var a=this.$el.find(".tab-pane#chart-display");return a.append(ScatterplotControlForm.templates.chartDisplay(this.chartConfig)),a},events:{"change #include-id-checkbox":"toggleThirdColumnSelector","change #first-line-header-checkbox":"rerenderDataControl","click #data-control #render-button":"renderChart","click #chart-control #render-button":"changeChartSettings"},toggleThirdColumnSelector:function(){this.$el.find('select[name="ID"]').parent().toggle()},rerenderDataControl:function(){this.$dataControl=this._render_dataControl()},showLoadingIndicator:function(a,b){a=a||"";var c=this.$el.find("div#loading-indicator");messageBox=c.find(".loading-message"),c.is(":visible")?a?messageBox.fadeOut("fast",function(){messageBox.text(a),messageBox.fadeIn("fast",b)}):b():(a&&messageBox.text(a),c.fadeIn("fast",b))},hideLoadingIndicator:function(a){this.$el.find("div#loading-indicator").fadeOut("fast",a)},renderChart:function(){this.log(this+".renderChart"),this.data=null,this.meta=null,_.extend(this.chartConfig,this.getChartSettings()),this.log(" chartConfig:",this.chartConfig),this.chart.updateConfig(this.chartConfig,!1),this.loader.url=this.dataURL+"&"+jQuery.param(this.getDataSettings()),this.log(" loader: total lines:",this.loader.total," url:",this.loader.url);var a=this;$(this.loader).bind("loaded.new",function(b,c){a.log(a+" loaded.new",c),a.postProcessDataFetchResponse(c),a.log(" postprocessed data:",a.data),a.log(" postprocessed meta:",a.meta),a.showLoadingIndicator(a.renderMsg,function(){a.chart.render(a.data,a.meta),a.renderStats(a.data,a.meta),a.hideLoadingIndicator()})}),$(this.loader).bind("complete",function(b,c){a.log(a+" 
complete",c),$(a.loader).unbind()}),a.showLoadingIndicator(a.fetchMsg,function(){a.$el.find("ul.nav").find('a[href="#chart-display"]').tab("show"),a.loader.load()})},renderStats:function(){this.log(this+".renderStats"),this.$statsDisplay.html(ScatterplotControlForm.templates.statsDisplay({stats:[{name:"Count",xval:this.meta[0].count,yval:this.meta[1].count},{name:"Min",xval:this.meta[0].min,yval:this.meta[1].min},{name:"Max",xval:this.meta[0].max,yval:this.meta[1].max},{name:"Sum",xval:this.meta[0].sum,yval:this.meta[1].sum},{name:"Mean",xval:this.meta[0].mean,yval:this.meta[1].mean},{name:"Median",xval:this.meta[0].median,yval:this.meta[1].median}]}))},changeChartSettings:function(){var a=this;newChartSettings=this.getChartSettings(),_.extend(this.chartConfig,newChartSettings),this.log("this.chartConfig:",this.chartConfig),this.chart.updateConfig(this.chartConfig,!1),a.data&&a.meta?a.showLoadingIndicator(a.renderMsg,function(){a.$el.find("ul.nav").find('a[href="#chart-display"]').tab("show"),a.chart.render(a.data,a.meta),a.hideLoadingIndicator()}):this.renderChart()},postProcessDataFetchResponse:function(a){this.postProcessData(a.data),this.postProcessMeta(a.meta)},postProcessData:function(a){var b=this;b.data?_.each(a,function(a,c){b.data[c]=b.data[c].concat(a)}):b.data=a},postProcessMeta:function(a){var b=this,c=this.dataset.metadata_column_types;b.meta?_.each(a,function(a,d){var e=b.meta[d],f=c[d];if(e.count+=a.count?a.count:0,"int"===f||"float"===f){e.min=Math.min(a.min,e.min),e.max=Math.max(a.max,e.max),e.sum=a.sum+e.sum,e.mean=e.count?e.sum/e.count:null;var g=b.data[d].slice().sort(),h=Math.floor(g.length/2);e.median=0===g.length%2?(g[h]+g[h+1])/2:g[h]}}):b.meta=a},getDataSettings:function(){var a=this.getColumnSelections(),b=[];this.log(" columnSelections:",a),b=[a.X.colIndex-1,a.Y.colIndex-1],this.$dataControl.find("#include-id-checkbox").attr("checked")&&b.push(a.ID.colIndex-1);var c={data_type:"raw_data",provider:"column_with_stats",columns:"["+b+"]"};return this.log(" data settings (url params):",c),c},getColumnSelections:function(){var a={};return this.$dataControl.find("div.column-select select").each(function(){var b=$(this),c=b.val();a[b.attr("name")]={colIndex:c,colName:b.children('[value="'+c+'"]').text()}}),a},getChartSettings:function(){var a={},b=this.getColumnSelections();a.datapointSize=this.$chartControl.find("#datapointSize.numeric-slider-input").find(".slider").slider("value"),a.width=this.$chartControl.find("#width.numeric-slider-input").find(".slider").slider("value"),a.height=this.$chartControl.find("#height.numeric-slider-input").find(".slider").slider("value");var c=this.$chartControl.find("input#X-axis-label").val(),d=this.$chartControl.find("input#Y-axis-label").val();return a.xLabel="X"===c?b.X.colName:c,a.yLabel="Y"===d?b.Y.colName:d,a.animDuration=this.$chartControl.find("#animate-chart").is(":checked")?this.chart.defaults.animDuration:0,this.log(" chartSettings:",a),a},toString:function(){return"ScatterplotControlForm("+(this.dataset?this.dataset.id:"")+")"}});ScatterplotControlForm.templates={mainLayout:Templates.scatterplotControlForm,dataControl:Templates.dataControl,chartControl:Templates.chartControl,statsDisplay:Templates.statsDisplay,chartDisplay:Templates.chartDisplay}; \ No newline at end of file diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/scatterplot/templates/scatterplot.mako --- /dev/null +++ b/config/plugins/visualizations/scatterplot/templates/scatterplot.mako 
@@ -0,0 +1,67 @@ +<!DOCTYPE HTML> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<title>${hda.name} | ${visualization_name}</title> + +## ---------------------------------------------------------------------------- +<link type="text/css" rel="Stylesheet" media="screen" href="/static/style/base.css"> +<link type="text/css" rel="Stylesheet" media="screen" href="/static/style/jquery-ui/smoothness/jquery-ui.css"> + +<link type="text/css" rel="Stylesheet" media="screen" href="/plugins/visualizations/scatterplot/static/scatterplot.css"> + +## ---------------------------------------------------------------------------- +<script type="text/javascript" src="/static/scripts/libs/jquery/jquery.js"></script> +<script type="text/javascript" src="/static/scripts/libs/jquery/jquery.migrate.js"></script> +<script type="text/javascript" src="/static/scripts/libs/underscore.js"></script> +<script type="text/javascript" src="/static/scripts/libs/backbone/backbone.js"></script> +<script type="text/javascript" src="/static/scripts/libs/backbone/backbone-relational.js"></script> +<script type="text/javascript" src="/static/scripts/libs/handlebars.runtime.js"></script> +<script type="text/javascript" src="/static/scripts/libs/d3.js"></script> +<script type="text/javascript" src="/static/scripts/libs/bootstrap.js"></script> +<script type="text/javascript" src="/static/scripts/libs/jquery/jquery-ui.js"></script> +<script type="text/javascript" src="/static/scripts/utils/LazyDataLoader.js"></script> +<script type="text/javascript" src="/static/scripts/mvc/base-mvc.js"></script> + +<script type="text/javascript" src="/plugins/visualizations/scatterplot/static/scatterplot.js"></script> + +</head> + +## ---------------------------------------------------------------------------- +<body> +%if not embedded: +## dataset info: only show if on own page +<div id="chart-header" class="header"> + <h2 class="title">Scatterplot of '${hda.name}'</h2> + <p class="subtitle">${hda.info}</p> +</div> +%endif + +<div id="scatterplot" class="scatterplot-control-form"></div> + +<script type="text/javascript"> +$(function(){ + var hda = ${h.to_json_string( trans.security.encode_dict_ids( hda.get_api_value() ) )}, + querySettings = ${h.to_json_string( query_args )}, + chartConfig = _.extend( querySettings, { + containerSelector : '#chart', + //TODO: move to ScatterplotControlForm.initialize + marginTop : ( querySettings.marginTop > 20 )?( querySettings.marginTop ):( 20 ), + + xColumn : querySettings.xColumn, + yColumn : querySettings.yColumn, + idColumn : querySettings.idColumn + }); + //console.debug( querySettings ); + + var settingsForm = new ScatterplotControlForm({ + dataset : hda, + apiDatasetsURL : "${h.url_for( controller='/api/datasets', action='index' )}", + el : $( '#scatterplot' ), + chartConfig : chartConfig + }).render(); + +}); +</script> + +</body> diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/sweepster/config/sweepster.xml --- /dev/null +++ b/config/plugins/visualizations/sweepster/config/sweepster.xml @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE visualization SYSTEM "../../visualization.dtd"> +<visualization name="sweepster"> + <data_sources> + <data_source> + <model_class>HistoryDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> + <to_param param_attr="id">dataset_id</to_param> + <to_param 
assign="hda">hda_ldda</to_param> + </data_source> + <data_source> + <model_class>LibraryDatasetDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> + <to_param param_attr="id">dataset_id</to_param> + <to_param assign="ldda">hda_ldda</to_param> + </data_source> + </data_sources> + <params> + <param type="visualization" var_name_in_template="viz">visualization</param> + <param type="hda_or_ldda" var_name_in_template="dataset">dataset_id</param> + <param_modifier type="string" modifies="dataset_id">hda_ldda</param_modifier> + </params> + <!-- template_root and template are currently ignored for the 'built-in' visualizations --> + <template_root>webapps/galaxy/visualization</template_root> + <template>sweepster.mako</template> + <render_location>_top</render_location> +</visualization> diff -r b076688903e1f96fb0453c61d0ea38b5d92cd89c -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 config/plugins/visualizations/trackster/config/trackster.xml --- /dev/null +++ b/config/plugins/visualizations/trackster/config/trackster.xml @@ -0,0 +1,30 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE visualization SYSTEM "../../visualization.dtd"> +<visualization name="trackster"> + <!--not tested yet --> + <data_sources> + <data_source> + <model_class>HistoryDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> + <to_param param_attr="id">dataset_id</to_param> + <to_param assign="hda">hda_ldda</to_param> + <to_param param_attr="dbkey">dbkey</to_param> + </data_source> + <data_source> + <model_class>LibraryDatasetDatasetAssociation</model_class> + <test type="isinstance" test_attr="datatype" result_type="datatype">data.Data</test> + <to_param param_attr="id">dataset_id</to_param> + <to_param assign="ldda">hda_ldda</to_param> + </data_source> + </data_sources> + <params> + <param type="visualization">id</param> + <param type="dataset">dataset_id</param> + <param type="genome_region">genome_region</param> + <param type="dbkey">dbkey</param> + </params> + <!-- template_root and template are currently ignored for the 'built-in' visualizations --> + <template_root>webapps/galaxy/visualization/tracks</template_root> + <template>browser.mako</template> + <render_location>_top</render_location> +</visualization> This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/31092efd9e1e/ Changeset: 31092efd9e1e User: ross...@gmail.com Date: 2013-08-12 05:04:57 Summary: tweaks. 
ready for branch merge Affected #: 2 files diff -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 -r 31092efd9e1e5d80adb8c968d34f4c04a3a487d1 tools/rgenetics/htseqsams2mx.xml --- a/tools/rgenetics/htseqsams2mx.xml +++ b/tools/rgenetics/htseqsams2mx.xml @@ -1,5 +1,5 @@ -<tool id="htseqsams2mx" name="SAM/BAM to count matrix" version="0.4"> - <description>using HTSeq code</description> +<tool id="htseqsams2mxlocal" name="SAM/BAM to count matrix" version="0.4"> + <description>using HTSeq code locally</description><stdio><exit_code range="666" level="warning" description="Exit code 666 encountered" /> diff -r c7d0b32602fb1f3ed4290359212ca24ea3bd1931 -r 31092efd9e1e5d80adb8c968d34f4c04a3a487d1 tools/sr_mapping/bowtie2_wrapper.xml --- a/tools/sr_mapping/bowtie2_wrapper.xml +++ b/tools/sr_mapping/bowtie2_wrapper.xml @@ -233,7 +233,7 @@ </action></actions></data> - <data format="bam" name="output" label="${tool.name} on ${on_string}_${jobtitle}_aligned.bam"> + <data format="bam" name="output" label="${on_string}_${jobtitle}_aligned.bam"><actions><conditional name="reference_genome.source"><when value="indexed"> https://bitbucket.org/galaxy/galaxy-central/commits/f830e3d7c6c0/ Changeset: f830e3d7c6c0 User: ross...@gmail.com Date: 2013-08-12 05:07:48 Summary: branch merge Affected #: 198 files diff -r 009088d5e76fb00794da78dc0ee3cdaa8524b7d8 -r f830e3d7c6c096a9a840dad523540bdcc84327b2 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -109,6 +109,7 @@ self.allow_user_creation = string_as_bool( kwargs.get( "allow_user_creation", "True" ) ) self.allow_user_deletion = string_as_bool( kwargs.get( "allow_user_deletion", "False" ) ) self.allow_user_dataset_purge = string_as_bool( kwargs.get( "allow_user_dataset_purge", "False" ) ) + self.use_data_id_on_string = string_as_bool( kwargs.get( "use_data_id_on_string", "False" ) ) self.allow_user_impersonation = string_as_bool( kwargs.get( "allow_user_impersonation", "False" ) ) self.new_user_dataset_access_role_default_private = string_as_bool( kwargs.get( "new_user_dataset_access_role_default_private", "False" ) ) self.collect_outputs_from = [ x.strip() for x in kwargs.get( 'collect_outputs_from', 'new_file_path,job_working_directory' ).lower().split(',') ] diff -r 009088d5e76fb00794da78dc0ee3cdaa8524b7d8 -r f830e3d7c6c096a9a840dad523540bdcc84327b2 lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -1,5 +1,6 @@ import os import galaxy.tools +import re from galaxy.exceptions import ObjectInvalid from galaxy.model import LibraryDatasetDatasetAssociation @@ -191,14 +192,20 @@ data = data.to_history_dataset_association( None ) inp_data[name] = data - else: # HDA - if data.hid: - input_names.append( 'data %s' % data.hid ) +# else: # HDA +# if data.hid: +# input_names.append( 'data %s' % data.hid ) input_ext = data.ext if data.dbkey not in [None, '?']: input_dbkey = data.dbkey - + data_name_sane = re.sub('[^a-zA-Z0-9_]+', '', data.name) + if trans.app.config.use_data_id_on_string: + # we want names in our on_strings not numbers + input_names.append(data_name_sane) + else: + if data.hid: + input_names.append('data %s' % data.hid) # Collect chromInfo dataset and add as parameters to incoming db_datasets = {} db_dataset = trans.db_dataset_for( input_dbkey ) @@ -232,11 +239,14 @@ if len( input_names ) == 1: on_text = input_names[0] elif len( input_names ) == 2: - on_text = '%s and %s' % tuple(input_names[0:2]) + #on_text = '%s and %s' % tuple(input_names[0:2]) + on_text = 
'%s_%s' % tuple(input_names[0:2]) elif len( input_names ) == 3: - on_text = '%s, %s, and %s' % tuple(input_names[0:3]) + #on_text = '%s, %s, and %s' % tuple(input_names[0:3]) + on_text = '%s_%s_%s' % tuple(input_names[0:3]) elif len( input_names ) > 3: - on_text = '%s, %s, and others' % tuple(input_names[0:2]) + #on_text = '%s, %s, and others' % tuple(input_names[0:2]) + on_text = '%s_%s_and_others' % tuple(input_names[0:2]) else: on_text = "" # Add the dbkey to the incoming parameters diff -r 009088d5e76fb00794da78dc0ee3cdaa8524b7d8 -r f830e3d7c6c096a9a840dad523540bdcc84327b2 tool-data/bowtie2_indices.loc.sample --- a/tool-data/bowtie2_indices.loc.sample +++ b/tool-data/bowtie2_indices.loc.sample @@ -1,37 +1,37 @@ -# bowtie2_indices.loc.sample -# This is a *.loc.sample file distributed with Galaxy that enables tools -# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. -# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup -# First create these data files and save them in your own data directory structure. -# Then, create a bowtie_indices.loc file to use those indexes with tools. -# Copy this file, save it with the same name (minus the .sample), -# follow the format examples, and store the result in this directory. -# The file should include an one line entry for each index set. -# The path points to the "basename" for the set, not a specific file. -# It has four text columns seperated by TABS. +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie2 indexed sequences data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie2_indices.loc +#file has this format (longer white space characters are TAB characters): # -# <unique_build_id><dbkey><display_name><file_base_path> +#<unique_build_id><dbkey><display_name><file_base_path> # -# So, for example, if you had hg18 indexes stored in: +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/bowtie2/hg18/, +#then the bowtie2_indices.loc entry would look like this: # -# /depot/data2/galaxy/hg19/bowtie2/ +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie2/hg18/hg18 # -# containing hg19 genome and hg19.*.bt2 files, such as: -# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa -# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 -# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 -# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 -# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 -# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 -# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +#and your /depot/data2/galaxy/bowtie2/hg18/ directory +#would contain hg18.*.ebwt files: # -# then the bowtie2_indices.loc entry could look like this: +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... # -#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +#Your bowtie2_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. 
For example: # -#More examples: +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie2/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie2/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie2/hg19/hg19 +#...etc... # -#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 -#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/mm10/bowtie2/dm3 +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. # -# diff -r 009088d5e76fb00794da78dc0ee3cdaa8524b7d8 -r f830e3d7c6c096a9a840dad523540bdcc84327b2 tools/ngs_rna/tophat2_wrapper.xml --- a/tools/ngs_rna/tophat2_wrapper.xml +++ b/tools/ngs_rna/tophat2_wrapper.xml @@ -1,5 +1,5 @@ -<tool id="tophat2" name="Tophat2" version="0.6"> - <!-- Wrapper compatible with Tophat version 2.0.0+ --> +<tool id="tophat2" name="Tophat2" version="0.5"> + <!-- Wrapper compatible with Tophat version 2.0.0 --><description>Gapped-read mapper for RNA-seq data</description><version_command>tophat2 --version</version_command><requirements> @@ -126,6 +126,14 @@ </command><inputs> + <param name="jobname" type="text" value="Tophat2" size="80" label="Job title for outputs" + help="Output name to remind you what this was for"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="_" /> + </valid> + </sanitizer> + </param><conditional name="singlePaired"><param name="sPaired" type="select" label="Is this library mate-paired?"><option value="single">Single-end</option> @@ -268,25 +276,24 @@ </inputs><stdio> - <regex match="Exception|Error" source="both" level="fatal" description="Tool execution failed"/> + <regex match="Exception" source="both" level="fatal" description="Tool exception"/><regex match=".*" source="both" level="log" description="tool progress"/></stdio><outputs> - <data format="txt" name="align_summary" label="${tool.name} on ${on_string}: align_summary" from_work_dir="tophat_out/align_summary.txt"/> - <data format="tabular" name="fusions" label="${tool.name} on ${on_string}: fusions" from_work_dir="tophat_out/fusions.out"> + <data format="tabular" name="fusions" label="${on_string}_${jobname}_fusions.xls" from_work_dir="tophat_out/fusions.out"><filter>(params['settingsType'] == 'full' and params['fusion_search']['do_search'] == 'Yes')</filter></data> - <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed"> + <data format="bed" name="insertions" label="${on_string}_${jobname}_ins.bed" from_work_dir="tophat_out/insertions.bed"><expand macro="dbKeyActions" /></data> - <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed"> + <data format="bed" name="deletions" label="${on_string}_${jobname}_del.bed" from_work_dir="tophat_out/deletions.bed"><expand macro="dbKeyActions" /></data> - <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions" from_work_dir="tophat_out/junctions.bed"> + <data format="bed" name="junctions" label="${on_string}_${jobname}_splicejunc.bed" from_work_dir="tophat_out/junctions.bed"><expand macro="dbKeyActions" /></data> - <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits" from_work_dir="tophat_out/accepted_hits.bam"> + <data 
format="bam" name="accepted_hits" label="${on_string}_${jobname}_hits.bam" from_work_dir="tophat_out/accepted_hits.bam"><expand macro="dbKeyActions" /></data></outputs> @@ -313,7 +320,6 @@ </actions></macro></macros> - <tests><!-- Test base-space single-end reads with pre-built index and preset parameters --><test> @@ -452,8 +458,7 @@ <help> **Tophat Overview** -TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie(2), and then analyzes the mapping results to identify splice junctions between exons. Please cite: Kim D, Pertea G, Trapnell C, Pimentel H, Kelley R, and Salzberg SL. TopHat2: accurate alignment -of transcriptomes in the presence of insertions, deletions and gene fusions. Genome Biol 14:R36, 2013. +TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009). .. _Tophat: http://tophat.cbcb.umd.edu/ diff -r 009088d5e76fb00794da78dc0ee3cdaa8524b7d8 -r f830e3d7c6c096a9a840dad523540bdcc84327b2 tools/rgedgeR/rgToolFactory.py --- /dev/null +++ b/tools/rgedgeR/rgToolFactory.py @@ -0,0 +1,605 @@ +# rgToolFactory.py +# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home +# +# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 +# +# all rights reserved +# Licensed under the LGPL +# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home +# +# july 2013 +# added ability to combine images and individual log files into html output +# just make sure there's a log file foo.log and it will be output +# together with all images named like "foo_*.pdf +# otherwise old format for html +# +# January 2013 +# problem pointed out by Carlos Borroto +# added escaping for <>$ - thought I did that ages ago... +# +# August 11 2012 +# changed to use shell=False and cl as a sequence + +# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye. +# It also serves as the wrapper for the new tool. +# +# you paste and run your script +# Only works for simple scripts that read one input from the history. +# Optionally can write one new history dataset, +# and optionally collect any number of outputs into links on an autogenerated HTML page. + +# DO NOT install on a public or important site - please. + +# installed generated tools are fine if the script is safe. +# They just run normally and their user cannot do anything unusually insecure +# but please, practice safe toolshed. +# Read the fucking code before you install any tool +# especially this one + +# After you get the script working on some test data, you can +# optionally generate a toolshed compatible gzip file +# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for +# safe and largely automated installation in a production Galaxy. + +# If you opt for an HTML output, you get all the script outputs arranged +# as a single Html history item - all output files are linked, thumbnails for all the pdfs. +# Ugly but really inexpensive. +# +# Patches appreciated please. 
+# +# +# long route to June 2012 product +# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them +# derived from an integrated script model +# called rgBaseScriptWrapper.py +# Note to the unwary: +# This tool allows arbitrary scripting on your Galaxy as the Galaxy user +# There is nothing stopping a malicious user doing whatever they choose +# Extremely dangerous!! +# Totally insecure. So, trusted users only +# +# preferred model is a developer using their throw away workstation instance - ie a private site. +# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool. +# + +import sys +import shutil +import subprocess +import os +import time +import tempfile +import optparse +import tarfile +import re +import shutil +import math + +progname = os.path.split(sys.argv[0])[1] +myversion = 'V000.2 June 2012' +verbose = False +debug = False +toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory' + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +html_escape_table = { + "&": "&", + ">": ">", + "<": "<", + "$": "\$" + } + +def html_escape(text): + """Produce entities within text.""" + return "".join(html_escape_table.get(c,c) for c in text) + +def cmd_exists(cmd): + return subprocess.call("type " + cmd, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 + + +class ScriptRunner: + """class is a wrapper for an arbitrary script + """ + + def __init__(self,opts=None,treatbashSpecial=True): + """ + cleanup inputs, setup some outputs + + """ + self.useGM = cmd_exists('gm') + self.useIM = cmd_exists('convert') + self.useGS = cmd_exists('gs') + self.treatbashSpecial = treatbashSpecial + if opts.output_dir: # simplify for the tool tarball + os.chdir(opts.output_dir) + self.thumbformat = 'png' + self.opts = opts + self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but.. 
+ self.toolid = self.toolname + self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later + self.pyfile = self.myname # crude but efficient - the cruft won't hurt much + self.xmlfile = '%s.xml' % self.toolname + s = open(self.opts.script_path,'r').readlines() + s = [x.rstrip() for x in s] # remove pesky dos line endings if needed + self.script = '\n'.join(s) + fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter)) + tscript = open(self.sfile,'w') # use self.sfile as script source for Popen + tscript.write(self.script) + tscript.close() + self.indentedScript = '\n'.join([' %s' % x for x in s]) # for restructured text in help + self.escapedScript = '\n'.join([html_escape(x) for x in s]) + self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname) + if opts.output_dir: # may not want these complexities + self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname) + art = '%s.%s' % (self.toolname,opts.interpreter) + artpath = os.path.join(self.opts.output_dir,art) # need full path + artifact = open(artpath,'w') # use self.sfile as script source for Popen + artifact.write(self.script) + artifact.close() + self.cl = [] + self.html = [] + a = self.cl.append + a(opts.interpreter) + if self.treatbashSpecial and opts.interpreter in ['bash','sh']: + a(self.sfile) + else: + a('-') # stdin + a(opts.input_tab) + a(opts.output_tab) + self.outFormats = 'tabular' # TODO make this an option at tool generation time + self.inputFormats = 'tabular' # TODO make this an option at tool generation time + self.test1Input = '%s_test1_input.xls' % self.toolname + self.test1Output = '%s_test1_output.xls' % self.toolname + self.test1HTML = '%s_test1_output.html' % self.toolname + + def makeXML(self): + """ + Create a Galaxy xml tool wrapper for the new script as a string to write out + fixme - use templating or something less fugly than this example of what we produce + + <tool id="reverse" name="reverse" version="0.01"> + <description>a tabular file</description> + <command interpreter="python"> + reverse.py --script_path "$runMe" --interpreter "python" + --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file" + </command> + <inputs> + <param name="input1" type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/> + + </inputs> + <outputs> + <data format="tabular" name="tab_file" label="${job_name}"/> + + </outputs> + <help> + +**What it Does** + +Reverse the columns in a tabular file + + </help> + <configfiles> + <configfile name="runMe"> + +# reverse order of columns in a tabular file +import sys +inp = sys.argv[1] +outp = sys.argv[2] +i = open(inp,'r') +o = open(outp,'w') +for row in i: + rs = row.rstrip().split('\t') + rs.reverse() + o.write('\t'.join(rs)) + o.write('\n') +i.close() +o.close() + + + </configfile> + </configfiles> + </tool> + + """ + newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s"> + %(tooldesc)s + %(command)s + <inputs> + %(inputs)s + </inputs> + <outputs> + %(outputs)s + </outputs> + <configfiles> + <configfile name="runMe"> + %(script)s + </configfile> + </configfiles> + %(tooltests)s + <help> + %(help)s + </help> + </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto + + newCommand="""<command 
interpreter="python"> + %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" + --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s + </command>""" # may NOT be an input or htmlout + tooltestsTabOnly = """<tests><test> + <param name="input1" value="%(test1Input)s" ftype="tabular"/> + <param name="job_name" value="test1"/> + <param name="runMe" value="$runMe"/> + <output name="tab_file" file="%(test1Output)s" ftype="tabular"/> + </test></tests>""" + tooltestsHTMLOnly = """<tests><test> + <param name="input1" value="%(test1Input)s" ftype="tabular"/> + <param name="job_name" value="test1"/> + <param name="runMe" value="$runMe"/> + <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/> + </test></tests>""" + tooltestsBoth = """<tests><test> + <param name="input1" value="%(test1Input)s" ftype="tabular"/> + <param name="job_name" value="test1"/> + <param name="runMe" value="$runMe"/> + <output name="tab_file" file="%(test1Output)s" ftype="tabular" /> + <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/> + </test></tests>""" + xdict = {} + xdict['tool_version'] = self.opts.tool_version + xdict['test1Input'] = self.test1Input + xdict['test1HTML'] = self.test1HTML + xdict['test1Output'] = self.test1Output + if self.opts.make_HTML and self.opts.output_tab <> 'None': + xdict['tooltests'] = tooltestsBoth % xdict + elif self.opts.make_HTML: + xdict['tooltests'] = tooltestsHTMLOnly % xdict + else: + xdict['tooltests'] = tooltestsTabOnly % xdict + xdict['script'] = self.escapedScript + # configfile is least painful way to embed script to avoid external dependencies + # but requires escaping of <, > and $ to avoid Mako parsing + if self.opts.help_text: + xdict['help'] = open(self.opts.help_text,'r').read() + else: + xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation' + coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::'] + coda.append(self.indentedScript) + coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.' % (self.opts.user_email,timenow())) + coda.append('See %s for details of that project' % (toolFactoryURL)) + coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. 
') + coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573') + xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda)) + if self.opts.tool_desc: + xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc + else: + xdict['tooldesc'] = '' + xdict['command_outputs'] = '' + xdict['outputs'] = '' + if self.opts.input_tab <> 'None': + xdict['command_inputs'] = '--input_tab "$input1" ' # the space may matter a lot if we append something + xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats + else: + xdict['command_inputs'] = '' # assume no input - eg a random data generator + xdict['inputs'] = '' + xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname + xdict['toolname'] = self.toolname + xdict['toolid'] = self.toolid + xdict['interpreter'] = self.opts.interpreter + xdict['scriptname'] = self.sfile + if self.opts.make_HTML: + xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" ' + xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n' + if self.opts.output_tab <> 'None': + xdict['command_outputs'] += ' --output_tab "$tab_file"' + xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats + xdict['command'] = newCommand % xdict + xmls = newXML % xdict + xf = open(self.xmlfile,'w') + xf.write(xmls) + xf.write('\n') + xf.close() + # ready for the tarball + + + def makeTooltar(self): + """ + a tool is a gz tarball with eg + /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ... + """ + retval = self.run() + if retval: + print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry' + sys.exit(1) + self.makeXML() + tdir = self.toolname + os.mkdir(tdir) + if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe.. + testdir = os.path.join(tdir,'test-data') + os.mkdir(testdir) # make tests directory + shutil.copyfile(self.opts.input_tab,os.path.join(testdir,self.test1Input)) + if self.opts.output_tab <> 'None': + shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output)) + if self.opts.make_HTML: + shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML)) + if self.opts.output_dir: + shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log')) + op = '%s.py' % self.toolname # new name + outpiname = os.path.join(tdir,op) # path for the tool tarball + pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM) + notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op,pyin,pyin),] + notes.append('# to make a new Galaxy tool called %s\n' % self.toolname) + notes.append('# User %s at %s\n' % (self.opts.user_email,timenow())) + pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) 
- first Galaxy worm + notes += pi + outpi = open(outpiname,'w') + outpi.write(''.join(notes)) + outpi.write('\n') + outpi.close() + stname = os.path.join(tdir,self.sfile) + if not os.path.exists(stname): + shutil.copyfile(self.sfile, stname) + xtname = os.path.join(tdir,self.xmlfile) + if not os.path.exists(xtname): + shutil.copyfile(self.xmlfile,xtname) + tarpath = "%s.gz" % self.toolname + tar = tarfile.open(tarpath, "w:gz") + tar.add(tdir,arcname=self.toolname) + tar.close() + shutil.copyfile(tarpath,self.opts.new_tool) + shutil.rmtree(tdir) + ## TODO: replace with optional direct upload to local toolshed? + return retval + + + def compressPDF(self,inpdf=None,thumbformat='png'): + """need absolute path to pdf + """ + assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName) + hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname) + sto = open(hlog,'w') + outpdf = '%s_compressed' % inpdf + cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf] + x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + retval1 = x.wait() + if retval1 == 0: + os.unlink(inpdf) + shutil.move(outpdf,inpdf) + outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat) + if self.useGM: + cl2 = ['gm convert', inpdf, outpng] + else: # assume imagemagick + cl2 = ['convert', inpdf, outpng] + x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + retval2 = x.wait() + sto.close() + retval = retval1 or retval2 + return retval + + + def getfSize(self,fpath,outpath): + """ + format a nice file size string + """ + size = '' + fp = os.path.join(outpath,fpath) + if os.path.isfile(fp): + size = '0 B' + n = float(os.path.getsize(fp)) + if n > 2**20: + size = '%1.1f MB' % (n/2**20) + elif n > 2**10: + size = '%1.1f KB' % (n/2**10) + elif n > 0: + size = '%d B' % (int(n)) + return size + + def makeHtml(self): + """ Create an HTML file content to list all the artifacts found in the output_dir + """ + + galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> + <title></title> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + </head> + <body> + <div class="toolFormBody"> + """ + galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>""" + galhtmlpostfix = """</div></body></html>\n""" + + flist = os.listdir(self.opts.output_dir) + flist = [x for x in flist if x <> 'Rplots.pdf'] + flist.sort() + html = [] + html.append(galhtmlprefix % progname) + html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow())) + fhtml = [] + if len(flist) > 0: + logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections + logfiles.sort() + logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)] + logfiles.append(os.path.abspath(self.tlog)) # make it the last one + pdflist = [] + npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']) + for rownum,fname in enumerate(flist): + dname,e = os.path.splitext(fname) 
+ sfsize = self.getfSize(fname,self.opts.output_dir) + if e.lower() == '.pdf' : # compress and make a thumbnail + thumb = '%s.%s' % (dname,self.thumbformat) + pdff = os.path.join(self.opts.output_dir,fname) + retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat) + if retval == 0: + pdflist.append((fname,thumb)) + if (rownum+1) % 2 == 0: + fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize)) + else: + fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize)) + for logfname in logfiles: # expect at least tlog - if more + if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later + sectionname = 'All tool run' + if (len(logfiles) > 1): + sectionname = 'Other' + ourpdfs = pdflist + else: + realname = os.path.basename(logfname) + sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log + ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname] + pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove + nacross = 1 + npdf = len(ourpdfs) + + if npdf > 0: + nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2))) + if int(nacross)**2 != npdf: + nacross += 1 + nacross = int(nacross) + width = min(400,int(1200/nacross)) + html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname) + html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>') + ntogo = nacross # counter for table row padding with empty cells + html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>') + for i,paths in enumerate(ourpdfs): + fname,thumb = paths + s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" + alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname) + if ((i+1) % nacross == 0): + s += '</tr>\n' + ntogo = 0 + if i < (npdf - 1): # more to come + s += '<tr>' + ntogo = nacross + else: + ntogo -= 1 + html.append(s) + if html[-1].strip().endswith('</tr>'): + html.append('</table></div>\n') + else: + if ntogo > 0: # pad + html.append('<td> </td>'*ntogo) + html.append('</tr></table></div>\n') + logt = open(logfname,'r').readlines() + logtext = [x for x in logt if x.strip() > ''] + html.append('<div class="toolFormTitle">%s log output</div>' % sectionname) + if len(logtext) > 1: + html.append('\n<pre>\n') + html += logtext + html.append('\n</pre>\n') + else: + html.append('%s is empty<br/>' % logfname) + if len(fhtml) > 0: + fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n') + fhtml.append('</table></div><br/>') + html.append('<div class="toolFormTitle">All output files available for downloading</div>\n') + html += fhtml # add all non-pdf files to the end of the display + else: + html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter) + html.append(galhtmlpostfix) + htmlf = file(self.opts.output_html,'w') + htmlf.write('\n'.join(html)) + htmlf.write('\n') + htmlf.close() + self.html = html + + + def run(self): + """ + scripts must be small enough not to fill the pipe! 
+ """ + if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']: + retval = self.runBash() + else: + if self.opts.output_dir: + ste = open(self.elog,'w') + sto = open(self.tlog,'w') + sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl)) + sto.flush() + p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir) + else: + p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE) + p.stdin.write(self.script) + p.stdin.close() + retval = p.wait() + if self.opts.output_dir: + sto.close() + ste.close() + err = open(self.elog,'r').readlines() + if retval <> 0 and err: # problem + print >> sys.stderr,err + if self.opts.make_HTML: + self.makeHtml() + return retval + + def runBash(self): + """ + cannot use - for bash so use self.sfile + """ + if self.opts.output_dir: + s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl) + sto = open(self.tlog,'w') + sto.write(s) + sto.flush() + p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir) + else: + p = subprocess.Popen(self.cl,shell=False) + retval = p.wait() + if self.opts.output_dir: + sto.close() + if self.opts.make_HTML: + self.makeHtml() + return retval + + +def main(): + u = """ + This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as: + <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript" + </command> + """ + op = optparse.OptionParser() + a = op.add_option + a('--script_path',default=None) + a('--tool_name',default=None) + a('--interpreter',default=None) + a('--output_dir',default=None) + a('--output_html',default=None) + a('--input_tab',default="None") + a('--output_tab',default="None") + a('--user_email',default='Unknown') + a('--bad_user',default=None) + a('--make_Tool',default=None) + a('--make_HTML',default=None) + a('--help_text',default=None) + a('--tool_desc',default=None) + a('--new_tool',default=None) + a('--tool_version',default=None) + opts, args = op.parse_args() + assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user) + assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq' + assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript' + assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R' + if opts.output_dir: + try: + os.makedirs(opts.output_dir) + except: + pass + r = ScriptRunner(opts) + if opts.make_Tool: + retcode = r.makeTooltar() + else: + retcode = r.run() + os.unlink(r.sfile) + if retcode: + sys.exit(retcode) # indicate failure to job runner + + +if __name__ == "__main__": + main() + + diff -r 009088d5e76fb00794da78dc0ee3cdaa8524b7d8 -r f830e3d7c6c096a9a840dad523540bdcc84327b2 tools/rgedgeR/rgedgeRpaired.xml --- /dev/null +++ b/tools/rgedgeR/rgedgeRpaired.xml @@ -0,0 +1,1079 @@ +<tool id="rgDifferentialCount" name="Differential_Count" version="0.20"> + <description>models using BioConductor packages</description> + <requirements> + <requirement type="package" version="2.12">biocbasics</requirement> + <requirement type="package" version="3.0.1">r3</requirement> + <requirement type="package" version="1.3.18">graphicsmagick</requirement> + <requirement type="package" version="9.07">ghostscript</requirement> + </requirements> + + <command 
interpreter="python"> + rgToolFactory.py --script_path "$runme" --interpreter "Rscript" --tool_name "DifferentialCounts" + --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" + </command> + <inputs> + <param name="input1" type="data" format="tabular" label="Select an input matrix - rows are contigs, columns are counts for each sample" + help="Use the HTSeq based count matrix preparation tool to create these matrices from BAM/SAM files and a GTF file of genomic features"/> + <param name="title" type="text" value="Differential Counts" size="80" label="Title for job outputs" + help="Supply a meaningful name here to remind you what the outputs contain"> + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <param name="treatment_name" type="text" value="Treatment" size="50" label="Treatment Name"/> + <param name="Treat_cols" label="Select columns containing treatment." type="data_column" data_ref="input1" numerical="True" + multiple="true" use_header_names="true" size="120" display="checkboxes"> + <validator type="no_options" message="Please select at least one column."/> + </param> + <param name="control_name" type="text" value="Control" size="50" label="Control Name"/> + <param name="Control_cols" label="Select columns containing control." type="data_column" data_ref="input1" numerical="True" + multiple="true" use_header_names="true" size="120" display="checkboxes" optional="true"> + </param> + <param name="subjectids" type="text" optional="true" size="120" value = "" + label="IF SUBJECTS NOT ALL INDEPENDENT! Enter comma separated strings to indicate sample labels for (eg) pairing - must be one for every column in input" + help="Leave blank if no pairing, but eg if data from sample id A99 is in columns 2,4 and id C21 is in 3,5 then enter 'A99,C21,A99,C21'"> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="," /></valid> + </sanitizer> + </param> + <param name="fQ" type="float" value="0.3" size="5" label="Non-differential contig count quantile threshold - zero to analyze all non-zero read count contigs" + help="May be a good or a bad idea depending on the biology and the question. EG 0.3 = sparsest 30% of contigs with at least one read are removed before analysis"/> + <param name="useNDF" type="boolean" truevalue="T" falsevalue="F" checked="false" size="1" + label="Non differential filter - remove contigs below a threshold (1 per million) for half or more samples" + help="May be a good or a bad idea depending on the biology and the question. This was the old default. Quantile based is available as an alternative"/> + + <conditional name="edgeR"> + <param name="doedgeR" type="select" + label="Run this model using edgeR" + help="edgeR uses a negative binomial model and seems to be powerful, even with few replicates"> + <option value="F">Do not run edgeR</option> + <option value="T" selected="true">Run edgeR</option> + </param> + <when value="T"> + <param name="edgeR_priordf" type="integer" value="20" size="3" + label="prior.df for tagwise dispersion - lower value = more emphasis on each tag's variance. Replaces prior.n and prior.df = prior.n * residual.df" + help="0 = Use edgeR default. Use a small value to 'smooth' small samples. 
See edgeR docs and note below"/> + </when> + <when value="F"></when> + </conditional> + <conditional name="DESeq2"> + <param name="doDESeq2" type="select" + label="Run the same model with DESeq2 and compare findings" + help="DESeq2 is an update to the DESeq package. It uses different assumptions and methods to edgeR"> + <option value="F" selected="true">Do not run DESeq2</option> + <option value="T">Run DESeq2</option> + </param> + <when value="T"> + <param name="DESeq_fitType" type="select"> + <option value="parametric" selected="true">Parametric (default) fit for dispersions</option> + <option value="local">Local fit - this will automagically be used if parametric fit fails</option> + <option value="mean">Mean dispersion fit- use this if you really understand what you're doing - read the fine manual linked below in the documentation</option> + </param> + </when> + <when value="F"></when> + </conditional> + <param name="doVoom" type="select" + label="Run the same model with Voom/limma and compare findings" + help="Voom uses counts per million and a precise transformation of variance so count data can be analysed using limma"> + <option value="F" selected="true">Do not run VOOM</option> + <option value="T">Run VOOM</option> + </param> + <conditional name="camera"> + <param name="doCamera" type="select" label="Run the edgeR implementation of Camera GSEA for up/down gene sets" + help="If yes, you can choose a set of genesets to test and/or supply a gmt format geneset collection from your history"> + <option value="F" selected="true">Do not run GSEA tests with the Camera algorithm</option> + <option value="T">Run GSEA tests with the Camera algorithm</option> + </param> + <when value="T"> + <conditional name="gmtSource"> + <param name="refgmtSource" type="select" + label="Use a gene set (.gmt) from your history and/or use a built-in (MSigDB etc) gene set"> + <option value="indexed" selected="true">Use a built-in gene set</option> + <option value="history">Use a gene set from my history</option> + <option value="both">Add a gene set from my history to a built in gene set</option> + </param> + <when value="indexed"> + <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis"> + <options from_data_table="gseaGMT_3.1"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No GMT v3.1 files are available - please install them"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownGMT" type="data" format="gmt" label="Select a Gene Set from your history" /> + </when> + <when value="both"> + <param name="ownGMT" type="data" format="gseagmt" label="Select a Gene Set from your history" /> + <param name="builtinGMT" type="select" label="Select a gene set matrix (.gmt) file to use for the analysis"> + <options from_data_table="gseaGMT_4"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No GMT v4 files are available - please fix tool_data_table and loc files"/> + </options> + </param> + </when> + </conditional> + </when> + <when value="F"> + </when> + </conditional> + <param name="fdrthresh" type="float" value="0.05" size="5" label="P value threshold for FDR filtering for amily wise error rate control" + help="Conventional default value of 0.05 recommended"/> + <param name="fdrtype" type="select" label="FDR (Type II error) control method" + help="Use fdr or bh typically to control for the number of tests in a reliable way"> + <option value="fdr" selected="true">fdr</option> + 
<option value="BH">Benjamini Hochberg</option> + <option value="BY">Benjamini Yukateli</option> + <option value="bonferroni">Bonferroni</option> + <option value="hochberg">Hochberg</option> + <option value="holm">Holm</option> + <option value="hommel">Hommel</option> + <option value="none">no control for multiple tests</option> + </param> + </inputs> + <outputs> + <data format="tabular" name="out_edgeR" label="${title}_topTable_edgeR.xls"> + <filter>edgeR['doedgeR'] == "T"</filter> + </data> + <data format="tabular" name="out_DESeq2" label="${title}_topTable_DESeq2.xls"> + <filter>DESeq2['doDESeq2'] == "T"</filter> + </data> + <data format="tabular" name="out_VOOM" label="${title}_topTable_VOOM.xls"> + <filter>doVoom == "T"</filter> + </data> + <data format="html" name="html_file" label="${title}.html"/> + </outputs> + <stdio> + <exit_code range="4" level="fatal" description="Number of subject ids must match total number of samples in the input matrix" /> + </stdio> + <tests> +<test> +<param name='input1' value='test_bams2mx.xls' ftype='tabular' /> + <param name='treatment_name' value='case' /> + <param name='title' value='edgeRtest' /> + <param name='useNDF' value='' /> + <param name='doedgeR' value='T' /> + <param name='doVoom' value='T' /> + <param name='doDESeq2' value='T' /> + <param name='fdrtype' value='fdr' /> + <param name='edgeR_priordf' value="8" /> + <param name='fdrthresh' value="0.05" /> + <param name='control_name' value='control' /> + <param name='subjectids' value='' /> + <param name='Treat_cols' value='3,4,5,9' /> + <param name='Control_cols' value='2,6,7,8' /> + <output name='out_edgeR' file='edgeRtest1out.xls' compare='diff' /> + <output name='html_file' file='edgeRtest1out.html' compare='diff' lines_diff='20' /> +</test> +</tests> + +<configfiles> +<configfile name="runme"> +<![CDATA[ +# +# edgeR.Rscript +# updated npv 2011 for R 2.14.0 and edgeR 2.4.0 by ross +# Performs DGE on a count table containing n replicates of two conditions +# +# Parameters +# +# 1 - Output Dir + +# Original edgeR code by: S.Lunke and A.Kaspi +reallybig = log10(.Machine\$double.xmax) +reallysmall = log10(.Machine\$double.xmin) +library('stringr') +library('gplots') +library('edgeR') +hmap2 = function(cmat,nsamp=100,outpdfname='heatmap2.pdf', TName='Treatment',group=NA,myTitle='title goes here') +{ +# Perform clustering for significant pvalues after controlling FWER + samples = colnames(cmat) + gu = unique(group) + gn = rownames(cmat) + if (length(gu) == 2) { + col.map = function(g) {if (g==gu[1]) "#FF0000" else "#0000FF"} + pcols = unlist(lapply(group,col.map)) + } else { + colours = rainbow(length(gu),start=0,end=4/6) + pcols = colours[match(group,gu)] } + dm = cmat[(! 
is.na(gn)),] + # remove unlabelled hm rows + nprobes = nrow(dm) + # sub = paste('Showing',nprobes,'contigs ranked for evidence of differential abundance') + if (nprobes > nsamp) { + dm =dm[1:nsamp,] + #sub = paste('Showing',nsamp,'contigs ranked for evidence for differential abundance out of',nprobes,'total') + } + newcolnames = substr(colnames(dm),1,20) + colnames(dm) = newcolnames + pdf(outpdfname) + heatmap.2(dm,main=myTitle,ColSideColors=pcols,col=topo.colors(100),dendrogram="col",key=T,density.info='none', + Rowv=F,scale='row',trace='none',margins=c(8,8),cexRow=0.4,cexCol=0.5) + dev.off() +} + +hmap = function(cmat,nmeans=4,outpdfname="heatMap.pdf",nsamp=250,TName='Treatment',group=NA,myTitle="Title goes here") +{ + # for 2 groups only was + #col.map = function(g) {if (g==TName) "#FF0000" else "#0000FF"} + #pcols = unlist(lapply(group,col.map)) + gu = unique(group) + colours = rainbow(length(gu),start=0.3,end=0.6) + pcols = colours[match(group,gu)] + nrows = nrow(cmat) + mtitle = paste(myTitle,'Heatmap: n contigs =',nrows) + if (nrows > nsamp) { + cmat = cmat[c(1:nsamp),] + mtitle = paste('Heatmap: Top ',nsamp,' DE contigs (of ',nrows,')',sep='') + } + newcolnames = substr(colnames(cmat),1,20) + colnames(cmat) = newcolnames + pdf(outpdfname) + heatmap(cmat,scale='row',main=mtitle,cexRow=0.3,cexCol=0.4,Rowv=NA,ColSideColors=pcols) + dev.off() +} + +qqPlot = function(descr='qqplot',pvector, outpdf='qqplot.pdf',...) +# stolen from https://gist.github.com/703512 +{ + o = -log10(sort(pvector,decreasing=F)) + e = -log10( 1:length(o)/length(o) ) + o[o==-Inf] = reallysmall + o[o==Inf] = reallybig + maint = descr + pdf(outpdf) + plot(e,o,pch=19,cex=1, main=maint, ..., + xlab=expression(Expected~~-log[10](italic(p))), + ylab=expression(Observed~~-log[10](italic(p))), + xlim=c(0,max(e)), ylim=c(0,max(o))) + lines(e,e,col="red") + grid(col = "lightgray", lty = "dotted") + dev.off() +} + +smearPlot = function(DGEList,deTags, outSmear, outMain) + { + pdf(outSmear) + plotSmear(DGEList,de.tags=deTags,main=outMain) + grid(col="lightgray", lty="dotted") + dev.off() + } + +boxPlot = function(rawrs,cleanrs,maint,myTitle,pdfname) +{ # + nc = ncol(rawrs) + for (i in c(1:nc)) {rawrs[(rawrs[,i] < 0),i] = NA} + fullnames = colnames(rawrs) + newcolnames = substr(colnames(rawrs),1,20) + colnames(rawrs) = newcolnames + newcolnames = substr(colnames(cleanrs),1,20) + colnames(cleanrs) = newcolnames + defpar = par(no.readonly=T) + print.noquote('raw contig counts by sample:') + print.noquote(summary(rawrs)) + print.noquote('normalised contig counts by sample:') + print.noquote(summary(cleanrs)) + pdf(pdfname) + par(mfrow=c(1,2)) + boxplot(rawrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('Raw:',maint)) + grid(col="lightgray",lty="dotted") + boxplot(cleanrs,varwidth=T,notch=T,ylab='log contig count',col="maroon",las=3,cex.axis=0.35,main=paste('After ',maint)) + grid(col="lightgray",lty="dotted") + dev.off() + pdfname = "sample_counts_histogram.pdf" + nc = ncol(rawrs) + print.noquote(paste('Using ncol rawrs=',nc)) + ncroot = round(sqrt(nc)) + if (ncroot*ncroot < nc) { ncroot = ncroot + 1 } + m = c() + for (i in c(1:nc)) { + rhist = hist(rawrs[,i],breaks=100,plot=F) + m = append(m,max(rhist\$counts)) + } + ymax = max(m) + ncols = length(fullnames) + if (ncols > 20) + { + scale = 7*ncols/20 + pdf(pdfname,width=scale,height=scale) + } else { + pdf(pdfname) + } + par(mfrow=c(ncroot,ncroot)) + for (i in c(1:nc)) { + hist(rawrs[,i], main=paste("Contig logcount",i), xlab='log raw 
count', col="maroon", + breaks=100,sub=fullnames[i],cex=0.8,ylim=c(0,ymax)) + } + dev.off() + par(defpar) + +} + +cumPlot = function(rawrs,cleanrs,maint,myTitle) +{ # updated to use ecdf + pdfname = "Filtering_rowsum_bar_charts.pdf" + defpar = par(no.readonly=T) + lrs = log(rawrs,10) + lim = max(lrs) + pdf(pdfname) + par(mfrow=c(2,1)) + hist(lrs,breaks=100,main=paste('Before:',maint),xlab="# Reads (log)", + ylab="Count",col="maroon",sub=myTitle, xlim=c(0,lim),las=1) + grid(col="lightgray", lty="dotted") + lrs = log(cleanrs,10) + hist(lrs,breaks=100,main=paste('After:',maint),xlab="# Reads (log)", + ylab="Count",col="maroon",sub=myTitle,xlim=c(0,lim),las=1) + grid(col="lightgray", lty="dotted") + dev.off() + par(defpar) +} + +cumPlot1 = function(rawrs,cleanrs,maint,myTitle) +{ # updated to use ecdf + pdfname = paste(gsub(" ","", myTitle , fixed=TRUE),"RowsumCum.pdf",sep='_') + pdf(pdfname) + par(mfrow=c(2,1)) + lastx = max(rawrs) + rawe = knots(ecdf(rawrs)) + cleane = knots(ecdf(cleanrs)) + cy = 1:length(cleane)/length(cleane) + ry = 1:length(rawe)/length(rawe) + plot(rawe,ry,type='l',main=paste('Before',maint),xlab="Log Contig Total Reads", + ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) + grid(col="blue") + plot(cleane,cy,type='l',main=paste('After',maint),xlab="Log Contig Total Reads", + ylab="Cumulative proportion",col="maroon",log='x',xlim=c(1,lastx),sub=myTitle) + grid(col="blue") + dev.off() +} + + + +doGSEAold = function(y=NULL,design=NULL,histgmt="", + bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", + ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") +{ + sink('Camera.log') + genesets = c() + if (bigmt > "") + { + bigenesets = readLines(bigmt) + genesets = bigenesets + } + if (histgmt > "") + { + hgenesets = readLines(histgmt) + if (bigmt > "") { + genesets = rbind(genesets,hgenesets) + } else { + genesets = hgenesets + } # use only history if no bi + } + print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) + genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n + outf = outfname + head=paste(myTitle,'edgeR GSEA') + write(head,file=outfname,append=F) + ntest=length(genesets) + urownames = toupper(rownames(y)) + upcam = c() + downcam = c() + for (i in 1:ntest) { + gs = unlist(genesets[i]) + g = gs[1] # geneset_id + u = gs[2] + if (u > "") { u = paste("<a href=\'",u,"\'>",u,"</a>",sep="") } + glist = gs[3:length(gs)] # member gene symbols + glist = toupper(glist) + inglist = urownames %in% glist + nin = sum(inglist) + if ((nin > minnin) && (nin < maxnin)) { + ### print(paste('@@found',sum(inglist),'genes in glist')) + camres = camera(y=y,index=inglist,design=design) + if (! 
is.null(camres)) { + rownames(camres) = g # gene set name + camres = cbind(GeneSet=g,URL=u,camres) + if (camres\$Direction == "Up") + { + upcam = rbind(upcam,camres) } else { + downcam = rbind(downcam,camres) + } + } + } + } + uscam = upcam[order(upcam\$PValue),] + unadjp = uscam\$PValue + uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + nup = max(10,sum((uscam\$adjPValue < fdrthresh))) + dscam = downcam[order(downcam\$PValue),] + unadjp = dscam\$PValue + dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) + write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) + write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) + write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ Camera down top',ndown,'gene sets:')) + write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) + sink() +} + + + + +doGSEA = function(y=NULL,design=NULL,histgmt="", + bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", + ntest=0, myTitle="myTitle", outfname="GSEA.xls", minnin=5, maxnin=2000,fdrthresh=0.05,fdrtype="BH") +{ + sink('Camera.log') + genesets = c() + if (bigmt > "") + { + bigenesets = readLines(bigmt) + genesets = bigenesets + } + if (histgmt > "") + { + hgenesets = readLines(histgmt) + if (bigmt > "") { + genesets = rbind(genesets,hgenesets) + } else { + genesets = hgenesets + } # use only history if no bi + } + print.noquote(paste("@@@read",length(genesets), 'genesets from',histgmt,bigmt)) + genesets = strsplit(genesets,'\t') # tabular. genesetid\tURLorwhatever\tgene_1\t..\tgene_n + outf = outfname + head=paste(myTitle,'edgeR GSEA') + write(head,file=outfname,append=F) + ntest=length(genesets) + urownames = toupper(rownames(y)) + upcam = c() + downcam = c() + incam = c() + urls = c() + gsids = c() + for (i in 1:ntest) { + gs = unlist(genesets[i]) + gsid = gs[1] # geneset_id + url = gs[2] + if (url > "") { url = paste("<a href=\'",url,"\'>",url,"</a>",sep="") } + glist = gs[3:length(gs)] # member gene symbols + glist = toupper(glist) + inglist = urownames %in% glist + nin = sum(inglist) + if ((nin > minnin) && (nin < maxnin)) { + incam = c(incam,inglist) + gsids = c(gsids,gsid) + urls = c(urls,url) + } + } + incam = as.list(incam) + names(incam) = gsids + allcam = camera(y=y,index=incam,design=design) + allcamres = cbind(geneset=gsids,allcam,URL=urls) + for (i in 1:ntest) { + camres = allcamres[i] + res = try(test = (camres\$Direction == "Up")) + if ("try-error" %in% class(res)) { + cat("test failed, camres = :") + print.noquote(camres) + } else { if (camres\$Direction == "Up") + { upcam = rbind(upcam,camres) + } else { downcam = rbind(downcam,camres) + } + + } + } + uscam = upcam[order(upcam\$PValue),] + unadjp = uscam\$PValue + uscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + nup = max(10,sum((uscam\$adjPValue < fdrthresh))) + dscam = downcam[order(downcam\$PValue),] + unadjp = dscam\$PValue + dscam\$adjPValue = p.adjust(unadjp,method=fdrtype) + ndown = max(10,sum((dscam\$adjPValue < fdrthresh))) + write.table(uscam,file=paste('camera_up',outfname,sep='_'),quote=F,sep='\t',row.names=F) + write.table(dscam,file=paste('camera_down',outfname,sep='_'),quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ Camera up top',nup,'gene sets:')) + write.table(head(uscam,nup),file="",quote=F,sep='\t',row.names=F) + print.noquote(paste('@@@@@ 
Camera down top',ndown,'gene sets:')) + write.table(head(dscam,ndown),file="",quote=F,sep='\t',row.names=F) + sink() + } + + +edgeIt = function (Count_Matrix=c(),group=c(),out_edgeR=F,out_VOOM=F,out_DESeq2=F,fdrtype='fdr',priordf=5, + fdrthresh=0.05,outputdir='.', myTitle='Differential Counts',libSize=c(),useNDF=F, + filterquantile=0.2, subjects=c(),mydesign=NULL, + doDESeq2=T,doVoom=T,doCamera=T,doedgeR=T,org='hg19', + histgmt="", bigmt="/data/genomes/gsea/3.1/Abetterchoice_nocgp_c2_c3_c5_symbols_all.gmt", + doCook=F,DESeq_fitType="parameteric") +{ + # Error handling + if (length(unique(group))!=2){ + print("Number of conditions identified in experiment does not equal 2") + q() + } + require(edgeR) + options(width = 512) + mt = paste(unlist(strsplit(myTitle,'_')),collapse=" ") + allN = nrow(Count_Matrix) + nscut = round(ncol(Count_Matrix)/2) + colTotmillionreads = colSums(Count_Matrix)/1e6 + counts.dataframe = as.data.frame(c()) + rawrs = rowSums(Count_Matrix) + nonzerod = Count_Matrix[(rawrs > 0),] # remove all zero count genes + nzN = nrow(nonzerod) + nzrs = rowSums(nonzerod) + zN = allN - nzN + print('# Quantiles for non-zero row counts:',quote=F) + print(quantile(nzrs,probs=seq(0,1,0.1)),quote=F) + if (useNDF == T) + { + gt1rpin3 = rowSums(Count_Matrix/expandAsMatrix(colTotmillionreads,dim(Count_Matrix)) >= 1) >= nscut + lo = colSums(Count_Matrix[!gt1rpin3,]) + workCM = Count_Matrix[gt1rpin3,] + cleanrs = rowSums(workCM) + cleanN = length(cleanrs) + meth = paste( "After removing",length(lo),"contigs with fewer than ",nscut," sample read counts >= 1 per million, there are",sep="") + print(paste("Read",allN,"contigs. Removed",zN,"contigs with no reads.",meth,cleanN,"contigs"),quote=F) + maint = paste('Filter >=1/million reads in >=',nscut,'samples') + } else { + useme = (nzrs > quantile(nzrs,filterquantile)) + workCM = nonzerod[useme,] + lo = colSums(nonzerod[!useme,]) + cleanrs = rowSums(workCM) + cleanN = length(cleanrs) + meth = paste("After filtering at count quantile =",filterquantile,", there are",sep="") + print(paste('Read',allN,"contigs. 
Removed",zN,"with no reads.",meth,cleanN,"contigs"),quote=F) + maint = paste('Filter below',filterquantile,'quantile') + } + cumPlot(rawrs=rawrs,cleanrs=cleanrs,maint=maint,myTitle=myTitle) + allgenes = rownames(workCM) + reg = "^chr([0-9]+):([0-9]+)-([0-9]+)" + genecards="<a href=\'http://www.genecards.org/index.php?path=/Search/keyword/" + ucsc = paste("<a href=\'http://genome.ucsc.edu/cgi-bin/hgTracks?db=",org,sep='') + testreg = str_match(allgenes,reg) + if (sum(!is.na(testreg[,1]))/length(testreg[,1]) > 0.8) # is ucsc style string + { + print("@@ using ucsc substitution for urls") + contigurls = paste0(ucsc,"&position=chr",testreg[,2],":",testreg[,3],"-",testreg[,4],"\'>",allgenes,"</a>") + } else { + print("@@ using genecards substitution for urls") + contigurls = paste0(genecards,allgenes,"\'>",allgenes,"</a>") + } + print.noquote("# urls") + print.noquote(head(contigurls)) + print(paste("# Total low count contigs per sample = ",paste(lo,collapse=',')),quote=F) + cmrowsums = rowSums(workCM) + TName=unique(group)[1] + CName=unique(group)[2] + if (is.null(mydesign)) { + if (length(subjects) == 0) + { + mydesign = model.matrix(~group) + } + else { + subjf = factor(subjects) + mydesign = model.matrix(~subjf+group) # we block on subject so make group last to simplify finding it + } + } + print.noquote(paste('Using samples:',paste(colnames(workCM),collapse=','))) + print.noquote('Using design matrix:') + print.noquote(mydesign) + if (doedgeR) { + sink('edgeR.log') + #### Setup DGEList object + DGEList = DGEList(counts=workCM, group = group) + DGEList = calcNormFactors(DGEList) + + DGEList = estimateGLMCommonDisp(DGEList,mydesign) + comdisp = DGEList\$common.dispersion + DGEList = estimateGLMTrendedDisp(DGEList,mydesign) + if (edgeR_priordf > 0) { + print.noquote(paste("prior.df =",edgeR_priordf)) + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign,prior.df = edgeR_priordf) + } else { + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) + } + DGLM = glmFit(DGEList,design=mydesign) + DE = glmLRT(DGLM,coef=ncol(DGLM\$design)) # always last one - subject is first if needed + efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors + normData = (1e+06*DGEList\$counts/efflib) + uoutput = cbind( + Name=as.character(rownames(DGEList\$counts)), + DE\$table, + adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), + Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums,normData, + DGEList\$counts + ) + soutput = uoutput[order(DE\$table\$PValue),] # sorted into p value order - for quick toptable + goodness = gof(DGLM, pcutoff=fdrthresh) + if (sum(goodness\$outlier) > 0) { + print.noquote('GLM outliers:') + print(paste(rownames(DGLM)[(goodness\$outlier)],collapse=','),quote=F) + } else { + print('No GLM fit outlier genes found\n') + } + z = limma::zscoreGamma(goodness\$gof.statistic, shape=goodness\$df/2, scale=2) + pdf("edgeR_GoodnessofFit.pdf") + qq = qqnorm(z, panel.first=grid(), main="tagwise dispersion") + abline(0,1,lwd=3) + points(qq\$x[goodness\$outlier],qq\$y[goodness\$outlier], pch=16, col="maroon") + dev.off() + estpriorn = getPriorN(DGEList) + print(paste("Common Dispersion =",comdisp,"CV = ",sqrt(comdisp),"getPriorN = ",estpriorn),quote=F) + efflib = DGEList\$samples\$lib.size*DGEList\$samples\$norm.factors + normData = (1e+06*DGEList\$counts/efflib) + uniqueg = unique(group) + #### Plot MDS + sample_colors = match(group,levels(group)) + sampleTypes = levels(factor(group)) + print.noquote(sampleTypes) + pdf("edgeR_MDSplot.pdf") + plotMDS.DGEList(DGEList,main=paste("edgeR 
MDS for",myTitle),cex=0.5,col=sample_colors,pch=sample_colors) + legend(x="topleft", legend = sampleTypes,col=c(1:length(sampleTypes)), pch=19) + grid(col="blue") + dev.off() + colnames(normData) = paste( colnames(normData),'N',sep="_") + print(paste('Raw sample read totals',paste(colSums(nonzerod,na.rm=T),collapse=','))) + nzd = data.frame(log(nonzerod + 1e-2,10)) + try( boxPlot(rawrs=nzd,cleanrs=log(normData,10),maint='TMM Normalisation',myTitle=myTitle,pdfname="edgeR_raw_norm_counts_box.pdf") ) + write.table(soutput,file=out_edgeR, quote=FALSE, sep="\t",row.names=F) + tt = cbind( + Name=as.character(rownames(DGEList\$counts)), + DE\$table, + adj.p.value=p.adjust(DE\$table\$PValue, method=fdrtype), + Dispersion=DGEList\$tagwise.dispersion,totreads=cmrowsums + ) + print.noquote("# edgeR Top tags\n") + tt = cbind(tt,URL=contigurls) # add to end so table isn't laid out strangely + tt = tt[order(DE\$table\$PValue),] + print.noquote(tt[1:50,]) + deTags = rownames(uoutput[uoutput\$adj.p.value < fdrthresh,]) + nsig = length(deTags) + print(paste('#',nsig,'tags significant at adj p=',fdrthresh),quote=F) + deColours = ifelse(deTags,'red','black') + pdf("edgeR_BCV_vs_abundance.pdf") + plotBCV(DGEList, cex=0.3, main="Biological CV vs abundance") + dev.off() + dg = DGEList[order(DE\$table\$PValue),] + #normData = (1e+06 * dg\$counts/expandAsMatrix(dg\$samples\$lib.size, dim(dg))) + efflib = dg\$samples\$lib.size*dg\$samples\$norm.factors + normData = (1e+06*dg\$counts/efflib) + outpdfname="edgeR_top_100_heatmap.pdf" + hmap2(normData,nsamp=100,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('edgeR Heatmap',myTitle)) + outSmear = "edgeR_smearplot.pdf" + outMain = paste("Smear Plot for ",TName,' Vs ',CName,' (FDR@',fdrthresh,' N = ',nsig,')',sep='') + smearPlot(DGEList=DGEList,deTags=deTags, outSmear=outSmear, outMain = outMain) + qqPlot(descr=paste(myTitle,'edgeR adj p QQ plot'),pvector=tt\$adj.p.value,outpdf='edgeR_qqplot.pdf') + norm.factor = DGEList\$samples\$norm.factors + topresults.edgeR = soutput[which(soutput\$adj.p.value < fdrthresh), ] + edgeRcountsindex = which(allgenes %in% rownames(topresults.edgeR)) + edgeRcounts = rep(0, length(allgenes)) + edgeRcounts[edgeRcountsindex] = 1 # Create venn diagram of hits + sink() + } ### doedgeR + if (doDESeq2 == T) + { + sink("DESeq2.log") + # DESeq2 + require('DESeq2') + library('RColorBrewer') + pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) + if (length(subjects) == 0) + { + deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) + } else { + deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) + } + #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) + #rDESeq = results(DESeq2) + #newCountDataSet(workCM, group) + deSeqDatsizefac = estimateSizeFactors(deSEQds) + deSeqDatdisp = estimateDispersions(deSeqDatsizefac,fitType=DESeq_fitType) + resDESeq = nbinomWaldTest(deSeqDatdisp, pAdjustMethod=fdrtype) + rDESeq = as.data.frame(results(resDESeq)) + rDESeq = cbind(Contig=rownames(workCM),rDESeq,NReads=cmrowsums,URL=contigurls) + srDESeq = rDESeq[order(rDESeq\$pvalue),] + qqPlot(descr=paste(myTitle,'DESeq2 adj p qq plot'),pvector=rDESeq\$padj,outpdf='DESeq2_qqplot.pdf') + cat("# DESeq top 50\n") + print.noquote(srDESeq[1:50,]) + write.table(srDESeq,file=out_DESeq2, quote=FALSE, sep="\t",row.names=F) + topresults.DESeq = rDESeq[which(rDESeq\$padj < fdrthresh), ] + DESeqcountsindex = which(allgenes 
%in% rownames(topresults.DESeq)) + DESeqcounts = rep(0, length(allgenes)) + DESeqcounts[DESeqcountsindex] = 1 + pdf("DESeq2_dispersion_estimates.pdf") + plotDispEsts(resDESeq) + dev.off() + ysmall = abs(min(rDESeq\$log2FoldChange)) + ybig = abs(max(rDESeq\$log2FoldChange)) + ylimit = min(4,ysmall,ybig) + pdf("DESeq2_MA_plot.pdf") + plotMA(resDESeq,main=paste(myTitle,"DESeq2 MA plot"),ylim=c(-ylimit,ylimit)) + dev.off() + rlogres = rlogTransformation(resDESeq) + sampledists = dist( t( assay(rlogres) ) ) + sdmat = as.matrix(sampledists) + pdf("DESeq2_sample_distance_plot.pdf") + heatmap.2(sdmat,trace="none",main=paste(myTitle,"DESeq2 sample distances"), + col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) + dev.off() + ###outpdfname="DESeq2_top50_heatmap.pdf" + ###hmap2(sresDESeq,nsamp=50,TName=TName,group=group,outpdfname=outpdfname,myTitle=paste('DESeq2 vst rlog Heatmap',myTitle)) + sink() + result = try( (ppca = plotPCA( varianceStabilizingTransformation(deSeqDatdisp,blind=T), intgroup=c("Rx","Name")) ) ) + if ("try-error" %in% class(result)) { + print.noquote('DESeq2 plotPCA failed.') + } else { + pdf("DESeq2_PCA_plot.pdf") + #### wtf - print? Seems needed to get this to work + print(ppca) + dev.off() + } + } + + if (doVoom == T) { + sink('VOOM.log') + if (doedgeR == F) { + #### Setup DGEList object + DGEList = DGEList(counts=workCM, group = group) + DGEList = calcNormFactors(DGEList) + DGEList = estimateGLMCommonDisp(DGEList,mydesign) + DGEList = estimateGLMTrendedDisp(DGEList,mydesign) + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) + DGEList = estimateGLMTagwiseDisp(DGEList,mydesign) + norm.factor = DGEList\$samples\$norm.factors + } + pdf("VOOM_mean_variance_plot.pdf") + dat.voomed = voom(DGEList, mydesign, plot = TRUE, lib.size = colSums(workCM) * norm.factor) + dev.off() + # Use limma to fit data + fit = lmFit(dat.voomed, mydesign) + fit = eBayes(fit) + rvoom = topTable(fit, coef = length(colnames(mydesign)), adj = fdrtype, n = Inf, sort="none") + qqPlot(descr=paste(myTitle,'VOOM-limma adj p QQ plot'),pvector=rvoom\$adj.P.Val,outpdf='VOOM_qqplot.pdf') + rownames(rvoom) = rownames(workCM) + rvoom = cbind(rvoom,NReads=cmrowsums,URL=contigurls) + srvoom = rvoom[order(rvoom\$P.Value),] + cat("# VOOM top 50\n") + print(srvoom[1:50,]) + write.table(srvoom,file=out_VOOM, quote=FALSE, sep="\t",row.names=F) + # Use an FDR cutoff to find interesting samples for edgeR, DESeq and voom/limma + topresults.voom = rvoom[which(rvoom\$adj.P.Val < fdrthresh), ] + voomcountsindex = which(allgenes %in% topresults.voom\$ID) + voomcounts = rep(0, length(allgenes)) + voomcounts[voomcountsindex] = 1 + sink() + } + + if (doCamera) { + doGSEA(y=DGEList,design=mydesign,histgmt=histgmt,bigmt=bigmt,ntest=20,myTitle=myTitle, + outfname=paste(mt,"GSEA.xls",sep="_"),fdrthresh=fdrthresh,fdrtype=fdrtype) + } + + if ((doDESeq2==T) || (doVoom==T) || (doedgeR==T)) { + if ((doVoom==T) && (doDESeq2==T) && (doedgeR==T)) { + vennmain = paste(mt,'Voom,edgeR and DESeq2 overlap at FDR=',fdrthresh) + counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, + VOOM_limma = voomcounts, row.names = allgenes) + } else if ((doDESeq2==T) && (doedgeR==T)) { + vennmain = paste(mt,'DESeq2 and edgeR overlap at FDR=',fdrthresh) + counts.dataframe = data.frame(edgeR = edgeRcounts, DESeq2 = DESeqcounts, row.names = allgenes) + } else if ((doVoom==T) && (doedgeR==T)) { + vennmain = paste(mt,'Voom and edgeR overlap at FDR=',fdrthresh) + counts.dataframe = data.frame(edgeR = edgeRcounts, VOOM_limma = voomcounts, 
row.names = allgenes) + } + + if (nrow(counts.dataframe > 1)) { + counts.venn = vennCounts(counts.dataframe) + vennf = "Venn_significant_genes_overlap.pdf" + pdf(vennf) + vennDiagram(counts.venn,main=vennmain,col="maroon") + dev.off() + } + } #### doDESeq2 or doVoom + +} +#### Done + +###sink(stdout(),append=T,type="message") +builtin_gmt = "" +history_gmt = "" +history_gmt_name = "" +out_edgeR = F +out_DESeq2 = F +out_VOOM = "$out_VOOM" +doDESeq2 = $DESeq2.doDESeq2 # make these T or F +doVoom = $doVoom +doCamera = $camera.doCamera +doedgeR = $edgeR.doedgeR +edgeR_priordf = 0 + + +#if $doVoom == "T": + out_VOOM = "$out_VOOM" +#end if + +#if $DESeq2.doDESeq2 == "T": + out_DESeq2 = "$out_DESeq2" + DESeq_fitType = "$DESeq2.DESeq_fitType" +#end if + +#if $edgeR.doedgeR == "T": + out_edgeR = "$out_edgeR" + edgeR_priordf = $edgeR.edgeR_priordf +#end if + +#if $camera.doCamera == 'T' + #if $camera.gmtSource.refgmtSource == "indexed" or $camera.gmtSource.refgmtSource == "both": + builtin_gmt = "${camera.gmtSource.builtinGMT.fields.path}" + #end if + #if $camera.gmtSource.refgmtSource == "history" or $camera.gmtSource.refgmtSource == "both": + history_gmt = "${camera.gmtSource.ownGMT}" + history_gmt_name = "${camera.gmtSource.ownGMT.name}" + #end if +#end if + + +if (sum(c(doedgeR,doVoom,doDESeq2)) == 0) +{ +write("No methods chosen - nothing to do! Please try again after choosing one or more methods", stderr()) +quit(save="no",status=2) +} + +Out_Dir = "$html_file.files_path" +Input = "$input1" +TreatmentName = "$treatment_name" +TreatmentCols = "$Treat_cols" +ControlName = "$control_name" +ControlCols= "$Control_cols" +org = "$input1.dbkey" +if (org == "") { org = "hg19"} +fdrtype = "$fdrtype" +fdrthresh = $fdrthresh +useNDF = $useNDF +fQ = $fQ # non-differential centile cutoff +myTitle = "$title" +sids = strsplit("$subjectids",',') +subjects = unlist(sids) +nsubj = length(subjects) +TCols = as.numeric(strsplit(TreatmentCols,",")[[1]])-1 +CCols = as.numeric(strsplit(ControlCols,",")[[1]])-1 +cat('Got TCols=') +cat(TCols) +cat('; CCols=') +cat(CCols) +cat('\n') +useCols = c(TCols,CCols) +if (file.exists(Out_Dir) == F) dir.create(Out_Dir) +Count_Matrix = read.table(Input,header=T,row.names=1,sep='\t') #Load tab file assume header +snames = colnames(Count_Matrix) +nsamples = length(snames) +if (nsubj > 0 & nsubj != nsamples) { +options("show.error.messages"=T) +mess = paste('Fatal error: Supplied subject id list',paste(subjects,collapse=','), + 'has length',nsubj,'but there are',nsamples,'samples',paste(snames,collapse=',')) +write(mess, stderr()) +quit(save="no",status=4) +} +if (length(subjects) != 0) {subjects = subjects[useCols]} +Count_Matrix = Count_Matrix[,useCols] ### reorder columns +rn = rownames(Count_Matrix) +islib = rn %in% c('librarySize','NotInBedRegions') +LibSizes = Count_Matrix[subset(rn,islib),][1] # take first +Count_Matrix = Count_Matrix[subset(rn,! 
islib),]
+group = c(rep(TreatmentName,length(TCols)), rep(ControlName,length(CCols)) ) #Build a group descriptor
+group = factor(group, levels=c(ControlName,TreatmentName))
+colnames(Count_Matrix) = paste(group,colnames(Count_Matrix),sep="_") #Relabel columns
+results = edgeIt(Count_Matrix=Count_Matrix,group=group, out_edgeR=out_edgeR, out_VOOM=out_VOOM, out_DESeq2=out_DESeq2,
+                 fdrtype='BH',mydesign=NULL,priordf=edgeR_priordf,fdrthresh=fdrthresh,outputdir='.',
+                 myTitle=myTitle,useNDF=F,libSize=c(),filterquantile=fQ,subjects=subjects,
+                 doDESeq2=doDESeq2,doVoom=doVoom,doCamera=doCamera,doedgeR=doedgeR,org=org,
+                 histgmt=history_gmt,bigmt=builtin_gmt,DESeq_fitType=DESeq_fitType)
+sessionInfo()
+]]>
+</configfile>
+</configfiles>
+<help>
+
+**What it does**
+
+Allows short read sequence counts from controlled experiments to be analysed for differentially expressed genes.
+Optionally adds a term for subject if not all samples are independent or if some other factor needs to be blocked in the design.
+
+**Input**
+
+Requires a count matrix as a tabular file. These are best made using the companion HTSeq_ based counter Galaxy wrapper
+and your fave gene model to generate inputs. Each row is a genomic feature (eg a gene or exon) and each column is the
+non-negative integer count of reads from one sample overlapping the feature.
+The matrix must have a header row uniquely identifying the source samples, and unique row names in
+the first column. Typically the row names are gene symbols or probe ids for downstream use in GSEA and other methods.
+
+**Specifying comparisons**
+
+This is basically dumbed down for two factors - case vs control.
+
+More complex interfaces are possible but painful at present.
+Probably need to specify a phenotype file to do this better.
+Work in progress. Send code.
+
+If you have (eg) paired samples and wish to include a term in the GLM to account for some other factor (subject in the case of paired samples),
+supply a comma separated list of indicators - one integer for every sample (whether modelled or not!) indicating (eg) which subject it came from -
+or an empty string if samples are all independent.
+If not empty, there must be exactly as many integers in the supplied list as there are columns (samples) in the count matrix.
+Integers for samples that are not in the analysis *must* be present in the string as filler even if not used.
+
+So if you have 2 pairs out of 6 samples, you need to put in unique integers for the unpaired ones.
+For example, if you had 6 samples with the first two independent and the remaining two pairs each from a different subject, you might use
+8,9,1,1,2,2
+as subject IDs to indicate two paired samples from the same subject in columns 3/4 and 5/6.
+
+**Methods available**
+
+You can run 3 popular Bioconductor packages available for count data.
+
+edgeR - see edgeR_ for details
+
+VOOM/limma - see limma_VOOM_ for details
+
+DESeq2 - see DESeq2_ for details
+
+and optionally camera in edgeR which works better if MSigDB is installed.
+
+**Outputs**
+
+Some helpful plots and analysis results. Note that most of these are produced using R code
+suggested by the excellent documentation and vignettes for the Bioconductor
+packages invoked. The Tool Factory is used to automatically lay these out for you to enjoy.
+
+**Note on Voom**
+
+The voom from limma version 3.16.6 help in R includes this from the authors - but you should read the paper to interpret this method.
+
+This function is intended to process RNA-Seq or ChIP-Seq data prior to linear modelling in limma.
+
+voom is an acronym for mean-variance modelling at the observational level.
+The key concern is to estimate the mean-variance relationship in the data, then use this to compute appropriate weights for each observation.
+Count data almost always show non-trivial mean-variance relationships. Raw counts show increasing variance with increasing count size, while log-counts typically show a decreasing mean-variance trend.
+This function estimates the mean-variance trend for log-counts, then assigns a weight to each observation based on its predicted variance.
+The weights are then used in the linear modelling process to adjust for heteroscedasticity.
+
+In an experiment, a count value is observed for each tag in each sample. A tag-wise mean-variance trend is computed using lowess.
+The tag-wise mean is the mean log2 count with an offset of 0.5, across samples for a given tag.
+The tag-wise variance is the quarter-root-variance of normalized log2 counts per million values with an offset of 0.5, across samples for a given tag.
+Tags with zero counts across all samples are not included in the lowess fit. Optional normalization is performed using normalizeBetweenArrays.
+Using fitted values of log2 counts from a linear model fit by lmFit, variances from the mean-variance trend were interpolated for each observation.
+This was carried out by approxfun. Inverse variance weights can be used to correct for mean-variance trend in the count data.
+
+
+Author(s)
+
+Charity Law and Gordon Smyth
+
+References
+
+Law, CW (2013). Precision weights for gene expression analysis. PhD Thesis. University of Melbourne, Australia.
+
+Law, CW, Chen, Y, Shi, W, Smyth, GK (2013). Voom! Precision weights unlock linear model analysis tools for RNA-seq read counts.
+Technical Report 1 May 2013, Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research, Melbourne, Australia.
+http://www.statsci.org/smyth/pubs/VoomPreprint.pdf
+
+See Also
+
+A voom case study is given in the edgeR User's Guide.
+
+vooma is a similar function but for microarrays instead of RNA-seq.
+
+
+***old rant on changes to Bioconductor package variable names between versions***
+
+The edgeR authors made a small cosmetic change in the name of one important variable (from p.value to PValue)
+breaking this and all other code that assumed the old name for this variable,
+between edgeR 2.4.4 and 2.4.6 (the version for R 2.14 as at the time of writing).
+This means that all code using edgeR is sensitive to the version. I think this was a very unwise thing
+to do because it wasted hours of my time to track down and will similarly cost other edgeR users dearly
+when their old scripts break. This tool currently works with 2.4.6.
+
+**Note on prior.N**
+
+http://seqanswers.com/forums/showthread.php?t=5591 says:
+
+*prior.n*
+
+The value for prior.n determines the amount of smoothing of tagwise dispersions towards the common dispersion.
+You can think of it as like a "weight" for the common value. (It is actually the weight for the common likelihood
+in the weighted likelihood equation). The larger the value for prior.n, the more smoothing, i.e. the closer your
+tagwise dispersion estimates will be to the common dispersion. If you use a prior.n of 1, then that gives the
+common likelihood the weight of one observation.
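As a concrete illustration of the prior.n / prior.df smoothing described above (a minimal sketch only, not part of this changeset or of the quoted thread; the toy counts, group labels and the prior.df value of 20 are invented for the example), the following R snippet mirrors the estimateGLMTagwiseDisp call used in the configfile earlier in this diff:

  library(edgeR)
  # toy data: 100 tags, 3 treatment and 3 control samples
  counts = matrix(rnbinom(600, mu=20, size=1), nrow=100)
  group = factor(c('T','T','T','C','C','C'))
  design = model.matrix(~group)
  dge = DGEList(counts=counts, group=group)
  dge = calcNormFactors(dge)
  dge = estimateGLMCommonDisp(dge, design)
  dge = estimateGLMTrendedDisp(dge, design)
  # prior.df = prior.n * residual.df; here residual.df = 6 - 2 = 4,
  # so prior.df=20 is equivalent to prior.n=5. Larger prior.df squeezes
  # the tagwise dispersions harder towards the trended dispersion.
  dge = estimateGLMTagwiseDisp(dge, design, prior.df=20)
  fit = glmFit(dge, design)
  lrt = glmLRT(fit, coef=ncol(design))
  topTags(lrt)

With only a few replicates per group, a larger prior.df plays the same stabilising role as the large prior.n values recommended in the quoted advice, which continues below.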
+ +In answer to your question, it is a good thing to squeeze the tagwise dispersions towards a common value, +or else you will be using very unreliable estimates of the dispersion. I would not recommend using the value that +you obtained from estimateSmoothing()---this is far too small and would result in virtually no moderation +(squeezing) of the tagwise dispersions. How many samples do you have in your experiment? +What is the experimental design? If you have few samples (less than 6) then I would suggest a prior.n of at least 10. +If you have more samples, then the tagwise dispersion estimates will be more reliable, +so you could consider using a smaller prior.n, although I would hesitate to use a prior.n less than 5. + + +From Bioconductor Digest, Vol 118, Issue 5, Gordon writes: + +Dear Dorota, + +The important settings are prior.df and trend. + +prior.n and prior.df are related through prior.df = prior.n * residual.df, +and your experiment has residual.df = 36 - 12 = 24. So the old setting of +prior.n=10 is equivalent for your data to prior.df = 240, a very large +value. Going the other way, the new setting of prior.df=10 is equivalent +to prior.n=10/24. + +To recover old results with the current software you would use + + estimateTagwiseDisp(object, prior.df=240, trend="none") + +To get the new default from old software you would use + + estimateTagwiseDisp(object, prior.n=10/24, trend=TRUE) + +Actually the old trend method is equivalent to trend="loess" in the new +software. You should use plotBCV(object) to see whether a trend is +required. + +Note you could also use + + prior.n = getPriorN(object, prior.df=10) + +to map between prior.df and prior.n. + +---- + +**Attributions** + +edgeR - edgeR_ + +VOOM/limma - limma_VOOM_ + +DESeq2 - DESeq2_ for details + +See above for Bioconductor package documentation for packages exposed in Galaxy by this tool and app store package. + +Galaxy_ (that's what you are using right now!) for gluing everything together + +Otherwise, all code and documentation comprising this tool was written by Ross Lazarus and is +licensed to you under the LGPL_ like other rgenetics artefacts + +.. _LGPL: http://www.gnu.org/copyleft/lesser.html +.. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html +.. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html +.. _DESeq2: http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html +.. _limma_VOOM: http://www.bioconductor.org/packages/release/bioc/html/limma.html +.. _Galaxy: http://getgalaxy.org +</help> + +</tool> + + This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/91bece401d51/ Changeset: 91bece401d51 User: fubar Date: 2013-08-17 01:55:18 Summary: Merged galaxy/galaxy-central into default Affected #: 35 files diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 lib/galaxy/datatypes/dataproviders/base.py --- a/lib/galaxy/datatypes/dataproviders/base.py +++ b/lib/galaxy/datatypes/dataproviders/base.py @@ -247,17 +247,22 @@ `offset`, begin providing datat, and stop when `num_data_returned` is greater than `offset`. 
""" + if self.limit != None and self.limit <= 0: + return + yield + parent_gen = super( LimitedOffsetDataProvider, self ).__iter__() for datum in parent_gen: - - if self.limit != None and self.num_data_returned > self.limit: - break + self.num_data_returned -= 1 + #print 'self.num_data_returned:', self.num_data_returned + #print 'self.num_valid_data_read:', self.num_valid_data_read if self.num_valid_data_read > self.offset: + self.num_data_returned += 1 yield datum - else: - # wot a cheezy way of doing this... - self.num_data_returned -= 1 + + if self.limit != None and self.num_data_returned >= self.limit: + break #TODO: skipping lines is inefficient - somehow cache file position/line_num pair and allow provider # to seek to a pos/line and then begin providing lines diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 lib/galaxy/datatypes/dataproviders/dataset.py --- a/lib/galaxy/datatypes/dataproviders/dataset.py +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -6,19 +6,13 @@ (e.g. parsing genomic regions from their source) """ +from galaxy import eggs + import pkg_resources pkg_resources.require( 'bx-python' ) from bx import seq as bx_seq from bx import wiggle as bx_wig -import galaxy.model -import galaxy.datatypes -import galaxy.datatypes.data - -#TODO: can't import these due to circular ref in model/registry -#import galaxy.datatypes.binary -#import galaxy.datatypes.tabular - import exceptions import base import line @@ -27,8 +21,9 @@ _TODO = """ use bx as much as possible -the use of DatasetInstance seems to create some import problems gff3 hierarchies + +change SamtoolsDataProvider to use pysam """ import logging @@ -50,11 +45,8 @@ """ :param dataset: the Galaxy dataset whose file will be the source :type dataset: model.DatasetInstance - - :raises exceptions.InvalidDataProviderSource: if not a DatsetInstance """ - if not isinstance( dataset, galaxy.model.DatasetInstance ): - raise exceptions.InvalidDataProviderSource( "Data provider can only be used with a DatasetInstance" ) + #precondition: dataset is a galaxy.model.DatasetInstance self.dataset = dataset # this dataset file is obviously the source #TODO: this might be a good place to interface with the object_store... @@ -615,13 +607,7 @@ """ #TODO: into validate_source - #TODO: have to import these here due to circular ref in model/datatypes - import galaxy.datatypes.binary - import galaxy.datatypes.tabular - if( not( isinstance( dataset.datatype, galaxy.datatypes.tabular.Sam ) - or isinstance( dataset.datatype, galaxy.datatypes.binary.Bam ) ) ): - raise exceptions.InvalidDataProviderSource( - 'dataset must be a Sam or Bam datatype: %s' %( str( dataset.datatype ) ) ) + #precondition: dataset.datatype is a tabular.Sam or binary.Bam self.dataset = dataset options_dict = options_dict or {} @@ -661,8 +647,9 @@ validated_flag_list = set([ flag for flag in options_string if flag in self.FLAGS_WO_ARGS ]) # if sam add -S - if( ( isinstance( self.dataset.datatype, galaxy.datatypes.tabular.Sam ) - and ( 'S' not in validated_flag_list ) ) ): + #TODO: not the best test in the world... 
+ if( ( self.dataset.ext == 'sam' ) + and ( 'S' not in validated_flag_list ) ): validated_flag_list.append( 'S' ) if validated_flag_list: diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 lib/galaxy/datatypes/dataproviders/line.py --- a/lib/galaxy/datatypes/dataproviders/line.py +++ b/lib/galaxy/datatypes/dataproviders/line.py @@ -28,18 +28,25 @@ """ DEFAULT_COMMENT_CHAR = '#' settings = { - 'string_lines' : 'bool', + 'strip_lines' : 'bool', + 'strip_newlines': 'bool', 'provide_blank' : 'bool', 'comment_char' : 'str', } - def __init__( self, source, strip_lines=True, provide_blank=False, comment_char=DEFAULT_COMMENT_CHAR, **kwargs ): + def __init__( self, source, strip_lines=True, strip_newlines=False, provide_blank=False, + comment_char=DEFAULT_COMMENT_CHAR, **kwargs ): """ :param strip_lines: remove whitespace from the beginning an ending of each line (or not). Optional: defaults to True :type strip_lines: bool + :param strip_newlines: remove newlines only + (only functions when ``strip_lines`` is false) + Optional: defaults to False + :type strip_lines: bool + :param provide_blank: are empty lines considered valid and provided? Optional: defaults to False :type provide_blank: bool @@ -51,6 +58,7 @@ """ super( FilteredLineDataProvider, self ).__init__( source, **kwargs ) self.strip_lines = strip_lines + self.strip_newlines = strip_newlines self.provide_blank = provide_blank self.comment_char = comment_char @@ -62,17 +70,18 @@ :type line: str :returns: a line or `None` """ - line = super( FilteredLineDataProvider, self ).filter( line ) if line != None: - # is this the proper order? + #??: shouldn't it strip newlines regardless, if not why not use on of the base.dprovs if self.strip_lines: line = line.strip() + elif self.strip_newlines: + line = line.strip( '\n' ) if not self.provide_blank and line == '': return None elif line.startswith( self.comment_char ): return None - return line + return super( FilteredLineDataProvider, self ).filter( line ) class RegexLineDataProvider( FilteredLineDataProvider ): @@ -108,6 +117,7 @@ #NOTE: no support for flags def filter( self, line ): + #NOTE: filter_fn will occur BEFORE any matching line = super( RegexLineDataProvider, self ).filter( line ) if line != None and self.compiled_regex_list: line = self.filter_by_regex( line ) @@ -144,16 +154,15 @@ :type block_filter_fn: function """ # composition - not inheritance - #TODO: don't pass any? - line_provider = FilteredLineDataProvider( source ) - super( BlockDataProvider, self ).__init__( line_provider, **kwargs ) + #TODO: not a fan of this: + ( filter_fn, limit, offset ) = ( kwargs.pop( 'filter_fn', None ), + kwargs.pop( 'limit', None ), kwargs.pop( 'offset', 0 ) ) + line_provider = FilteredLineDataProvider( source, **kwargs ) + super( BlockDataProvider, self ).__init__( line_provider, filter_fn=filter_fn, limit=limit, offset=offset ) self.new_block_delim_fn = new_block_delim_fn self.block_filter_fn = block_filter_fn self.init_new_block() - # ...well, this is kinda lame - but prevents returning first empty block - #TODO: maybe better way in iter - self.is_inside_block = False def init_new_block( self ): """ @@ -161,7 +170,6 @@ """ # called in __init__ and after yielding the prev. 
block self.block_lines = collections.deque([]) - self.block = {} def __iter__( self ): """ @@ -171,8 +179,8 @@ for block in parent_gen: yield block - last_block = self.filter_block( self.assemble_current_block() ) - if last_block != None and self.num_data_returned < self.limit: + last_block = self.handle_last_block() + if last_block != None: self.num_data_returned += 1 yield last_block @@ -186,26 +194,23 @@ :returns: a block or `None` """ line = super( BlockDataProvider, self ).filter( line ) + #HACK + self.num_data_read -= 1 if line == None: return None + block_to_return = None if self.is_new_block( line ): # if we're already in a block, return the prev. block and add the line to a new block - #TODO: maybe better way in iter - if self.is_inside_block: - filtered_block = self.filter_block( self.assemble_current_block() ) + if self.block_lines: + block_to_return = self.assemble_current_block() + block_to_return = self.filter_block( block_to_return ) + self.num_data_read += 1 + self.init_new_block() - self.add_line_to_block( line ) - - # return an assembled block datum if it passed the filter - if filtered_block != None: - return filtered_block - - else: - self.is_inside_block = True self.add_line_to_block( line ) - return None + return block_to_return def is_new_block( self, line ): """ @@ -239,7 +244,6 @@ Called per block (just before providing). """ # empty block_lines and assemble block - # NOTE: we don't want to have mem == 2*data here so - careful return list( ( self.block_lines.popleft() for i in xrange( len( self.block_lines ) ) ) ) def filter_block( self, block ): @@ -251,3 +255,20 @@ if self.block_filter_fn: return self.block_filter_fn( block ) return block + + def handle_last_block( self ): + """ + Handle any blocks remaining after the main loop. + """ + if self.limit != None and self.num_data_returned >= self.limit: + return None + + last_block = self.assemble_current_block() + self.num_data_read += 1 + + last_block = self.filter_block( last_block ) + if last_block != None: + self.num_valid_data_read += 1 + + return last_block + diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 lib/galaxy/tools/__init__.py --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -94,6 +94,7 @@ class ToolBox( object ): """Container for a collection of tools""" + def __init__( self, config_filenames, tool_root_dir, app ): """ Create a toolbox from the config files named by `config_filenames`, using @@ -144,6 +145,7 @@ # config files, adding or removing locally developed tools or workflows. The value of integrated_tool_panel # will be False when things like functional tests are the caller. self.write_integrated_tool_panel_config_file() + def init_tools( self, config_filename ): """ Read the configuration file and load each tool. The following tags are currently supported: @@ -199,11 +201,13 @@ tool_path=tool_path, config_elems=config_elems ) self.shed_tool_confs.append( shed_tool_conf_dict ) + def get_shed_config_dict_by_filename( self, filename, default=None ): for shed_config_dict in self.shed_tool_confs: if shed_config_dict[ 'config_filename' ] == filename: return shed_config_dict return default + def __add_tool_to_tool_panel( self, tool_id, panel_component, section=False ): # See if a version of this tool is already loaded into the tool panel. The value of panel_component # will be a ToolSection (if the value of section=True) or self.tool_panel (if section=False). 
@@ -239,6 +243,7 @@ del panel_dict[ loaded_version_key ] panel_dict.insert( index, key, tool ) log.debug( "Loaded tool id: %s, version: %s into tool panel." % ( tool.id, tool.version ) ) + def load_tool_panel( self ): for key, val in self.integrated_tool_panel.items(): if key.startswith( 'tool_' ): @@ -276,6 +281,7 @@ section.elems[ section_key ] = section_val log.debug( "Loaded label: %s" % ( section_val.text ) ) self.tool_panel[ key ] = section + def load_integrated_tool_panel_keys( self ): """ Load the integrated tool panel keys, setting values for tools and workflows to None. The values will @@ -308,6 +314,7 @@ elif elem.tag == 'label': key = 'label_%s' % elem.get( 'id' ) self.integrated_tool_panel[ key ] = None + def write_integrated_tool_panel_config_file( self ): """ Write the current in-memory version of the integrated_tool_panel.xml file to disk. Since Galaxy administrators @@ -350,6 +357,7 @@ os.close( fd ) shutil.move( filename, os.path.abspath( self.integrated_tool_panel_config ) ) os.chmod( self.integrated_tool_panel_config, 0644 ) + def get_tool( self, tool_id, tool_version=None, get_all_versions=False ): """Attempt to locate a tool in the tool box.""" if tool_id in self.tools_by_id and not get_all_versions: @@ -380,6 +388,7 @@ #No tool matches by version, simply return the first available tool found return rval[0] return None + def get_loaded_tools_by_lineage( self, tool_id ): """Get all loaded tools associated by lineage to the tool whose id is tool_id.""" tv = self.__get_tool_version( tool_id ) @@ -395,6 +404,7 @@ tool = self.tools_by_id[ tool_id ] return [ tool ] return [] + def __get_tool_version( self, tool_id ): """Return a ToolVersion if one exists for the tool_id""" return self.sa_session.query( self.app.model.ToolVersion ) \ @@ -527,6 +537,7 @@ integrated_panel_dict.insert( index, key, tool ) except: log.exception( "Error reading tool from path: %s" % path ) + def load_workflow_tag_set( self, elem, panel_dict, integrated_panel_dict, load_panel_dict, index=None ): try: # TODO: should id be encoded? @@ -543,6 +554,7 @@ integrated_panel_dict.insert( index, key, workflow ) except: log.exception( "Error loading workflow: %s" % workflow_id ) + def load_label_tag_set( self, elem, panel_dict, integrated_panel_dict, load_panel_dict, index=None ): label = ToolSectionLabel( elem ) key = 'label_' + label.id @@ -552,6 +564,7 @@ integrated_panel_dict[ key ] = label else: integrated_panel_dict.insert( index, key, label ) + def load_section_tag_set( self, elem, tool_path, load_panel_dict, index=None ): key = 'section_' + elem.get( "id" ) if key in self.tool_panel: @@ -580,6 +593,7 @@ self.integrated_tool_panel[ key ] = integrated_section else: self.integrated_tool_panel.insert( index, key, integrated_section ) + def load_tool( self, config_file, guid=None, repository_id=None, **kwds ): """Load a single tool from the file named by `config_file` and return an instance of `Tool`.""" # Parse XML configuration file and get the root element @@ -597,6 +611,7 @@ else: ToolClass = Tool return ToolClass( config_file, root, self.app, guid=guid, repository_id=repository_id, **kwds ) + def reload_tool_by_id( self, tool_id ): """ Attempt to reload the tool identified by 'tool_id', if successful @@ -634,6 +649,7 @@ message += "<b>version:</b> %s" % old_tool.version status = 'done' return message, status + def remove_tool_by_id( self, tool_id ): """ Attempt to remove the tool identified by 'tool_id'. 
@@ -662,6 +678,7 @@ message += "<b>version:</b> %s" % tool.version status = 'done' return message, status + def load_workflow( self, workflow_id ): """ Return an instance of 'Workflow' identified by `id`, @@ -670,11 +687,13 @@ id = self.app.security.decode_id( workflow_id ) stored = self.app.model.context.query( self.app.model.StoredWorkflow ).get( id ) return stored.latest_workflow + def init_dependency_manager( self ): if self.app.config.use_tool_dependencies: self.dependency_manager = DependencyManager( [ self.app.config.tool_dependency_dir ] ) else: self.dependency_manager = None + @property def sa_session( self ): """ @@ -683,9 +702,10 @@ return self.app.model.context def to_dict( self, trans, in_panel=True, **kwds ): - # - # Dictify toolbox. - # + """ + Dictify toolbox. + """ + context = Bunch( toolbox=self, trans=trans, **kwds ) if in_panel: panel_elts = [ val for val in self.tool_panel.itervalues() ] @@ -705,8 +725,8 @@ rval.append( elt.to_dict( trans, for_link=True ) ) else: tools = [] - for id, tool in self.toolbox.tools_by_id.items(): - tools.append( tool.to_dict( trans ) ) + for id, tool in self.tools_by_id.items(): + tools.append( tool.to_dict( trans, for_link=True ) ) rval = tools return rval diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 lib/galaxy/tools/actions/history_imp_exp.py --- a/lib/galaxy/tools/actions/history_imp_exp.py +++ b/lib/galaxy/tools/actions/history_imp_exp.py @@ -1,7 +1,7 @@ -import tempfile +import tempfile, os from __init__ import ToolAction from galaxy.util.odict import odict -from galaxy.tools.imp_exp import * +from galaxy.tools.imp_exp import JobImportHistoryArchiveWrapper, JobExportHistoryArchiveWrapper import logging log = logging.getLogger( __name__ ) diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 lib/galaxy/tools/imp_exp/__init__.py --- a/lib/galaxy/tools/imp_exp/__init__.py +++ b/lib/galaxy/tools/imp_exp/__init__.py @@ -26,7 +26,7 @@ <data format="gzip" name="output_file"/></outputs></tool> - """ + """ # Load export tool. tmp_name = tempfile.NamedTemporaryFile() diff -r f830e3d7c6c096a9a840dad523540bdcc84327b2 -r 91bece401d51a8b9be8850208bd56fa838087a64 scripts/cleanup_datasets/cleanup_datasets.py --- a/scripts/cleanup_datasets/cleanup_datasets.py +++ b/scripts/cleanup_datasets/cleanup_datasets.py @@ -175,7 +175,7 @@ app.model.History.table.c.update_time < cutoff_time ) ) \ .options( eagerload( 'datasets' ) ) for history in histories: - print "### Processing history id %d (%s)" % (history.id, history.name) + print ("### Processing history id %d (%s)" % (history.id, history.name)).encode('utf-8') for dataset_assoc in history.datasets: _purge_dataset_instance( dataset_assoc, app, remove_from_disk, info_only = info_only ) #mark a DatasetInstance as deleted, clear associated files, and mark the Dataset as deleted if it is deletable if not info_only: This diff is so big that we needed to truncate the remainder. 
https://bitbucket.org/galaxy/galaxy-central/commits/0bdee4933794/ Changeset: 0bdee4933794 User: fubar Date: 2013-08-29 05:36:22 Summary: Merged galaxy/galaxy-central into default Affected #: 142 files diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 .hgtags --- a/.hgtags +++ b/.hgtags @@ -3,3 +3,4 @@ 75f09617abaadbc8cc732bb8ee519decaeb56ea7 release_2013.04.01 2cc8d10988e03257dc7b97f8bb332c7df745d1dd security_2013.04.08 524f246ca85395082719ae7a6ff72260d7ad5612 release_2013.06.03 +1ae95b3aa98d1ccf15b243ac3ce6a895eb7efc53 release_2013.08.12 diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 config/plugins/visualizations/scatterplot/templates/scatterplot.mako --- a/config/plugins/visualizations/scatterplot/templates/scatterplot.mako +++ b/config/plugins/visualizations/scatterplot/templates/scatterplot.mako @@ -41,7 +41,7 @@ <script type="text/javascript"> $(function(){ - var hda = ${h.to_json_string( trans.security.encode_dict_ids( hda.get_api_value() ) )}, + var hda = ${h.to_json_string( trans.security.encode_dict_ids( hda.dictify() ) )}, querySettings = ${h.to_json_string( query_args )}, chartConfig = _.extend( querySettings, { containerSelector : '#chart', diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -285,9 +285,6 @@ <sniffer type="galaxy.datatypes.tabular:Sam"/><sniffer type="galaxy.datatypes.data:Newick"/><sniffer type="galaxy.datatypes.data:Nexus"/> - <sniffer type="galaxy.datatypes.graph:Xgmml"/> - <sniffer type="galaxy.datatypes.graph:Sif"/> - <sniffer type="galaxy.datatypes.graph:Rdf"/><sniffer type="galaxy.datatypes.images:Jpg"/><sniffer type="galaxy.datatypes.images:Png"/><sniffer type="galaxy.datatypes.images:Tiff"/> diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 eggs.ini --- a/eggs.ini +++ b/eggs.ini @@ -18,6 +18,7 @@ MarkupSafe = 0.12 mercurial = 2.2.3 MySQL_python = 1.2.3c1 +PyRods = 3.2.4 numpy = 1.6.0 pbs_python = 4.3.5 psycopg2 = 2.0.13 diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -210,6 +210,7 @@ if self.nginx_upload_store: self.nginx_upload_store = os.path.abspath( self.nginx_upload_store ) self.object_store = kwargs.get( 'object_store', 'disk' ) + self.object_store_cache_path = resolve_path( kwargs.get( "object_store_cache_path", "database/object_store_cache" ), self.root ) # Handle AWS-specific config options for backward compatibility if kwargs.get( 'aws_access_key', None) is not None: self.os_access_key= kwargs.get( 'aws_access_key', None ) @@ -229,6 +230,8 @@ self.distributed_object_store_config_file = kwargs.get( 'distributed_object_store_config_file', None ) if self.distributed_object_store_config_file is not None: self.distributed_object_store_config_file = resolve_path( self.distributed_object_store_config_file, self.root ) + self.irods_root_collection_path = kwargs.get( 'irods_root_collection_path', None ) + self.irods_default_resource = kwargs.get( 'irods_default_resource', None ) # Parse global_conf and save the parser global_conf = kwargs.get( 'global_conf', None ) global_conf_parser = ConfigParser.ConfigParser() @@ -370,6 +373,7 @@ self.nginx_upload_store, \ './static/genetrack/plots', \ self.whoosh_index_dir, \ + self.object_store_cache_path, \ os.path.join( 
self.tool_data_path, 'shared', 'jars' ): if path not in [ None, False ] and not os.path.isdir( path ): try: diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/eggs/__init__.py --- a/lib/galaxy/eggs/__init__.py +++ b/lib/galaxy/eggs/__init__.py @@ -387,7 +387,8 @@ "guppy": lambda: self.config.get( "app:main", "use_memdump" ), "python_openid": lambda: self.config.get( "app:main", "enable_openid" ), "python_daemon": lambda: sys.version_info[:2] >= ( 2, 5 ), - "pysam": lambda: check_pysam() + "pysam": lambda: check_pysam(), + "PyRods": lambda: self.config.get( "app:main", "object_store" ) == "irods" }.get( egg_name, lambda: True )() except: return False diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/jobs/actions/post.py --- a/lib/galaxy/jobs/actions/post.py +++ b/lib/galaxy/jobs/actions/post.py @@ -242,8 +242,8 @@ } """ @classmethod - def get_short_str(cls, trans): - return "Hide this dataset." + def get_short_str(cls, pja): + return "Hide output '%s'." % pja.output_name class DeleteDatasetAction(DefaultJobAction): # This is disabled for right now. Deleting a dataset in the middle of a workflow causes errors (obviously) for the subsequent steps using the data. diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -23,7 +23,7 @@ import galaxy.datatypes.registry import galaxy.security.passwords from galaxy.datatypes.metadata import MetadataCollection -from galaxy.model.item_attrs import APIItem, UsesAnnotations +from galaxy.model.item_attrs import DictifiableMixin, UsesAnnotations from galaxy.security import get_permitted_actions from galaxy.util import is_multi_byte, nice_size, Params, restore_text, send_mail from galaxy.util.bunch import Bunch @@ -61,16 +61,16 @@ datatypes_registry = d_registry -class User( object, APIItem ): +class User( object, DictifiableMixin ): use_pbkdf2 = True """ Data for a Galaxy user or admin and relations to their histories, credentials, and roles. 
""" - # attributes that will be accessed and returned when calling get_api_value( view='collection' ) - api_collection_visible_keys = ( 'id', 'email' ) - # attributes that will be accessed and returned when calling get_api_value( view='element' ) - api_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' ) + # attributes that will be accessed and returned when calling dictify( view='collection' ) + dict_collection_visible_keys = ( 'id', 'email' ) + # attributes that will be accessed and returned when calling dictify( view='element' ) + dict_element_visible_keys = ( 'id', 'email', 'username', 'total_disk_usage', 'nice_total_disk_usage' ) def __init__( self, email=None, password=None ): self.email = email @@ -157,9 +157,9 @@ return total -class Job( object, APIItem ): - api_collection_visible_keys = [ 'id' ] - api_element_visible_keys = [ 'id' ] +class Job( object, DictifiableMixin ): + dict_collection_visible_keys = [ 'id' ] + dict_element_visible_keys = [ 'id' ] """ A job represents a request to run a tool given input datasets, tool @@ -363,8 +363,8 @@ dataset.blurb = 'deleted' dataset.peek = 'Job deleted' dataset.info = 'Job output deleted by user before job completed' - def get_api_value( self, view='collection' ): - rval = super( Job, self ).get_api_value( view=view ) + def dictify( self, view='collection' ): + rval = super( Job, self ).dictify( view=view ) rval['tool_name'] = self.tool_id param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) rval['params'] = param_dict @@ -649,9 +649,9 @@ else: return False -class Group( object, APIItem ): - api_collection_visible_keys = ( 'id', 'name' ) - api_element_visible_keys = ( 'id', 'name' ) +class Group( object, DictifiableMixin ): + dict_collection_visible_keys = ( 'id', 'name' ) + dict_element_visible_keys = ( 'id', 'name' ) def __init__( self, name = None ): self.name = name @@ -662,10 +662,10 @@ self.user = user self.group = group -class History( object, UsesAnnotations ): +class History( object, DictifiableMixin, UsesAnnotations ): - api_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' ) - api_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged' ) + dict_collection_visible_keys = ( 'id', 'name', 'published', 'deleted' ) + dict_element_visible_keys = ( 'id', 'name', 'published', 'deleted', 'genome_build', 'purged' ) default_name = 'Unnamed history' def __init__( self, id=None, name=None, user=None ): @@ -680,6 +680,7 @@ self.user = user self.datasets = [] self.galaxy_sessions = [] + self.tags = [] def _next_hid( self ): # TODO: override this with something in the database that ensures @@ -779,45 +780,34 @@ history_name = unicode(history_name, 'utf-8') return history_name - def get_api_value( self, view='collection', value_mapper = None ): - if value_mapper is None: - value_mapper = {} - rval = {} + def dictify( self, view='collection', value_mapper = None ): - try: - visible_keys = self.__getattribute__( 'api_' + view + '_visible_keys' ) - except AttributeError: - raise Exception( 'Unknown API view: %s' % view ) - for key in visible_keys: - try: - rval[key] = self.__getattribute__( key ) - if key in value_mapper: - rval[key] = value_mapper.get( key )( rval[key] ) - except AttributeError: - rval[key] = None - + # Get basic value. + rval = super( History, self ).dictify( view=view, value_mapper=value_mapper ) + + # Add tags. 
tags_str_list = [] for tag in self.tags: tag_str = tag.user_tname if tag.value is not None: tag_str += ":" + tag.user_value tags_str_list.append( tag_str ) - rval['tags'] = tags_str_list - rval['model_class'] = self.__class__.__name__ + rval[ 'tags' ] = tags_str_list + return rval def set_from_dict( self, new_data ): #AKA: set_api_value """ Set object attributes to the values in dictionary new_data limiting - to only those keys in api_element_visible_keys. + to only those keys in dict_element_visible_keys. Returns a dictionary of the keys, values that have been changed. """ # precondition: keys are proper, values are parsed and validated changed = {} # unknown keys are ignored here - for key in [ k for k in new_data.keys() if k in self.api_element_visible_keys ]: + for key in [ k for k in new_data.keys() if k in self.dict_element_visible_keys ]: new_val = new_data[ key ] old_val = self.__getattribute__( key ) if new_val == old_val: @@ -879,9 +869,9 @@ self.group = group self.role = role -class Role( object, APIItem ): - api_collection_visible_keys = ( 'id', 'name' ) - api_element_visible_keys = ( 'id', 'name', 'description', 'type' ) +class Role( object, DictifiableMixin ): + dict_collection_visible_keys = ( 'id', 'name' ) + dict_element_visible_keys = ( 'id', 'name', 'description', 'type' ) private_id = None types = Bunch( PRIVATE = 'private', @@ -896,21 +886,21 @@ self.type = type self.deleted = deleted -class UserQuotaAssociation( object, APIItem ): - api_element_visible_keys = ( 'user', ) +class UserQuotaAssociation( object, DictifiableMixin ): + dict_element_visible_keys = ( 'user', ) def __init__( self, user, quota ): self.user = user self.quota = quota -class GroupQuotaAssociation( object, APIItem ): - api_element_visible_keys = ( 'group', ) +class GroupQuotaAssociation( object, DictifiableMixin ): + dict_element_visible_keys = ( 'group', ) def __init__( self, group, quota ): self.group = group self.quota = quota -class Quota( object, APIItem ): - api_collection_visible_keys = ( 'id', 'name' ) - api_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' ) +class Quota( object, DictifiableMixin ): + dict_collection_visible_keys = ( 'id', 'name' ) + dict_element_visible_keys = ( 'id', 'name', 'description', 'bytes', 'operation', 'display_amount', 'default', 'users', 'groups' ) valid_operations = ( '+', '-', '=' ) def __init__( self, name="", description="", amount=0, operation="=" ): self.name = name @@ -937,8 +927,8 @@ else: return nice_size( self.bytes ) -class DefaultQuotaAssociation( Quota, APIItem ): - api_element_visible_keys = ( 'type', ) +class DefaultQuotaAssociation( Quota, DictifiableMixin ): + dict_element_visible_keys = ( 'type', ) types = Bunch( UNREGISTERED = 'unregistered', REGISTERED = 'registered' @@ -1518,7 +1508,7 @@ return msg -class HistoryDatasetAssociation( DatasetInstance, UsesAnnotations ): +class HistoryDatasetAssociation( DatasetInstance, DictifiableMixin, UsesAnnotations ): """ Resource class that creates a relation between a dataset and a user history. """ @@ -1690,7 +1680,7 @@ rval += child.get_disk_usage( user ) return rval - def get_api_value( self, view='collection' ): + def dictify( self, view='collection' ): """ Return attributes of this HDA that are exposed using the API. 
""" @@ -1769,10 +1759,10 @@ self.subset = subset self.location = location -class Library( object, APIItem ): +class Library( object, DictifiableMixin ): permitted_actions = get_permitted_actions( filter='LIBRARY' ) - api_collection_visible_keys = ( 'id', 'name' ) - api_element_visible_keys = ( 'id', 'deleted', 'name', 'description', 'synopsis' ) + dict_collection_visible_keys = ( 'id', 'name' ) + dict_element_visible_keys = ( 'id', 'deleted', 'name', 'description', 'synopsis' ) def __init__( self, name=None, description=None, synopsis=None, root_folder=None ): self.name = name or "Unnamed library" self.description = description @@ -1838,8 +1828,8 @@ name = unicode( name, 'utf-8' ) return name -class LibraryFolder( object, APIItem ): - api_element_visible_keys = ( 'id', 'parent_id', 'name', 'description', 'item_count', 'genome_build' ) +class LibraryFolder( object, DictifiableMixin ): + dict_element_visible_keys = ( 'id', 'parent_id', 'name', 'description', 'item_count', 'genome_build' ) def __init__( self, name=None, description=None, item_count=0, order_id=None ): self.name = name or "Unnamed folder" self.description = description @@ -1910,8 +1900,8 @@ if isinstance( name, str ): name = unicode( name, 'utf-8' ) return name - def get_api_value( self, view='collection' ): - rval = super( LibraryFolder, self ).get_api_value( view=view ) + def dictify( self, view='collection' ): + rval = super( LibraryFolder, self ).dictify( view=view ) info_association, inherited = self.get_info_association() if info_association: if inherited: @@ -1976,7 +1966,7 @@ name = property( get_name, set_name ) def display_name( self ): self.library_dataset_dataset_association.display_name() - def get_api_value( self, view='collection' ): + def dictify( self, view='collection' ): # Since this class is a proxy to rather complex attributes we want to # display in other objects, we can't use the simpler method used by # other model classes. @@ -2106,7 +2096,7 @@ if restrict: return None, inherited return self.library_dataset.folder.get_info_association( inherited=True ) - def get_api_value( self, view='collection' ): + def dictify( self, view='collection' ): # Since this class is a proxy to rather complex attributes we want to # display in other objects, we can't use the simpler method used by # other model classes. @@ -2333,9 +2323,9 @@ self.id = None self.user = None -class StoredWorkflow( object, APIItem): - api_collection_visible_keys = ( 'id', 'name', 'published' ) - api_element_visible_keys = ( 'id', 'name', 'published' ) +class StoredWorkflow( object, DictifiableMixin): + dict_collection_visible_keys = ( 'id', 'name', 'published' ) + dict_element_visible_keys = ( 'id', 'name', 'published' ) def __init__( self ): self.id = None self.user = None @@ -2351,8 +2341,8 @@ new_swta.user = target_user self.tags.append(new_swta) - def get_api_value( self, view='collection', value_mapper = None ): - rval = APIItem.get_api_value(self, view=view, value_mapper = value_mapper) + def dictify( self, view='collection', value_mapper = None ): + rval = DictifiableMixin.dictify(self, view=view, value_mapper = value_mapper) tags_str_list = [] for tag in self.tags: tag_str = tag.user_tname @@ -2444,7 +2434,7 @@ return os.path.abspath( os.path.join( path, "metadata_%d.dat" % self.id ) ) -class FormDefinition( object, APIItem ): +class FormDefinition( object, DictifiableMixin ): # The following form_builder classes are supported by the FormDefinition class. 
supported_field_types = [ AddressField, CheckboxField, PasswordField, SelectField, TextArea, TextField, WorkflowField, WorkflowMappingField, HistoryField ] types = Bunch( REQUEST = 'Sequencing Request Form', @@ -2453,8 +2443,8 @@ RUN_DETAILS_TEMPLATE = 'Sample run details template', LIBRARY_INFO_TEMPLATE = 'Library information template', USER_INFO = 'User Information' ) - api_collection_visible_keys = ( 'id', 'name' ) - api_element_visible_keys = ( 'id', 'name', 'desc', 'form_definition_current_id', 'fields', 'layout' ) + dict_collection_visible_keys = ( 'id', 'name' ) + dict_element_visible_keys = ( 'id', 'name', 'desc', 'form_definition_current_id', 'fields', 'layout' ) def __init__( self, name=None, desc=None, fields=[], form_definition_current=None, form_type=None, layout=None ): self.name = name self.desc = desc @@ -2572,12 +2562,12 @@ self.form_definition = form_def self.content = content -class Request( object, APIItem ): +class Request( object, DictifiableMixin ): states = Bunch( NEW = 'New', SUBMITTED = 'In Progress', REJECTED = 'Rejected', COMPLETE = 'Complete' ) - api_collection_visible_keys = ( 'id', 'name', 'state' ) + dict_collection_visible_keys = ( 'id', 'name', 'state' ) def __init__( self, name=None, desc=None, request_type=None, user=None, form_values=None, notification=None ): self.name = name self.desc = desc @@ -2763,9 +2753,9 @@ def populate_actions( self, trans, item, param_dict=None ): return self.get_external_service_type( trans ).actions.populate( self, item, param_dict=param_dict ) -class RequestType( object, APIItem ): - api_collection_visible_keys = ( 'id', 'name', 'desc' ) - api_element_visible_keys = ( 'id', 'name', 'desc', 'request_form_id', 'sample_form_id' ) +class RequestType( object, DictifiableMixin ): + dict_collection_visible_keys = ( 'id', 'name', 'desc' ) + dict_element_visible_keys = ( 'id', 'name', 'desc', 'request_form_id', 'sample_form_id' ) rename_dataset_options = Bunch( NO = 'Do not rename', SAMPLE_NAME = 'Preprend sample name', EXPERIMENT_NAME = 'Prepend experiment name', @@ -2849,12 +2839,12 @@ self.request_type = request_type self.role = role -class Sample( object, APIItem ): +class Sample( object, DictifiableMixin ): # The following form_builder classes are supported by the Sample class. 
supported_field_types = [ CheckboxField, SelectField, TextField, WorkflowField, WorkflowMappingField, HistoryField ] bulk_operations = Bunch( CHANGE_STATE = 'Change state', SELECT_LIBRARY = 'Select data library and folder' ) - api_collection_visible_keys = ( 'id', 'name' ) + dict_collection_visible_keys = ( 'id', 'name' ) def __init__(self, name=None, desc=None, request=None, form_values=None, bar_code=None, library=None, folder=None, workflow=None, history=None): self.name = name self.desc = desc @@ -3179,9 +3169,9 @@ def __str__ ( self ): return "Tag(id=%s, type=%i, parent_id=%s, name=%s)" % ( self.id, self.type, self.parent_id, self.name ) -class ItemTagAssociation ( object, APIItem ): - api_collection_visible_keys = ( 'id', 'user_tname', 'user_value' ) - api_element_visible_keys = api_collection_visible_keys +class ItemTagAssociation ( object, DictifiableMixin ): + dict_collection_visible_keys = ( 'id', 'user_tname', 'user_value' ) + dict_element_visible_keys = dict_collection_visible_keys def __init__( self, id=None, user=None, item_id=None, tag_id=None, user_tname=None, value=None ): self.id = id @@ -3317,9 +3307,9 @@ pass class ToolShedRepository( object ): - api_collection_visible_keys = ( 'id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes', + dict_collection_visible_keys = ( 'id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes', 'update_available', 'deleted', 'uninstalled', 'dist_to_shed', 'status', 'error_message' ) - api_element_visible_keys = ( 'id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes', + dict_element_visible_keys = ( 'id', 'tool_shed', 'name', 'owner', 'installed_changeset_revision', 'changeset_revision', 'ctx_rev', 'includes_datatypes', 'update_available', 'deleted', 'uninstalled', 'dist_to_shed', 'status', 'error_message' ) installation_status = Bunch( NEW='New', CLONING='Cloning', @@ -3357,7 +3347,7 @@ self.status = status self.error_message = error_message def as_dict( self, value_mapper=None ): - return self.get_api_value( view='element', value_mapper=value_mapper ) + return self.dictify( view='element', value_mapper=value_mapper ) def repo_files_directory( self, app ): repo_path = self.repo_path( app ) if repo_path: @@ -3445,12 +3435,12 @@ if self.shed_config_filename == shed_tool_conf_dict[ 'config_filename' ]: return shed_tool_conf_dict return default - def get_api_value( self, view='collection', value_mapper=None ): + def dictify( self, view='collection', value_mapper=None ): if value_mapper is None: value_mapper = {} rval = {} try: - visible_keys = self.__getattribute__( 'api_' + view + '_visible_keys' ) + visible_keys = self.__getattribute__( 'dict_' + view + '_visible_keys' ) except AttributeError: raise Exception( 'Unknown API view: %s' % view ) for key in visible_keys: @@ -3548,7 +3538,8 @@ for rrda in self.required_repositories: repository_dependency = rrda.repository_dependency required_repository = repository_dependency.repository - required_repositories.append( required_repository ) + if required_repository: + required_repositories.append( required_repository ) return required_repositories @property def installed_repository_dependencies( self ): @@ -3568,6 +3559,7 @@ return missing_required_repositories @property def repository_dependencies_being_installed( self ): + """Return the repository's repository dependencies that are currently being 
installed.""" required_repositories_being_installed = [] for required_repository in self.repository_dependencies: if required_repository.status == self.installation_status.INSTALLING: @@ -3575,6 +3567,7 @@ return required_repositories_being_installed @property def repository_dependencies_missing_or_being_installed( self ): + """Return the repository's repository dependencies that are either missing or currently being installed.""" required_repositories_missing_or_being_installed = [] for required_repository in self.repository_dependencies: if required_repository.status in [ self.installation_status.ERROR, @@ -3585,6 +3578,7 @@ return required_repositories_missing_or_being_installed @property def repository_dependencies_with_installation_errors( self ): + """Return the repository's repository dependencies that have installation errors.""" required_repositories_with_installation_errors = [] for required_repository in self.repository_dependencies: if required_repository.status == self.installation_status.ERROR: @@ -3705,8 +3699,8 @@ self.tool_shed_repository.name, self.tool_shed_repository.installed_changeset_revision ) -class ToolVersion( object, APIItem ): - api_element_visible_keys = ( 'id', 'tool_shed_repository' ) +class ToolVersion( object, DictifiableMixin ): + dict_element_visible_keys = ( 'id', 'tool_shed_repository' ) def __init__( self, id=None, create_time=None, tool_id=None, tool_shed_repository=None ): self.id = id self.create_time = create_time @@ -3763,8 +3757,8 @@ return version_ids return [ tool_version.tool_id for tool_version in self.get_versions( app ) ] - def get_api_value( self, view='element' ): - rval = APIItem.get_api_value(self, view) + def dictify( self, view='element' ): + rval = DictifiableMixin.dictify(self, view) rval['tool_name'] = self.tool_id for a in self.parent_tool_association: rval['parent_tool_id'] = a.parent_id diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/model/item_attrs.py --- a/lib/galaxy/model/item_attrs.py +++ b/lib/galaxy/model/item_attrs.py @@ -158,36 +158,52 @@ class_name = '%sAnnotationAssociation' % item.__class__.__name__ return getattr( galaxy.model, class_name, None ) -class APIItem: - """ Mixin for api representation. """ - #api_collection_visible_keys = ( 'id' ) - #api_element_visible_keys = ( 'id' ) - def get_api_value( self, view='collection', value_mapper = None ): +class DictifiableMixin: + """ Mixin that enables objects to be converted to dictionaries. This is useful + when for sharing objects across boundaries, such as the API, tool scripts, + and JavaScript code. """ + + def dictify( self, view='collection', value_mapper=None ): + """ + Return item dictionary. + """ + + if not value_mapper: + value_mapper = {} + def get_value( key, item ): + """ + Recursive helper function to get item values. + """ + # FIXME: why use exception here? Why not look for key in value_mapper + # first and then default to dictify? try: - return item.get_api_value( view=view, value_mapper=value_mapper ) + return item.dictify( view=view, value_mapper=value_mapper ) except: if key in value_mapper: return value_mapper.get( key )( item ) return item - if value_mapper is None: - value_mapper = {} - rval = {} + + # Create dict to represent item. + rval = dict( + model_class=self.__class__.__name__ + ) + + # Fill item dict with visible keys. 
try: - visible_keys = self.__getattribute__( 'api_' + view + '_visible_keys' ) + visible_keys = self.__getattribute__( 'dict_' + view + '_visible_keys' ) except AttributeError: - raise Exception( 'Unknown API view: %s' % view ) + raise Exception( 'Unknown DictifiableMixin view: %s' % view ) for key in visible_keys: try: item = self.__getattribute__( key ) if type( item ) == InstrumentedList: - rval[key] = [] + rval[ key ] = [] for i in item: - rval[key].append( get_value( key, i ) ) + rval[ key ].append( get_value( key, i ) ) else: - rval[key] = get_value( key, item ) + rval[ key ] = get_value( key, item ) except AttributeError: - rval[key] = None + rval[ key ] = None - rval['model_class'] = self.__class__.__name__ return rval diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -560,7 +560,7 @@ return self.view.get_results(True) def item_to_api_value(self, item): - r = item.get_api_value( view='element' ) + r = item.dictify( view='element' ) if self.query.field_list.count("*"): return r o = {} diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/objectstore/__init__.py --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -5,14 +5,11 @@ """ import os -import sys import time import random import shutil import logging import threading -import subprocess -from datetime import datetime from galaxy import util from galaxy.jobs import Sleeper @@ -21,22 +18,14 @@ from sqlalchemy.orm import object_session -import multiprocessing -from galaxy.objectstore.s3_multipart_upload import multipart_upload -import boto -from boto.s3.key import Key -from boto.s3.connection import S3Connection -from boto.exception import S3ResponseError - log = logging.getLogger( __name__ ) -logging.getLogger('boto').setLevel(logging.INFO) # Otherwise boto is quite noisy class ObjectStore(object): """ ObjectStore abstract interface """ - def __init__(self): + def __init__(self, config, **kwargs): self.running = True self.extra_dirs = {} @@ -206,7 +195,7 @@ >>> assert s.get_filename(obj) == file_path + '/000/dataset_1.dat' """ def __init__(self, config, file_path=None, extra_dirs=None): - super(DiskObjectStore, self).__init__() + super(DiskObjectStore, self).__init__(config, file_path=file_path, extra_dirs=extra_dirs) self.file_path = file_path or config.file_path self.config = config self.extra_dirs['job_work'] = config.job_working_directory @@ -224,6 +213,7 @@ if not os.path.exists(path): return self._construct_path(obj, base_dir=base_dir, dir_only=dir_only, extra_dir=extra_dir, extra_dir_at_root=extra_dir_at_root, alt_name=alt_name) + # TODO: rename to _disk_path or something like that to avoid conflicts with children that'll use the local_extra_dirs decorator, e.g. S3 def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, **kwargs): """ Construct the expected absolute path for accessing the object identified by `obj`.id. 
@@ -297,7 +287,7 @@ util.umask_fix_perms(path, self.config.umask, 0666) def empty(self, obj, **kwargs): - return os.path.getsize(self.get_filename(obj, **kwargs)) > 0 + return os.path.getsize(self.get_filename(obj, **kwargs)) == 0 def size(self, obj, **kwargs): if self.exists(obj, **kwargs): @@ -373,490 +363,6 @@ super(CachingObjectStore, self).__init__(self, path, backend) -class S3ObjectStore(ObjectStore): - """ - Object store that stores objects as items in an AWS S3 bucket. A local - cache exists that is used as an intermediate location for files between - Galaxy and S3. - """ - def __init__(self, config): - super(S3ObjectStore, self).__init__() - self.config = config - self.staging_path = self.config.file_path - self.s3_conn = get_OS_connection(self.config) - self.bucket = self._get_bucket(self.config.os_bucket_name) - self.use_rr = self.config.os_use_reduced_redundancy - self.cache_size = self.config.object_store_cache_size - self.transfer_progress = 0 - # Clean cache only if value is set in universe_wsgi.ini - if self.cache_size != -1: - # Convert GBs to bytes for comparison - self.cache_size = self.cache_size * 1073741824 - # Helper for interruptable sleep - self.sleeper = Sleeper() - self.cache_monitor_thread = threading.Thread(target=self.__cache_monitor) - self.cache_monitor_thread.start() - log.info("Cache cleaner manager started") - # Test if 'axel' is available for parallel download and pull the key into cache - try: - subprocess.call('axel') - self.use_axel = True - except OSError: - self.use_axel = False - - def __cache_monitor(self): - time.sleep(2) # Wait for things to load before starting the monitor - while self.running: - total_size = 0 - # Is this going to be too expensive of an operation to be done frequently? - file_list = [] - for dirpath, dirnames, filenames in os.walk(self.staging_path): - for f in filenames: - fp = os.path.join(dirpath, f) - file_size = os.path.getsize(fp) - total_size += file_size - # Get the time given file was last accessed - last_access_time = time.localtime(os.stat(fp)[7]) - # Compose a tuple of the access time and the file path - file_tuple = last_access_time, fp, file_size - file_list.append(file_tuple) - # Sort the file list (based on access time) - file_list.sort() - # Initiate cleaning once within 10% of the defined cache size? - cache_limit = self.cache_size * 0.9 - if total_size > cache_limit: - log.info("Initiating cache cleaning: current cache size: %s; clean until smaller than: %s" \ - % (convert_bytes(total_size), convert_bytes(cache_limit))) - # How much to delete? If simply deleting up to the cache-10% limit, - # is likely to be deleting frequently and may run the risk of hitting - # the limit - maybe delete additional #%? - # For now, delete enough to leave at least 10% of the total cache free - delete_this_much = total_size - cache_limit - self.__clean_cache(file_list, delete_this_much) - self.sleeper.sleep(30) # Test cache size every 30 seconds? - - def __clean_cache(self, file_list, delete_this_much): - """ Keep deleting files from the file_list until the size of the deleted - files is greater than the value in delete_this_much parameter. - - :type file_list: list - :param file_list: List of candidate files that can be deleted. This method - will start deleting files from the beginning of the list so the list - should be sorted accordingly. 
The list must contains 3-element tuples, - positioned as follows: position 0 holds file last accessed timestamp - (as time.struct_time), position 1 holds file path, and position 2 has - file size (e.g., (<access time>, /mnt/data/dataset_1.dat), 472394) - - :type delete_this_much: int - :param delete_this_much: Total size of files, in bytes, that should be deleted. - """ - # Keep deleting datasets from file_list until deleted_amount does not - # exceed delete_this_much; start deleting from the front of the file list, - # which assumes the oldest files come first on the list. - deleted_amount = 0 - for i, f in enumerate(file_list): - if deleted_amount < delete_this_much: - deleted_amount += f[2] - os.remove(f[1]) - # Debugging code for printing deleted files' stats - # folder, file_name = os.path.split(f[1]) - # file_date = time.strftime("%m/%d/%y %H:%M:%S", f[0]) - # log.debug("%s. %-25s %s, size %s (deleted %s/%s)" \ - # % (i, file_name, convert_bytes(f[2]), file_date, \ - # convert_bytes(deleted_amount), convert_bytes(delete_this_much))) - else: - log.debug("Cache cleaning done. Total space freed: %s" % convert_bytes(deleted_amount)) - return - - def _get_bucket(self, bucket_name): - """ Sometimes a handle to a bucket is not established right away so try - it a few times. Raise error is connection is not established. """ - for i in range(5): - try: - bucket = self.s3_conn.get_bucket(bucket_name) - log.debug("Using cloud object store with bucket '%s'" % bucket.name) - return bucket - except S3ResponseError: - log.debug("Could not get bucket '%s', attempt %s/5" % (bucket_name, i+1)) - time.sleep(2) - # All the attempts have been exhausted and connection was not established, - # raise error - raise S3ResponseError - - def _fix_permissions(self, rel_path): - """ Set permissions on rel_path""" - for basedir, dirs, files in os.walk(rel_path): - util.umask_fix_perms(basedir, self.config.umask, 0777, self.config.gid) - for f in files: - path = os.path.join(basedir, f) - # Ignore symlinks - if os.path.islink(path): - continue - util.umask_fix_perms( path, self.config.umask, 0666, self.config.gid ) - - def _construct_path(self, obj, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, **kwargs): - rel_path = os.path.join(*directory_hash_id(obj.id)) - if extra_dir is not None: - if extra_dir_at_root: - rel_path = os.path.join(extra_dir, rel_path) - else: - rel_path = os.path.join(rel_path, extra_dir) - # S3 folders are marked by having trailing '/' so add it now - rel_path = '%s/' % rel_path - if not dir_only: - rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) - return rel_path - - def _get_cache_path(self, rel_path): - return os.path.abspath(os.path.join(self.staging_path, rel_path)) - - def _get_transfer_progress(self): - return self.transfer_progress - - def _get_size_in_s3(self, rel_path): - try: - key = self.bucket.get_key(rel_path) - if key: - return key.size - except S3ResponseError, ex: - log.error("Could not get size of key '%s' from S3: %s" % (rel_path, ex)) - except Exception, ex: - log.error("Could not get reference to the key object '%s'; returning -1 for key size: %s" % (rel_path, ex)) - return -1 - - def _key_exists(self, rel_path): - exists = False - try: - # A hackish way of testing if the rel_path is a folder vs a file - is_dir = rel_path[-1] == '/' - if is_dir: - rs = self.bucket.get_all_keys(prefix=rel_path) - if len(rs) > 0: - exists = True - else: - exists = False - else: - key = Key(self.bucket, rel_path) - exists = 
key.exists() - except S3ResponseError, ex: - log.error("Trouble checking existence of S3 key '%s': %s" % (rel_path, ex)) - return False - #print "Checking if '%s' exists in S3: %s" % (rel_path, exists) - if rel_path[0] == '/': - raise - return exists - - def _in_cache(self, rel_path): - """ Check if the given dataset is in the local cache and return True if so. """ - # log.debug("------ Checking cache for rel_path %s" % rel_path) - cache_path = self._get_cache_path(rel_path) - return os.path.exists(cache_path) - # TODO: Part of checking if a file is in cache should be to ensure the - # size of the cached file matches that on S3. Once the upload tool explicitly - # creates, this check sould be implemented- in the mean time, it's not - # looking likely to be implementable reliably. - # if os.path.exists(cache_path): - # # print "***1 %s exists" % cache_path - # if self._key_exists(rel_path): - # # print "***2 %s exists in S3" % rel_path - # # Make sure the size in cache is available in its entirety - # # print "File '%s' cache size: %s, S3 size: %s" % (cache_path, os.path.getsize(cache_path), self._get_size_in_s3(rel_path)) - # if os.path.getsize(cache_path) == self._get_size_in_s3(rel_path): - # # print "***2.1 %s exists in S3 and the size is the same as in cache (in_cache=True)" % rel_path - # exists = True - # else: - # # print "***2.2 %s exists but differs in size from cache (in_cache=False)" % cache_path - # exists = False - # else: - # # Although not perfect decision making, this most likely means - # # that the file is currently being uploaded - # # print "***3 %s found in cache but not in S3 (in_cache=True)" % cache_path - # exists = True - # else: - # return False - - def _pull_into_cache(self, rel_path): - # Ensure the cache directory structure exists (e.g., dataset_#_files/) - rel_path_dir = os.path.dirname(rel_path) - if not os.path.exists(self._get_cache_path(rel_path_dir)): - os.makedirs(self._get_cache_path(rel_path_dir)) - # Now pull in the file - ok = self._download(rel_path) - self._fix_permissions(self._get_cache_path(rel_path_dir)) - return ok - - def _transfer_cb(self, complete, total): - self.transfer_progress += 10 - - def _download(self, rel_path): - try: - log.debug("Pulling key '%s' into cache to %s" % (rel_path, self._get_cache_path(rel_path))) - key = self.bucket.get_key(rel_path) - # Test if cache is large enough to hold the new file - if self.cache_size > 0 and key.size > self.cache_size: - log.critical("File %s is larger (%s) than the cache size (%s). Cannot download." \ - % (rel_path, key.size, self.cache_size)) - return False - if self.use_axel: - log.debug("Parallel pulled key '%s' into cache to %s" % (rel_path, self._get_cache_path(rel_path))) - ncores = multiprocessing.cpu_count() - url = key.generate_url(7200) - ret_code = subprocess.call("axel -a -n %s '%s'" % (ncores, url)) - if ret_code == 0: - return True - else: - log.debug("Pulled key '%s' into cache to %s" % (rel_path, self._get_cache_path(rel_path))) - self.transfer_progress = 0 # Reset transfer progress counter - key.get_contents_to_filename(self._get_cache_path(rel_path), cb=self._transfer_cb, num_cb=10) - return True - except S3ResponseError, ex: - log.error("Problem downloading key '%s' from S3 bucket '%s': %s" % (rel_path, self.bucket.name, ex)) - return False - - def _push_to_os(self, rel_path, source_file=None, from_string=None): - """ - Push the file pointed to by ``rel_path`` to the object store naming the key - ``rel_path``. 
If ``source_file`` is provided, push that file instead while - still using ``rel_path`` as the key name. - If ``from_string`` is provided, set contents of the file to the value of - the string. - """ - try: - source_file = source_file if source_file else self._get_cache_path(rel_path) - if os.path.exists(source_file): - key = Key(self.bucket, rel_path) - if os.path.getsize(source_file) == 0 and key.exists(): - log.debug("Wanted to push file '%s' to S3 key '%s' but its size is 0; skipping." % (source_file, rel_path)) - return True - if from_string: - key.set_contents_from_string(from_string, reduced_redundancy=self.use_rr) - log.debug("Pushed data from string '%s' to key '%s'" % (from_string, rel_path)) - else: - start_time = datetime.now() - # print "Pushing cache file '%s' of size %s bytes to key '%s'" % (source_file, os.path.getsize(source_file), rel_path) - # print "+ Push started at '%s'" % start_time - mb_size = os.path.getsize(source_file) / 1e6 - if mb_size < 60 or self.config.object_store == 'swift': - self.transfer_progress = 0 # Reset transfer progress counter - key.set_contents_from_filename(source_file, reduced_redundancy=self.use_rr, - cb=self._transfer_cb, num_cb=10) - else: - multipart_upload(self.bucket, key.name, source_file, mb_size, use_rr=self.use_rr) - end_time = datetime.now() - # print "+ Push ended at '%s'; %s bytes transfered in %ssec" % (end_time, os.path.getsize(source_file), end_time-start_time) - log.debug("Pushed cache file '%s' to key '%s' (%s bytes transfered in %s sec)" % (source_file, rel_path, os.path.getsize(source_file), end_time-start_time)) - return True - else: - log.error("Tried updating key '%s' from source file '%s', but source file does not exist." - % (rel_path, source_file)) - except S3ResponseError, ex: - log.error("Trouble pushing S3 key '%s' from file '%s': %s" % (rel_path, source_file, ex)) - return False - - def file_ready(self, obj, **kwargs): - """ - A helper method that checks if a file corresponding to a dataset is - ready and available to be used. Return ``True`` if so, ``False`` otherwise. - """ - rel_path = self._construct_path(obj, **kwargs) - # Make sure the size in cache is available in its entirety - if self._in_cache(rel_path): - if os.path.getsize(self._get_cache_path(rel_path)) == self._get_size_in_s3(rel_path): - return True - log.debug("Waiting for dataset {0} to transfer from OS: {1}/{2}".format(rel_path, - os.path.getsize(self._get_cache_path(rel_path)), self._get_size_in_s3(rel_path))) - return False - - def exists(self, obj, **kwargs): - in_cache = in_s3 = False - rel_path = self._construct_path(obj, **kwargs) - # Check cache - if self._in_cache(rel_path): - in_cache = True - # Check S3 - in_s3 = self._key_exists(rel_path) - # log.debug("~~~~~~ File '%s' exists in cache: %s; in s3: %s" % (rel_path, in_cache, in_s3)) - # dir_only does not get synced so shortcut the decision - dir_only = kwargs.get('dir_only', False) - if dir_only: - if in_cache or in_s3: - return True - else: - return False - # TODO: Sync should probably not be done here. Add this to an async upload stack? 
- if in_cache and not in_s3: - self._push_to_os(rel_path, source_file=self._get_cache_path(rel_path)) - return True - elif in_s3: - return True - else: - return False - - def create(self, obj, **kwargs): - if not self.exists(obj, **kwargs): - #print "S3 OS creating a dataset with ID %s" % kwargs - # Pull out locally used fields - extra_dir = kwargs.get('extra_dir', None) - extra_dir_at_root = kwargs.get('extra_dir_at_root', False) - dir_only = kwargs.get('dir_only', False) - alt_name = kwargs.get('alt_name', None) - # print "---- Processing: %s; %s" % (alt_name, locals()) - # Construct hashed path - rel_path = os.path.join(*directory_hash_id(obj.id)) - - # Optionally append extra_dir - if extra_dir is not None: - if extra_dir_at_root: - rel_path = os.path.join(extra_dir, rel_path) - else: - rel_path = os.path.join(rel_path, extra_dir) - # Create given directory in cache - cache_dir = os.path.join(self.staging_path, rel_path) - if not os.path.exists(cache_dir): - os.makedirs(cache_dir) - # Although not really necessary to create S3 folders (because S3 has - # flat namespace), do so for consistency with the regular file system - # S3 folders are marked by having trailing '/' so add it now - # s3_dir = '%s/' % rel_path - # self._push_to_os(s3_dir, from_string='') - # If instructed, create the dataset in cache & in S3 - if not dir_only: - rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id) - open(os.path.join(self.staging_path, rel_path), 'w').close() - self._push_to_os(rel_path, from_string='') - - def empty(self, obj, **kwargs): - if self.exists(obj, **kwargs): - return bool(self.size(obj, **kwargs) > 0) - else: - raise ObjectNotFound( 'objectstore.empty, object does not exist: %s, kwargs: %s' - %( str( obj ), str( kwargs ) ) ) - - def size(self, obj, **kwargs): - rel_path = self._construct_path(obj, **kwargs) - if self._in_cache(rel_path): - try: - return os.path.getsize(self._get_cache_path(rel_path)) - except OSError, ex: - log.info("Could not get size of file '%s' in local cache, will try S3. Error: %s" % (rel_path, ex)) - elif self.exists(obj, **kwargs): - return self._get_size_in_s3(rel_path) - log.warning("Did not find dataset '%s', returning 0 for size" % rel_path) - return 0 - - def delete(self, obj, entire_dir=False, **kwargs): - rel_path = self._construct_path(obj, **kwargs) - extra_dir = kwargs.get('extra_dir', None) - try: - # For the case of extra_files, because we don't have a reference to - # individual files/keys we need to remove the entire directory structure - # with all the files in it. This is easy for the local file system, - # but requires iterating through each individual key in S3 and deleing it. 
- if entire_dir and extra_dir: - shutil.rmtree(self._get_cache_path(rel_path)) - rs = self.bucket.get_all_keys(prefix=rel_path) - for key in rs: - log.debug("Deleting key %s" % key.name) - key.delete() - return True - else: - # Delete from cache first - os.unlink(self._get_cache_path(rel_path)) - # Delete from S3 as well - if self._key_exists(rel_path): - key = Key(self.bucket, rel_path) - log.debug("Deleting key %s" % key.name) - key.delete() - return True - except S3ResponseError, ex: - log.error("Could not delete key '%s' from S3: %s" % (rel_path, ex)) - except OSError, ex: - log.error('%s delete error %s' % (self._get_filename(obj, **kwargs), ex)) - return False - - def get_data(self, obj, start=0, count=-1, **kwargs): - rel_path = self._construct_path(obj, **kwargs) - # Check cache first and get file if not there - if not self._in_cache(rel_path): - self._pull_into_cache(rel_path) - #else: - # print "(cccc) Getting '%s' from cache" % self._get_cache_path(rel_path) - # Read the file content from cache - data_file = open(self._get_cache_path(rel_path), 'r') - data_file.seek(start) - content = data_file.read(count) - data_file.close() - return content - - def get_filename(self, obj, **kwargs): - #print "S3 get_filename for dataset: %s" % dataset_id - dir_only = kwargs.get('dir_only', False) - rel_path = self._construct_path(obj, **kwargs) - cache_path = self._get_cache_path(rel_path) - # S3 does not recognize directories as files so cannot check if those exist. - # So, if checking dir only, ensure given dir exists in cache and return - # the expected cache path. - # dir_only = kwargs.get('dir_only', False) - # if dir_only: - # if not os.path.exists(cache_path): - # os.makedirs(cache_path) - # return cache_path - # Check if the file exists in the cache first - if self._in_cache(rel_path): - return cache_path - # Check if the file exists in persistent storage and, if it does, pull it into cache - elif self.exists(obj, **kwargs): - if dir_only: # Directories do not get pulled into cache - return cache_path - else: - if self._pull_into_cache(rel_path): - return cache_path - # For the case of retrieving a directory only, return the expected path - # even if it does not exist. - # if dir_only: - # return cache_path - raise ObjectNotFound( 'objectstore.get_filename, no cache_path: %s, kwargs: %s' - %( str( obj ), str( kwargs ) ) ) - # return cache_path # Until the upload tool does not explicitly create the dataset, return expected path - - def update_from_file(self, obj, file_name=None, create=False, **kwargs): - if create: - self.create(obj, **kwargs) - if self.exists(obj, **kwargs): - rel_path = self._construct_path(obj, **kwargs) - # Chose whether to use the dataset file itself or an alternate file - if file_name: - source_file = os.path.abspath(file_name) - # Copy into cache - cache_file = self._get_cache_path(rel_path) - try: - if source_file != cache_file: - # FIXME? Should this be a `move`? 
- shutil.copy2(source_file, cache_file) - self._fix_permissions(cache_file) - except OSError, ex: - log.error("Trouble copying source file '%s' to cache '%s': %s" % (source_file, cache_file, ex)) - else: - source_file = self._get_cache_path(rel_path) - # Update the file on S3 - self._push_to_os(rel_path, source_file) - else: - raise ObjectNotFound( 'objectstore.update_from_file, object does not exist: %s, kwargs: %s' - %( str( obj ), str( kwargs ) ) ) - - def get_object_url(self, obj, **kwargs): - if self.exists(obj, **kwargs): - rel_path = self._construct_path(obj, **kwargs) - try: - key = Key(self.bucket, rel_path) - return key.generate_url(expires_in = 86400) # 24hrs - except S3ResponseError, ex: - log.warning("Trouble generating URL for dataset '%s': %s" % (rel_path, ex)) - return None - - def get_store_usage_percent(self): - return 0.0 - class DistributedObjectStore(ObjectStore): """ ObjectStore that defers to a list of backends, for getting objects the @@ -1011,6 +517,7 @@ return id return None + class HierarchicalObjectStore(ObjectStore): """ ObjectStore that defers to a list of backends, for getting objects the @@ -1021,6 +528,7 @@ def __init__(self, backends=[]): super(HierarchicalObjectStore, self).__init__() + def build_object_store_from_config(config, fsmon=False): """ Depending on the configuration setting, invoke the appropriate object store """ @@ -1028,14 +536,31 @@ if store == 'disk': return DiskObjectStore(config=config) elif store == 's3' or store == 'swift': + from galaxy.objectstore.s3 import S3ObjectStore return S3ObjectStore(config=config) elif store == 'distributed': return DistributedObjectStore(config=config, fsmon=fsmon) elif store == 'hierarchical': return HierarchicalObjectStore() + elif store == 'irods': + from galaxy.objectstore.rods import IRODSObjectStore + return IRODSObjectStore(config=config) else: log.error("Unrecognized object store definition: {0}".format(store)) +def local_extra_dirs( func ): + """ A decorator for non-local plugins to utilize local directories for their extra_dirs (job_working_directory and temp). + """ + def wraps( self, *args, **kwargs ): + if kwargs.get( 'base_dir', None ) is None: + return func( self, *args, **kwargs ) + else: + for c in self.__class__.__mro__: + if c.__name__ == 'DiskObjectStore': + return getattr( c, func.__name__ )( self, *args, **kwargs ) + raise Exception( "Could not call DiskObjectStore's %s method, does your Object Store plugin inherit from DiskObjectStore?" % func.__name__ ) + return wraps + def convert_bytes(bytes): """ A helper function used for pretty printing disk usage """ if bytes is None: @@ -1057,25 +582,3 @@ else: size = '%.2fb' % bytes return size - -def get_OS_connection(config): - """ - Get a connection object for a cloud Object Store specified in the config. - Currently, this is a ``boto`` connection object. 
- """ - log.debug("Getting a connection object for '{0}' object store".format(config.object_store)) - a_key = config.os_access_key - s_key = config.os_secret_key - if config.object_store == 's3': - return S3Connection(a_key, s_key) - else: - # Establish the connection now - calling_format = boto.s3.connection.OrdinaryCallingFormat() - s3_conn = boto.connect_s3(aws_access_key_id=a_key, - aws_secret_access_key=s_key, - is_secure=config.os_is_secure, - host=config.os_host, - port=int(config.os_port), - calling_format=calling_format, - path=config.os_conn_path) - return s3_conn diff -r 91bece401d51a8b9be8850208bd56fa838087a64 -r 0bdee4933794f61eec10e80d2685ae07e073a796 lib/galaxy/objectstore/rods.py --- /dev/null +++ b/lib/galaxy/objectstore/rods.py @@ -0,0 +1,332 @@ +""" +Object Store plugin for the Integrated Rule-Oriented Data Store (iRODS) + +The module is named rods to avoid conflicting with the PyRods module, irods +""" + +import os +import time +import errno +import logging +#import traceback + +from posixpath import join as path_join +from posixpath import basename as path_basename +from posixpath import dirname as path_dirname + +from galaxy.objectstore import DiskObjectStore, ObjectStore, local_extra_dirs +from galaxy.exceptions import ObjectNotFound, ObjectInvalid + +import galaxy.eggs +galaxy.eggs.require( 'PyRods' ) +import irods + +log = logging.getLogger( __name__ ) + + +class IRODSObjectStore( DiskObjectStore, ObjectStore ): + """ + Galaxy object store based on iRODS + """ + def __init__( self, config, file_path=None, extra_dirs=None ): + super( IRODSObjectStore, self ).__init__( config, file_path=file_path, extra_dirs=extra_dirs ) + self.cache_path = config.object_store_cache_path + self.default_resource = config.irods_default_resource or None + + # Connect to iRODS (AssertionErrors will be raised if anything goes wrong) + self.rods_env, self.rods_conn = rods_connect() + + # if the root collection path in the config is unset or relative, try to use a sensible default + if config.irods_root_collection_path is None or ( config.irods_root_collection_path is not None and not config.irods_root_collection_path.startswith( '/' ) ): + rods_home = self.rods_env.rodsHome + assert rods_home != '', "Unable to initialize iRODS Object Store: rodsHome cannot be determined and irods_root_collection_path in Galaxy config is unset or not absolute." 
+ if config.irods_root_collection_path is None: + self.root_collection_path = path_join( rods_home, 'galaxy_data' ) + else: + self.root_collection_path = path_join( rods_home, config.irods_root_collection_path ) + else: + self.root_collection_path = config.irods_root_collection_path + + # will return a collection object regardless of whether it exists + self.root_collection = irods.irodsCollection( self.rods_conn, self.root_collection_path ) + + if self.root_collection.getId() == -1: + log.warning( "iRODS root collection does not exist, will attempt to create: %s", self.root_collection_path ) + self.root_collection.upCollection() + assert self.root_collection.createCollection( os.path.basename( self.root_collection_path ) ) == 0, "iRODS root collection creation failed: %s" % self.root_collection_path + self.root_collection = irods.irodsCollection( self.rods_conn, self.root_collection_path ) + assert self.root_collection.getId() != -1, "iRODS root collection creation claimed success but still does not exist" + + if self.default_resource is None: + self.default_resource = self.rods_env.rodsDefResource + + log.info( "iRODS data for this instance will be stored in collection: %s, resource: %s", self.root_collection_path, self.default_resource ) + + def __get_rods_path( self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs ): + path = "" + if extra_dir is not None: + path = extra_dir + + # extra_dir_at_root is ignored - since the iRODS plugin does not use + # the directory hash, there is only one level of subdirectory. + + if not dir_only: + # the .dat extension is stripped when stored in iRODS + # TODO: is the strip_dat kwarg the best way to implement this? + if strip_dat and alt_name and alt_name.endswith( '.dat' ): + alt_name = os.path.splitext( alt_name )[0] + default_name = 'dataset_%s' % obj.id + if not strip_dat: + default_name += '.dat' + path = path_join( path, alt_name if alt_name else default_name ) + + path = path_join( self.root_collection_path, path ) + + #log.debug( 'iRODS path for %s %s is %s', obj.__class__.__name__, obj.id, path ) + + return path + + def __get_cache_path( self, obj, **kwargs ): + # FIXME: does not handle collections + # FIXME: collisions could occur here + return os.path.join( self.cache_path, path_basename( self.__get_rods_path( obj, strip_dat=False, **kwargs ) ) ) + + def __clean_cache_entry( self, obj, **kwargs ): + # FIXME: does not handle collections + try: + os.unlink( self.__get_cache_path( obj, **kwargs ) ) + except OSError: + # it is expected that we'll call this method a lot regardless of + # whether we think the cached file exists + pass + + def __get_rods_handle( self, obj, mode='r', **kwargs ): + if kwargs.get( 'dir_only', False ): + return irods.irodsCollection( self.rods_conn, self.__get_rods_path( obj, **kwargs ) ) + else: + return irods.irodsOpen( self.rods_conn, self.__get_rods_path( obj, **kwargs ), mode ) + + def __mkcolls( self, rods_path ): + """ + An os.makedirs() for iRODS collections. `rods_path` is the desired collection to create. 
+ """ + assert rods_path.startswith( self.root_collection_path + '/' ), '__mkcolls(): Creating collections outside the root collection is not allowed (requested path was: %s)' % rods_path + mkcolls = [] + c = irods.irodsCollection( self.rods_conn, rods_path ) + while c.getId() == -1: + assert c.getCollName().startswith( self.root_collection_path + '/' ), '__mkcolls(): Attempted to move above the root collection: %s' % c.getCollName() + mkcolls.append( c.getCollName() ) + c.upCollection() + for collname in reversed( mkcolls ): + log.debug( 'Creating collection %s' % collname ) + ci = irods.collInp_t() + ci.collName = collname + status = rcCollCreate( self.rods_conn, ci ) + assert status == 0, '__mkcolls(): Failed to create collection: %s' % collname + + @local_extra_dirs + def exists( self, obj, **kwargs ): + doi = irods.dataObjInp_t() + doi.objPath = self.__get_rods_path( obj, **kwargs ) + log.debug( 'exists(): checking: %s', doi.objPath ) + return irods.rcObjStat( self.rods_conn, doi ) is not None + + @local_extra_dirs + def create(self, obj, **kwargs): + if not self.exists( obj, **kwargs ): + rods_path = self.__get_rods_path( obj, **kwargs ) + log.debug( 'create(): %s', rods_path ) + dir_only = kwargs.get( 'dir_only', False ) + # short circuit collection creation since most of the time it will + # be the root collection which already exists + collection_path = rods_path if dir_only else path_dirname( rods_path ) + if collection_path != self.root_collection_path: + self.__mkcolls( collection_path ) + if not dir_only: + # rcDataObjCreate is used instead of the irodsOpen wrapper so + # that we can prevent overwriting + doi = irods.dataObjInp_t() + doi.objPath = rods_path + doi.createMode = 0640 + doi.dataSize = 0 # 0 actually means "unknown", although literally 0 would be preferable + irods.addKeyVal( doi.condInput, irods.DEST_RESC_NAME_KW, self.default_resource ) + status = irods.rcDataObjCreate( self.rods_conn, doi ) + assert status >= 0, 'create(): rcDataObjCreate() failed: %s: %s: %s' % ( rods_path, status, irods.strerror( status ) ) + + @local_extra_dirs + def empty( self, obj, **kwargs ): + assert 'dir_only' not in kwargs, 'empty(): `dir_only` parameter is invalid here' + h = self.__get_rods_handle( obj, **kwargs ) + try: + return h.getSize() == 0 + except AttributeError: + # h is None + raise ObjectNotFound() + + def size( self, obj, **kwargs ): + assert 'dir_only' not in kwargs, 'size(): `dir_only` parameter is invalid here' + h = self.__get_rods_handle( obj, **kwargs ) + try: + return h.getSize() + except AttributeError: + # h is None + return 0 + + @local_extra_dirs + def delete( self, obj, entire_dir=False, **kwargs ): + assert 'dir_only' not in kwargs, 'delete(): `dir_only` parameter is invalid here' + rods_path = self.__get_rods_path( obj, **kwargs ) + # __get_rods_path prepends self.root_collection_path but we are going + # to ensure that it's valid anyway for safety's sake + assert rods_path.startswith( self.root_collection_path + '/' ), 'ERROR: attempt to delete object outside root collection (path was: %s)' % rods_path + if entire_dir: + # TODO + raise NotImplementedError() + h = self.__get_rods_handle( obj, **kwargs ) + try: + # note: PyRods' irodsFile.delete() does not set force + status = h.delete() + assert status == 0, '%d: %s' % ( status, irods.strerror( status ) ) + return True + except AttributeError: + log.warning( 'delete(): operation failed: object does not exist: %s', rods_path ) + except AssertionError, e: + # delete() does not raise on deletion failure + 
log.error( 'delete(): operation failed: %s', e ) + finally: + # remove the cached entry (finally is executed even when the try + # contains a return) + self.__clean_cache_entry( self, obj, **kwargs ) + return False + + @local_extra_dirs + def get_data( self, obj, start=0, count=-1, **kwargs ): + log.debug( 'get_data(): %s' ) + h = self.__get_rods_handle( obj, **kwargs ) + try: + h.seek( start ) + except AttributeError: + raise ObjectNotFound() + if count == -1: + return h.read() + else: + return f.read( count ) + # TODO: make sure implicit close is okay, DiskObjectStore actually + # reads data into a var, closes, and returns the var + + @local_extra_dirs + def get_filename( self, obj, **kwargs ): + log.debug( "get_filename(): called on %s %s. For better performance, avoid this method and use get_data() instead.", obj.__class__.__name__, obj.id ) + + # For finding all places where get_filename is called... + #log.debug( ''.join( traceback.format_stack() ) ) + + cached_path = self.__get_cache_path( obj, **kwargs ) + + if not self.exists( obj, **kwargs ): + raise ObjectNotFound() + + # TODO: implement or define whether dir_only is valid + if 'dir_only' in kwargs: + raise NotImplementedError() + + # cache hit + if os.path.exists( cached_path ): + return os.path.abspath( cached_path ) + + # cache miss + # TODO: thread this + incoming_path = os.path.join( os.path.dirname( cached_path ), "__incoming_%s" % os.path.basename( cached_path ) ) + doi = irods.dataObjInp_t() + doi.objPath = self.__get_rods_path( obj, **kwargs ) + doi.dataSize = 0 # TODO: does this affect performance? should we get size? + doi.numThreads = 0 + # TODO: might want to VERIFY_CHKSUM_KW + log.debug( 'get_filename(): caching %s to %s', doi.objPath, incoming_path ) + + # do the iget + status = irods.rcDataObjGet( self.rods_conn, doi, incoming_path ) + + # if incoming already exists, we'll wait for another process or thread + # to finish caching + if status != irods.OVERWRITE_WITHOUT_FORCE_FLAG: + assert status == 0, 'get_filename(): iget %s failed (%s): %s' % ( doi.objPath, status, irods.strerror( status ) ) + # POSIX rename is atomic + # TODO: rename without clobbering + os.rename( incoming_path, cached_path ) + log.debug( 'get_filename(): cached %s to %s', doi.objPath, cached_path ) + + # another process or thread is caching, wait for it + while not os.path.exists( cached_path ): + # TODO: force restart after mod time > some configurable, or + # otherwise deal with this potential deadlock and interrupted + # transfers + time.sleep( 5 ) + log.debug( "get_filename(): waiting on incoming '%s' for %s %s", incoming_path, obj.__class__.__name__, obj.id ) + + return os.path.abspath( cached_path ) + + @local_extra_dirs + def update_from_file(self, obj, file_name=None, create=False, **kwargs): + assert 'dir_only' not in kwargs, 'update_from_file(): `dir_only` parameter is invalid here' + + # do not create if not requested + if create and not self.exists( obj, **kwargs ): + raise ObjectNotFound() + + if file_name is None: + file_name = self.__get_cache_path( obj, **kwargs ) + + # put will create if necessary + doi = irods.dataObjInp_t() + doi.objPath = self.__get_rods_path( obj, **kwargs ) + doi.createMode = 0640 + doi.dataSize = os.stat( file_name ).st_size + doi.numThreads = 0 + irods.addKeyVal( doi.condInput, irods.DEST_RESC_NAME_KW, self.default_resource ) + irods.addKeyVal( doi.condInput, irods.FORCE_FLAG_KW, '' ) + # TODO: might want to VERIFY_CHKSUM_KW + log.debug( 'update_from_file(): updating %s to %s', file_name, doi.objPath ) 
+ + # do the iput + status = irods.rcDataObjPut( self.rods_conn, doi, file_name ) + assert status == 0, 'update_from_file(): iput %s failed (%s): %s' % ( doi.objPath, status, irods.strerror( status ) ) + + def get_object_url(self, obj, **kwargs): + return None + + def get_store_usage_percent(self): + return 0.0 + +# monkeypatch an strerror method into the irods module +def _rods_strerror( errno ): + """ + The missing `strerror` for iRODS error codes + """ + if not hasattr( irods, '__rods_strerror_map' ): + irods.__rods_strerror_map = {} + for name in dir( irods ): + v = getattr( irods, name ) + if type( v ) == int and v < 0: + irods.__rods_strerror_map[ v ] = name + return irods.__rods_strerror_map.get( errno, 'GALAXY_NO_ERRNO_MAPPING_FOUND' ) + +irods.strerror = _rods_strerror + +def rods_connect(): + """ + A basic iRODS connection mechanism that connects using the current iRODS + environment + """ + status, env = irods.getRodsEnv() + assert status == 0, 'connect(): getRodsEnv() failed (%s): %s' % ( status, irods.strerror( status ) ) + conn, err = irods.rcConnect( env.rodsHost, + env.rodsPort, + env.rodsUserName, + env.rodsZone ) + assert err.status == 0, 'connect(): rcConnect() failed (%s): %s' % ( err.status, err.msg ) + status, pw = irods.obfGetPw() + assert status == 0, 'connect(): getting password with obfGetPw() failed (%s): %s' % ( status, irods.strerror( status ) ) + status = irods.clientLoginWithObfPassword( conn, pw ) + assert status == 0, 'connect(): logging in with clientLoginWithObfPassword() failed (%s): %s' % ( status, irods.strerror( status ) ) + return env, conn This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/e9924d0e725e/ Changeset: e9924d0e725e User: ross...@gmail.com Date: 2013-10-05 01:51:18 Summary: local changes Affected #: 6 files diff -r 0bdee4933794f61eec10e80d2685ae07e073a796 -r e9924d0e725e0fe631cb44cef873e462a24f8fbc lib/galaxy/tools/actions/__init__.py --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -200,7 +200,7 @@ if data.dbkey not in [None, '?']: input_dbkey = data.dbkey data_name_sane = re.sub('[^a-zA-Z0-9_]+', '', data.name) - if trans.app.config.use_data_id_on_string: + if not trans.app.config.use_data_id_on_string: # we want names in our on_strings not numbers input_names.append(data_name_sane) else: diff -r 0bdee4933794f61eec10e80d2685ae07e073a796 -r e9924d0e725e0fe631cb44cef873e462a24f8fbc scripts/functional_tests.py --- a/scripts/functional_tests.py +++ b/scripts/functional_tests.py @@ -454,4 +454,5 @@ return 1 if __name__ == "__main__": + print '\n\n\n\n#### SGE_ROOT=', os.environ.get('SGE_ROOT','##### no SGE_ROOT!'),'\n\n\n' sys.exit( main() ) diff -r 0bdee4933794f61eec10e80d2685ae07e073a796 -r e9924d0e725e0fe631cb44cef873e462a24f8fbc tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -1,6 +1,7 @@ <?xml version="1.0"?><toolbox><section name="Get Data" id="getext"> + <tool file="rlGAT/rlGAT.xml"/><tool file="data_source/upload.xml"/><tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_test.xml" /> diff -r 0bdee4933794f61eec10e80d2685ae07e073a796 -r e9924d0e725e0fe631cb44cef873e462a24f8fbc tools/rgedgeR/rgedgeRpaired.xml --- a/tools/rgedgeR/rgedgeRpaired.xml +++ b/tools/rgedgeR/rgedgeRpaired.xml @@ -682,11 +682,12 @@ # DESeq2 require('DESeq2') library('RColorBrewer') - pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) if 
(length(subjects) == 0) { + pdata = data.frame(Name=colnames(workCM),Rx=group,row.names=colnames(workCM)) deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ Rx)) } else { + pdata = data.frame(Name=colnames(workCM),Rx=group,subjects=subjects,row.names=colnames(workCM)) deSEQds = DESeqDataSetFromMatrix(countData = workCM, colData = pdata, design = formula(~ subjects + Rx)) } #DESeq2 = DESeq(deSEQds,fitType='local',pAdjustMethod=fdrtype) diff -r 0bdee4933794f61eec10e80d2685ae07e073a796 -r e9924d0e725e0fe631cb44cef873e462a24f8fbc tools/rgenetics/bams2mx.py --- a/tools/rgenetics/bams2mx.py +++ b/tools/rgenetics/bams2mx.py @@ -296,6 +296,8 @@ bcolname = [x.split(',')[2].replace("'",'').replace('"','') for x in bamdat] assert len(bamf) == len(baif) == len(bcolname), '##ERROR bams2mx: Count of bam/bai/cname not consistent - %d/%d/%d' % (len(bamf),len(baif),len(bcolname)) for i,b in enumerate(bamf): + if b.lower() == "none": + continue assert os.path.isfile(b),'## Supplied input bam file "%s" not found' % b bn = os.path.basename(b) tf,tbam = tempfile.mkstemp(suffix='%s.bam' % bn,dir=opts.tmpdir) diff -r 0bdee4933794f61eec10e80d2685ae07e073a796 -r e9924d0e725e0fe631cb44cef873e462a24f8fbc tools/rgenetics/bams2mx.xml --- a/tools/rgenetics/bams2mx.xml +++ b/tools/rgenetics/bams2mx.xml @@ -27,7 +27,7 @@ truevalue="true" falsevalue="false" checked="no" help="Controls counting of optical/pcr duplicates if flagged by upstream processing - leave alone unless you know what you are doing" /><param name="firstbamf" type="data" label="BAM file from your history to count reads overlapping BED regions" format="bam" /><repeat name="bamfiles" title="Additional BAM files from your history to count reads overlapping BED regions" min="10"> - <param name="bamf" type="data" label="Additional BAM file from your history" format="bam" size="100"/> + <param name="bamf" type="data" label="Additional BAM file from your history" format="bam" size="100" optional="true"/></repeat></inputs><outputs> Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
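In the build_object_store_from_config hunk above, the S3ObjectStore and IRODSObjectStore imports are moved inside their respective branches, so a deployment that only configures the disk backend never needs boto or PyRods installed. A minimal sketch of that deferred-import dispatch follows; DiskObjectStore and the myapp.objectstore.* module paths are placeholders, not Galaxy's real names, and the real function logs an error instead of raising.

# Deferred-import dispatch, sketched from the hunk above. Only the branch that
# is actually taken imports its backend module, so optional dependencies
# (boto for s3/swift, PyRods for irods) stay optional.
class DiskObjectStore(object):          # placeholder stand-in
    def __init__(self, config):
        self.config = config

def build_object_store(store, config):
    if store == 'disk':
        return DiskObjectStore(config=config)
    elif store in ('s3', 'swift'):
        from myapp.objectstore.s3 import S3ObjectStore        # deferred: requires boto
        return S3ObjectStore(config=config)
    elif store == 'irods':
        from myapp.objectstore.rods import IRODSObjectStore   # deferred: requires PyRods
        return IRODSObjectStore(config=config)
    raise ValueError('Unrecognized object store definition: %s' % store)

disk_store = build_object_store('disk', config={'file_path': '/tmp/datasets'})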
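The local_extra_dirs decorator added to lib/galaxy/objectstore/__init__.py is what lets a remote plugin keep job_working_directory and temp data on local disk: when a call carries a base_dir keyword, it is rerouted to the DiskObjectStore method of the same name found in the plugin's MRO. A self-contained sketch of the pattern, with toy classes standing in for Galaxy's:

def local_extra_dirs(func):
    """Reroute calls that carry base_dir to the DiskObjectStore implementation."""
    def wraps(self, *args, **kwargs):
        if kwargs.get('base_dir', None) is None:
            return func(self, *args, **kwargs)
        for c in self.__class__.__mro__:
            if c.__name__ == 'DiskObjectStore':
                return getattr(c, func.__name__)(self, *args, **kwargs)
        raise Exception("Could not call DiskObjectStore's %s method, does your "
                        "Object Store plugin inherit from DiskObjectStore?" % func.__name__)
    return wraps

class DiskObjectStore(object):              # toy stand-in for the real base class
    def exists(self, obj, **kwargs):
        return 'disk lookup for %s' % obj

class RemoteObjectStore(DiskObjectStore):   # toy stand-in for a remote plugin
    @local_extra_dirs
    def exists(self, obj, **kwargs):
        return 'remote lookup for %s' % obj

store = RemoteObjectStore()
assert store.exists('dataset_1') == 'remote lookup for dataset_1'
assert store.exists('dataset_1', base_dir='job_work') == 'disk lookup for dataset_1'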
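One detail in the new IRODSObjectStore.get_data() above: the count != -1 branch returns f.read(count), but the open handle is bound to h and f is never defined, so a partial read would raise a NameError (presumably h was intended; the log.debug call there is also missing its format argument). A corrected sketch of the range-read logic, using an in-memory buffer as a stand-in for the PyRods handle:

import io

def read_range(handle, start=0, count=-1):
    """Seek to start and return count bytes, or everything to EOF when count == -1."""
    handle.seek(start)
    if count == -1:
        return handle.read()
    return handle.read(count)    # the committed hunk calls f.read(count) here

h = io.BytesIO(b'galaxy object store data')   # stand-in for irods.irodsOpen(...)
assert read_range(h, 7, 6) == b'object'
assert read_range(h, 0) == b'galaxy object store data'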
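The _rods_strerror monkeypatch at the end of rods.py works because PyRods exposes its error codes as negative integer constants on the irods module but provides no reverse lookup; the patch scans the module once and caches a code-to-name map. A self-contained sketch of the same idea, with a stub namespace (and illustrative, not authoritative, code values) in place of the real irods module:

class _FakeIrods(object):               # stub in place of the real irods module
    USER_FILE_DOES_NOT_EXIST = -510002  # illustrative values only
    CAT_NO_ROWS_FOUND = -808000

irods = _FakeIrods()

def rods_strerror(errno):
    """The missing strerror for iRODS error codes."""
    if not hasattr(irods, '_strerror_map'):
        irods._strerror_map = {}
        for name in dir(irods):
            value = getattr(irods, name)
            if isinstance(value, int) and value < 0:
                irods._strerror_map[value] = name
    return irods._strerror_map.get(errno, 'GALAXY_NO_ERRNO_MAPPING_FOUND')

assert rods_strerror(-808000) == 'CAT_NO_ROWS_FOUND'
assert rods_strerror(-1) == 'GALAXY_NO_ERRNO_MAPPING_FOUND'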