commit/galaxy-central: 6 new changesets
6 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/c21482ac06f8/
Changeset: c21482ac06f8
Branch: next-stable
User: natefoo
Date: 2014-01-27 19:37:33
Summary: Remove 4 tools from tool_conf.xml.main that were removed in migration 9.
Affected #: 1 file

diff -r 43d7d4a42cd2a3e31e02d86d0cf7d66dc928c631 -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e tool_conf.xml.main
--- a/tool_conf.xml.main
+++ b/tool_conf.xml.main
@@ -139,7 +139,6 @@
 <tool file="plotting/histogram2.xml" />
 <tool file="plotting/scatterplot.xml" />
 <tool file="plotting/boxplot.xml" />
- <tool file="visualization/GMAJ.xml" />
 <tool file="visualization/build_ucsc_custom_track.xml" />
 <tool file="maf/vcf_to_maf_customtrack.xml" />
 <tool file="mutation/visualize.xml" />
@@ -175,9 +174,6 @@
 <tool file="hyphy/hyphy_nj_tree_wrapper.xml" />
 <!-- <tool file="hyphy/hyphy_dnds_wrapper.xml" /> -->
 </section>
- <section id="motifs" name="Motif Tools">
- <tool file="rgenetics/rgWebLogo3.xml" />
- </section>
 <section id="clustal" name="Multiple Alignments">
 <tool file="rgenetics/rgClustalw.xml" />
 </section>
@@ -253,10 +249,6 @@
 <tool file="gatk/variant_eval.xml" />
 <tool file="gatk/variant_combine.xml" />
 </section>
- <section id="peak_calling" name="NGS: Peak Calling">
- <tool file="peak_calling/macs_wrapper.xml" />
- <tool file="peak_calling/sicer_wrapper.xml" />
- </section>
 <section id="ngs-rna-tools" name="NGS: RNA Analysis">
 <label id="rna_seq" text="RNA-seq" />
 <label id="filtering" text="Filtering" />

https://bitbucket.org/galaxy/galaxy-central/commits/401ee23dcf2f/
Changeset: 401ee23dcf2f
Branch: next-stable
User: natefoo
Date: 2014-01-27 19:58:31
Summary: Remove broken and/or obsolete tools.
Affected #: 33 files

diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 buildbot_setup.sh
--- a/buildbot_setup.sh
+++ b/buildbot_setup.sh
@@ -4,28 +4,6 @@
 : ${HOSTTYPE:=`uname -m`}
-# link to HYPHY is arch-dependent
-case "$OSTYPE" in
-    linux-gnu)
-        kernel=`uname -r | cut -f1,2 -d.`
-        HYPHY="/galaxy/software/linux$kernel-$HOSTTYPE/hyphy"
-        ;;
-    darwin*)
-        this_minor=`uname -r | awk -F. '{print ($1-4)}'`
-        machine=`machine`
-        for minor in `jot - 3 $this_minor 1`; do
-            HYPHY="/galaxy/software/macosx10.$minor-$machine/hyphy"
-            [ -d "$HYPHY" ] && break
-        done
-        [ ! -d "$HYPHY" ] && unset HYPHY
-        ;;
-    solaris2.10)
-        # For the psu-production builder which is Solaris, but jobs run on a
-        # Linux cluster
-        HYPHY="/galaxy/software/linux2.6-x86_64/hyphy"
-        ;;
-esac
-
 LINKS="
 /galaxy/data/location/add_scores.loc
 /galaxy/data/location/all_fasta.loc
@@ -121,12 +99,6 @@
 ln -sf $link tool-data
 done
- if [ -d "$HYPHY" ]; then
-     echo "Linking $HYPHY"
-     rm -f tool-data/HYPHY
-     ln -sf $HYPHY tool-data/HYPHY
- fi
-
 if [ -d "$JARS" ]; then
 echo "Linking $JARS"
 rm -f tool-data/shared/jars

diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 doc/source/lib/galaxy.tools.util.rst
--- a/doc/source/lib/galaxy.tools.util.rst
+++ b/doc/source/lib/galaxy.tools.util.rst
@@ -9,14 +9,6 @@
 :undoc-members:
 :show-inheritance:
-:mod:`hyphy_util` Module
-------------------------
-
-..
automodule:: galaxy.tools.util.hyphy_util - :members: - :undoc-members: - :show-inheritance: - :mod:`maf_utilities` Module --------------------------- diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 lib/galaxy/tools/util/hyphy_util.py --- a/lib/galaxy/tools/util/hyphy_util.py +++ /dev/null @@ -1,1163 +0,0 @@ -#Dan Blankenberg -#Contains file contents and helper methods for HYPHY configurations -import tempfile, os - -def get_filled_temp_filename(contents): - fh = tempfile.NamedTemporaryFile('w') - filename = fh.name - fh.close() - fh = open(filename, 'w') - fh.write(contents) - fh.close() - return filename - -NJ_tree_shared_ibf = """ -COUNT_GAPS_IN_FREQUENCIES = 0; -methodIndex = 1; - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function InferTreeTopology(verbFlag) -{ - distanceMatrix = {ds.species,ds.species}; - - MESSAGE_LOGGING = 0; - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def"); - InitializeDistances (0); - - for (i = 0; i<ds.species; i=i+1) - { - for (j = i+1; j<ds.species; j = j+1) - { - distanceMatrix[i][j] = ComputeDistanceFormula (i,j); - } - } - - MESSAGE_LOGGING = 1; - cladesMade = 1; - - - if (ds.species == 2) - { - d1 = distanceMatrix[0][1]/2; - treeNodes = {{0,1,d1__}, - {1,1,d1__}, - {2,0,0}}; - - cladesInfo = {{2,0}}; - } - else - { - if (ds.species == 3) - { - /* generate least squares estimates here */ - - d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2; - d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2; - d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2; - - treeNodes = {{0,1,d1__}, - {1,1,d2__}, - {2,1,d3__} - {3,0,0}}; - - cladesInfo = {{3,0}}; - } - else - { - njm = (distanceMatrix > methodIndex)>=ds.species; - - treeNodes = {2*(ds.species+1),3}; - cladesInfo = {ds.species-1,2}; - - for (i=Rows(treeNodes)-1; i>=0; i=i-1) - { - treeNodes[i][0] = njm[i][0]; - treeNodes[i][1] = njm[i][1]; - treeNodes[i][2] = njm[i][2]; - } - - for (i=Rows(cladesInfo)-1; i>=0; i=i-1) - { - cladesInfo[i][0] = njm[i][3]; - cladesInfo[i][1] = njm[i][4]; - } - - njm = 0; - } - } - return 1.0; -} - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function TreeMatrix2TreeString (doLengths) -{ - treeString = ""; - p = 0; - k = 0; - m = treeNodes[0][1]; - n = treeNodes[0][0]; - treeString*(Rows(treeNodes)*25); - - while (m) - { - if (m>p) - { - if (p) - { - treeString*","; - } - for (j=p;j<m;j=j+1) - { - treeString*"("; - } - } - else - { - if (m<p) - { - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - } - else - { - treeString*","; - } - } - if (n<ds.species) - { - GetString (nodeName, ds, n); - if (doLengths != 1) - { - treeString*nodeName; - } - else - { - treeString*taxonNameMap[nodeName]; - } - } - if (doLengths>.5) - { - nodeName = ":"+treeNodes[k][2]; - treeString*nodeName; - } - k=k+1; - p=m; - n=treeNodes[k][0]; - m=treeNodes[k][1]; - } - - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - - treeString*0; - return treeString; -} -""" - -def get_NJ_tree (filename): - return """ -DISTANCE_PROMPTS = 1; -ExecuteAFile ("%s"); - -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -/* do sequence to branch map */ - -taxonNameMap = {}; - -for (k=0; k<ds.species; k=k+1) -{ - GetString 
(thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); -} - -DataSetFilter filteredData = CreateFilter (ds,1); -InferTreeTopology (0); -treeString = TreeMatrix2TreeString (1); - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, treeString); -fscanf (stdin, "String", ps_file); - -if (Abs(ps_file)) -{ - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (ps_file, CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); -} -""" % (filename) - -def get_NJ_treeMF (filename): - return """ -ExecuteAFile ("%s"); - -VERBOSITY_LEVEL = -1; -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - -_linesIn = Columns (inLines); -isomorphicTreesBySequenceCount = {}; - -/*---------------------------------------------------------*/ - -_currentGene = 1; -_currentState = 0; -geneSeqs = ""; -geneSeqs * 128; - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN); -treeOutFile = LAST_FILE_PATH; - -fscanf (stdin,"String", ps_file); -if (Abs(ps_file)) -{ - fprintf (ps_file, CLEAR_FILE, KEEP_OPEN); -} - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - - -if (_currentState == 1) -{ - geneSeqs * 0; - if (Abs(geneSeqs)) - { - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - } -} - -fprintf (treeOutFile,CLOSE_FILE); -if (Abs(ps_file)) -{ - fprintf (ps_file,CLOSE_FILE); -} -/*---------------------------------------------------------*/ - -function _processAGene (_geneID, nwk_file, ps_file) -{ - if (ds.species == 1) - { - fprintf (nwk_file, _geneID-1, "\\tNone \\tNone\\n"); - return 0; - - } - - DataSetFilter filteredData = CreateFilter (ds,1); - - /* do sequence to branch map */ - - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}}); - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - DISTANCE_PROMPTS = (_geneID==1); - - InferTreeTopology (0); - baseTree = TreeMatrix2TreeString (0); - UseModel (USE_NO_MODEL); - - Tree baseTop = baseTree; - - /* standardize this top */ - - for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1) - { - testString = (isomorphicTreesBySequenceCount[filteredData.species])[k]; - Tree testTree = testString; - if (testTree == baseTop) - { - baseTree = testString; - break; - } - } - if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species])) - { - if (k==0) - { - 
isomorphicTreesBySequenceCount[filteredData.species] = {}; - } - (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree; - } - - fprintf (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n"); - if (Abs(ps_file)) - { - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (stdout, _geneID, ":", givenTree,"\\n"); - fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); - } - return 0; -} -""" % (filename) - -BranchLengthsMF = """ -VERBOSITY_LEVEL = -1; - -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - - - -_linesIn = Columns (inLines); - - - -/*---------------------------------------------------------*/ - - - -_currentGene = 1; - -_currentState = 0; - -geneSeqs = ""; - -geneSeqs * 128; - - - -for (l=0; l<_linesIn; l=l+1) - -{ - - if (Abs(inLines[l]) == 0) - - { - - if (_currentState == 1) - - { - - geneSeqs * 0; - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - geneSeqs * 128; - - _currentGene = _currentGene + 1; - - } - - } - - else - - { - - if (_currentState == 0) - - { - - _currentState = 1; - - } - - geneSeqs * inLines[l]; - - geneSeqs * "\\n"; - - } - -} - - - -if (_currentState == 1) - -{ - - geneSeqs * 0; - - if (Abs(geneSeqs)) - - { - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - } - -} - - - -fprintf (resultFile,CLOSE_FILE); - - - -/*---------------------------------------------------------*/ - - - -function _processAGene (_geneID) - -{ - - DataSetFilter filteredData = CreateFilter (ds,1); - - if (_currentGene == 1) - - { - - SelectTemplateModel (filteredData); - - - - SetDialogPrompt ("Tree file"); - - fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - - fscanf (stdin, "String", resultFile); - - - - /* do sequence to branch map */ - - - - validNames = {}; - - taxonNameMap = {}; - - - - for (k=0; k<TipCount(givenTree); k=k+1) - - { - - validNames[TipName(givenTree,k)&&1] = 1; - - } - - - - for (k=0; k<BranchCount(givenTree); k=k+1) - - { - - thisName = BranchName(givenTree,k); - - taxonNameMap[thisName&&1] = thisName; - - } - - - - storeValidNames = validNames; - - fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n"); - - } - - else - - { - - HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1); - - validNames = storeValidNames; - - } - - - - for (k=0; k<ds.species; k=k+1) - - { - - GetString (thisName, ds,k); - - shortName = (thisName^{{"\\\\..+",""}})&&1; - - if (validNames[shortName]) - - { - - taxonNameMap[shortName] = thisName; - - validNames - (shortName); - - SetParameter (ds,k,shortName); - - } - - else - - { - - fprintf (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n"); - - return 0; - - } - - } - - - - /* */ - - - - LikelihoodFunction lf = (filteredData,givenTree); - - 
Optimize (res,lf); - - - - timer = Time(0)-timer; - - - - branchNames = BranchName (givenTree,-1); - - branchLengths = BranchLength (givenTree,-1); - - - - - - for (k=0; k<Columns(branchNames)-1; k=k+1) - - { - - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - if (k==0) - - { - - /* compute a scaling factor */ - - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - - scaleFactor = BranchLength (givenTree,0); - - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - - } - - fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); - - } - - - - ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; - - global treeScaler = 1; - - ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); - - COVARIANCE_PARAMETER = "treeScaler"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); - - ClearConstraints (givenTree); - - return 0; - -} -""" - -BranchLengths = """ -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -SelectTemplateModel (filteredData); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); -fscanf (stdin, "String", resultFile); - -/* do sequence to branch map */ - -validNames = {}; -taxonNameMap = {}; - -for (k=0; k<TipCount(givenTree); k=k+1) -{ - validNames[TipName(givenTree,k)&&1] = 1; -} - -for (k=0; k<BranchCount(givenTree); k=k+1) -{ - thisName = BranchName(givenTree,k); - taxonNameMap[thisName&&1] = thisName; -} - -for (k=0; k<ds.species; k=k+1) -{ - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - if (validNames[shortName]) - { - taxonNameMap[shortName] = thisName; - validNames - (shortName); - SetParameter (ds,k,shortName); - } - else - { - fprintf (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree); - return 0; - } -} - -/* */ - -LikelihoodFunction lf = (filteredData,givenTree); - -Optimize (res,lf); - -timer = Time(0)-timer; - -branchNames = BranchName (givenTree,-1); -branchLengths = BranchLength (givenTree,-1); - -fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n"); - -for (k=0; k<Columns(branchNames)-1; k=k+1) -{ - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - if (k==0) - { - /* compute a scaling factor */ - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - scaleFactor = BranchLength (givenTree,0); - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - } - fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); -} - -ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; -global treeScaler = 1; -ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); -COVARIANCE_PARAMETER = "treeScaler"; -COVARIANCE_PRECISION = 0.95; -CovarianceMatrix (cmx,lf); -ClearConstraints (givenTree); -fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); -fprintf (resultFile,CLOSE_FILE); -""" - -SimpleLocalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function 
returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "AC"; - _analysisHeaders[7] = "AT"; - _analysisHeaders[8] = "CG"; - _analysisHeaders[9] = "CT"; - _analysisHeaders[10] = "GT"; - _analysisHeaders[11] = "Tree"; - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")"; - } - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Local"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = treeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = treeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["Tree"] = Format(codonTree,0,1); - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _returnMe ["length("+branchName+")"] = (_cL["Total"])[_biterator]; - _returnMe ["dS("+branchName+")"] = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]); - _returnMe ["dN("+branchName+")"] = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]); - - ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);"); - _returnMe ["omega("+branchName+")"] = _lom; - } - - return _returnMe; -} - -""" - -SimpleGlobalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "omega"; - _analysisHeaders[7] = "omega_range"; - _analysisHeaders[8] = "AC"; - _analysisHeaders[9] = "AT"; - 
_analysisHeaders[10] = "CG"; - _analysisHeaders[11] = "CT"; - _analysisHeaders[12] = "GT"; - _analysisHeaders[13] = "Tree"; - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n"); - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - _nucSites = filteredData.sites; - - if (Abs(treeString)) - { - givenTreeString = treeString; - } - else - { - if (_currentGene==1) - { - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf"); - } - givenTreeString = InferTreeTopology (0); - treeString = ""; - } - - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Global"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = givenTreeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = givenTreeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["Stop_codons"] = (_nucSites-filteredData.sites*3)$3; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["omega"] = R; - COVARIANCE_PARAMETER = "R"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - _returnMe ["omega_range"] = ""+cmx[0]+"-"+cmx[2]; - _returnMe ["Tree"] = Format(codonTree,0,1); - - - return _returnMe; -} -""" - -FastaReader = """ -fscanf (stdin, "String", _coreAnalysis); -fscanf (stdin, "String", _outputDriver); - -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf"); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - -treeBranchNames = BranchName (givenTree,-1); -treeBranchCount = Columns (treeBranchNames)-1; -treeString = Format (givenTree,1,1); - -SetDialogPrompt ("Multiple gene FASTA file"); -fscanf (PROMPT_FOR_FILE, "Lines", inLines); -fscanf (stdin, "String", modelSpecString); -fscanf (stdin, "String", _outPath); - -ExecuteAFile 
(_outputDriver); -ExecuteAFile (_coreAnalysis); - -/*---------------------------------------------------------*/ - -_linesIn = Columns (inLines); -_currentGene = 1; - _currentState = 0; -/* 0 - waiting for a non-empty line */ -/* 1 - reading files */ - -geneSeqs = ""; -geneSeqs * 0; - -_prepareFileOutput (_outPath); - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - -if (_currentState == 1) -{ - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); -} - -_finishFileOutput (0); -""" - -TabWriter = """ -/*---------------------------------------------------------*/ -function _prepareFileOutput (_outPath) -{ - _outputFilePath = _outPath; - - _returnHeaders = returnResultHeaders(0); - - fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]); - } - - - - fprintf (_outputFilePath,"\\n"); - return 0; -} - -/*---------------------------------------------------------*/ - -function _processAGene (valid, _geneID) -{ - if (valid) - { - returnValue = runAGeneFit (_geneID); - fprintf (_outputFilePath, returnValue[_returnHeaders[0]]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]); - } - fprintf (_outputFilePath, "\\n"); - } - /* - else - { - fprintf (_outputFilePath, - _geneID, ", Incorrect number of sequences\\n"); - } - */ - _currentState = 0; - return 0; -} - -/*---------------------------------------------------------*/ -function _finishFileOutput (dummy) -{ - return 0; -} -""" - -def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the DATA READER */ - -_genomeScreenOptions ["0"] = "%s"; - /* which analysis to run on each gene; */ -_genomeScreenOptions ["1"] = "%s"; - /* what output to produce; */ -_genomeScreenOptions ["2"] = "%s"; - /* genetic code */ -_genomeScreenOptions ["3"] = "%s"; - /* tree file */ -_genomeScreenOptions ["4"] = "%s"; - /* alignment file */ -_genomeScreenOptions ["5"] = "%s"; - /* nucleotide bias string; can define any of the 203 models */ -_genomeScreenOptions ["6"] = "%s"; - /* output csv file */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ) - return get_filled_temp_filename(contents) - - -def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the NucDataBranchLengths.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "CUSTOM"; - /* use an arbitrary nucleotide model */ -_genomeScreenOptions ["2"] = "%s"; - /* which 
model to use */ -_genomeScreenOptions ["3"] = "%s"; - /* model options */ -_genomeScreenOptions ["4"] = "Estimated"; - /* rate parameters */ -_genomeScreenOptions ["5"] = "%s"; - /* base frequencies */ -_genomeScreenOptions ["6"] = "%s"; - /* the tree to use; */ -_genomeScreenOptions ["7"] = "%s"; - /* write .csv output to; */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename) - return get_filled_temp_filename(contents) - - -def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTree.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ -_genomeScreenOptions ["2"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["3"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename) - return get_filled_temp_filename(contents) - - -def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTreeMF.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the multiple alignment file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["2"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ -_genomeScreenOptions ["3"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename) - return get_filled_temp_filename(contents) diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tool-data/shared/ucsc/ucsc_build_sites.txt --- a/tool-data/shared/ucsc/ucsc_build_sites.txt +++ b/tool-data/shared/ucsc/ucsc_build_sites.txt @@ -5,4 +5,3 @@ #Harvested from http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn test http://genome-test.cse.ucsc.edu/cgi-bin/hgTracks? 
anoCar1,ce4,ce3,ce2,ce1,loxAfr1,rn2,eschColi_O157H7_1,rn4,droYak1,heliPylo_J99_1,droYak2,dp3,dp2,caeRem2,caeRem1,oryLat1,eschColi_K12_1,homIni13,homIni14,droAna1,droAna2,oryCun1,sacCer1,heliHepa1,droGri1,sc1,dasNov1,choHof1,tupBel1,mm9,mm8,vibrChol1,mm5,mm4,mm7,mm6,mm3,mm2,rn3,venter1,galGal3,galGal2,ornAna1,equCab1,cioSav2,rheMac2,eutHer13,droPer1,droVir2,droVir1,heliPylo_26695_1,euaGli13,calJac1,campJeju1,droSim1,hg13,hg15,hg16,hg17,monDom1,monDom4,droMoj1,petMar1,droMoj2,vibrChol_MO10_1,vibrPara1,gliRes13,vibrVuln_YJ016_1,braFlo1,cioSav1,lauRas13,dm1,canFam1,canFam2,ci1,echTel1,ci2,caePb1,dm3,ponAbe2,falciparum,xenTro1,xenTro2,nonAfr13,fr2,fr1,gasAcu1,dm2,apiMel1,apiMel2,eschColi_O157H7EDL933_1,priPac1,panTro1,hg18,panTro2,campJeju_RM1221_1,canHg12,vibrChol_O395_1,vibrFisc_ES114_1,danRer5,danRer4,danRer3,danRer2,danRer1,tetNig1,afrOth13,bosTau1,eschColi_CFT073_1,bosTau3,bosTau2,bosTau4,rodEnt13,droEre1,priMat13,vibrVuln_CMCP6_1,cb2,cb3,cb1,borEut13,droSec1,felCat3,strPur1,strPur2,otoGar1,catArr1,anoGam1,triCas2 ucla http://epigenomics.mcdb.ucla.edu/cgi-bin/hgTracks? araTha1 -psu bx main http://main.genome-browser.bx.psu.edu/cgi-bin/hgTracks? hg18,hg19,mm8,mm9 diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tool_conf.xml.main --- a/tool_conf.xml.main +++ b/tool_conf.xml.main @@ -4,32 +4,19 @@ <tool file="data_source/upload.xml" /><tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/biomart.xml" /><tool file="data_source/gramene_mart.xml" /> - <tool file="data_source/flymine.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -87,7 +74,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="features" name="Extract Features"><tool file="filters/ucsc_gene_bed_to_exon_bed.xml" /> @@ -111,7 +97,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool file="new_operations/intersect.xml" /> @@ -169,11 +154,6 @@ <tool file="multivariate_stats/kpca.xml" /><tool file="multivariate_stats/kcca.xml" /></section> - <section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool 
file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <!-- <tool file="hyphy/hyphy_dnds_wrapper.xml" /> --> - </section><section id="clustal" name="Multiple Alignments"><tool file="rgenetics/rgClustalw.xml" /></section> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -5,7 +5,6 @@ <tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_test.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/microbial_import.xml" /><tool file="data_source/biomart.xml" /> @@ -13,34 +12,18 @@ <tool file="data_source/cbi_rice_mart.xml" /><tool file="data_source/gramene_mart.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/flymine.xml" /> - <tool file="data_source/flymine_test.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /> - <tool file="data_source/metabolicmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/wormbase_test.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /> - <tool file="data_source/epigraph_import_test.xml" /><tool file="data_source/hbvar.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /><tool file="validation/fix_errors.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /> - <tool file="data_destination/epigraph_test.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -81,7 +64,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="convert" name="Convert Formats"><tool file="filters/axt_to_concat_fasta.xml" /> @@ -124,7 +106,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool file="new_operations/intersect.xml" /> @@ -189,9 +170,6 @@ <tool file="multivariate_stats/kcca.xml" /></section><section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <tool file="hyphy/hyphy_dnds_wrapper.xml" /><tool file="evolution/codingSnps.xml" /><tool file="evolution/add_scores.xml" /></section> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_destination/epigraph.xml --- a/tools/data_destination/epigraph.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_export"> - <description> and prediction with EpiGRAPH</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} 
INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - - diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_destination/epigraph_test.xml --- a/tools/data_destination/epigraph_test.xml +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_test_export"> - <description> and prediction with EpiGRAPH Test</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. 
Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/bx_browser.xml --- a/tools/data_source/bx_browser.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="BX" id="bx_browser" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to BX Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="bx_browser" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="primaryTable" /> - <value galaxy_value="tabular" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/encode_db.xml --- a/tools/data_source/encode_db.xml +++ /dev/null @@ -1,27 +0,0 @@ -<?xml version="1.0"?> - -<tool name="EncodeDB" id="encode_db1"> - - <description> - at NHGRI - </description> - - <command interpreter="python"> - fetch.py "$url" $output - </command> - - <inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> -<!-- <inputs action="http://localhost:9000/prepared"> --> - <display>go to EncodeDB $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" /> - </inputs> - - <uihints minwidth="800"/> - - <outputs> - <data format="bed" name="output" /> - </outputs> - - <options sanitize="False" refresh="True"/> - -</tool> \ No newline at end of file diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> - <description> server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/epigraph_import_test.xml --- a/tools/data_source/epigraph_import_test.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source"> - <description> test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="Flymine" id="flymine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.flymine.org" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/flymine_test.xml --- a/tools/data_source/flymine_test.xml +++ /dev/null @@ -1,31 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Flymine test" id="flymine_test" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/metabolicmine.xml --- a/tools/data_source/metabolicmine.xml +++ /dev/null @@ -1,13 +0,0 @@ -<?xml version="1.0"?> -<tool name="metabolicMine" id="metabolicmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get"> - <display>go to metabolicMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/modmine.xml --- a/tools/data_source/modmine.xml +++ /dev/null @@ -1,19 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="modENCODE modMine" id="modmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> - <display>go to modENCODE modMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/mousemine.xml --- a/tools/data_source/mousemine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="MouseMine" id="mousemine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.mousemine.org/mousemine/begin.do" check_values="false" method="get"> - <display>go to MouseMine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=mousemine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="MouseMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/ratmine.xml --- a/tools/data_source/ratmine.xml +++ /dev/null @@ -1,34 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Ratmine" id="ratmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> - <display>go to Ratmine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/data_source/yeastmine.xml --- a/tools/data_source/yeastmine.xml +++ /dev/null @@ -1,20 +0,0 @@ -<?xml version="1.0"?> -<tool name="YeastMine" id="yeastmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> - <display>go to yeastMine server $GALAXY_URL</display> - </inputs> - <request_param_translation> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/encode/gencode_partition.xml --- a/tools/encode/gencode_partition.xml +++ /dev/null @@ -1,45 +0,0 @@ -<tool id="gencode_partition1" name="Gencode Partition"> - <description>an interval file</description> - <command interpreter="python">split_by_partitions.py ${GALAXY_DATA_INDEX_DIR} $input1 $out_file1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Partition"/> - </inputs> - <outputs> - <data name="out_file1" format="bed"/> - </outputs> - <tests> - <test> - <param name="input1" value="encode_1.bed"/> - <output name="out_file1" file="gencode_partition_out.bed"/> - </test> - </tests> - <help> -For detailed information about partitioning, click here_. - -.. _here: http://genome.imim.es/gencode/wiki/index.php/Collecting_Feature_Sets_from_Al... - -Datasets are partitioned according to the protocol below: - -A partition scheme has been defined that is similar to what has previously been done with TARs/TRANSFRAGs such that any feature can be classified as falling into one of the following 6 categories: - 1. **Coding** -- coding exons defined from the GENCODE experimentally verified coding set (coding in any transcript) - 2. **5UTR** -- 5' UTR exons defined from the GENCODE experimentally verified coding set (5' UTR in some transcript but never coding in any other) - 3. 
**3UTR** -- 3' UTR exons defined from the GENCODE experimentally verified coding set (3' UTR in some transcript but never coding in any other) - 4. **Intronic Proximal** -- intronic and no more than 5kb away from an exon. - 5. **Intergenic Proximal** -- between genes and no more than 5kb away from an exon. - 6. **Intronic Distal** -- intronic and greater than 5kb away from an exon. - 7. **Intergenic Distal** -- between genes and greater than 5kb away from an exon. - ------ - -.. class:: infomark - -**Note:** Features overlapping more than one partition will take the identity of the lower-numbered partition. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file diff -r c21482ac06f8319cd3879eaebcdb17b6939b0e9e -r 401ee23dcf2f70d4be0e975bb3e00a43ae1dfdd0 tools/encode/random_intervals.xml --- a/tools/encode/random_intervals.xml +++ /dev/null @@ -1,64 +0,0 @@ -<tool id="random_intervals1" name="Random Intervals"> -<description>create a random set of intervals</description> - <command interpreter="python">random_intervals_no_bits.py $regions $input2 $input1 $out_file1 ${input2.metadata.chromCol} ${input2.metadata.startCol} ${input2.metadata.endCol} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $use_mask $strand_overlaps ${GALAXY_DATA_INDEX_DIR}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Mimick"> - <validator type="unspecified_build" message="Unspecified build, this tool works with data from genome builds hg16 or hg17. Click the pencil icon in your history item to set the genome build."/> - </param> - <param name="input2" type="data" format="interval" label="Intervals to Mask"/> - <param name="use_mask" type="select" label="Use mask"> - <option value="no_mask">No</option> - <option value="use_mask">Yes</option> - </param> - <param name="strand_overlaps" type="select" label="Allow overlaps"> - <option value="all">Any</option> - <option value="strand">Across Strands</option> - <option value="none">None</option> - </param> - <param name="regions" type="select" label="Regions to use"> - <options from_file="regions.loc"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <column name="dbkey" index="0"/> - <filter type="data_meta" ref="input1" key="dbkey" column="0" /> - <validator type="no_options" message="This tool currently only works with ENCODE data from genome builds hg16 or hg17."/> - </options> - </param> - </inputs> - <outputs> - <data name="out_file1" format="input"/> - </outputs> - <help> - -.. class:: warningmark - -This tool currently only works with ENCODE data from genome builds hg16 or hg17. - ------ - -.. class:: infomark - -**Note:** If you do not wish to mask a set of intervals, change the Use Mask option to No, this option will override any Mask files selected. - ------ - -**Syntax** - -This tool will attempt to create a random set of intervals that mimic those found within your source file. You may also specify a set of intervals to mask. - -**Allow overlaps** options - * **Across Strands** - random regions are allowed to overlap only if they are on different strands. 
- * **Any** - all overlaps are allowed. - * **None** - no overlapping regions are allowed. - -**Regions to use** options - * Bounding region of interest based on the dataset build. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/275a49bb9e1d/ Changeset: 275a49bb9e1d User: natefoo Date: 2014-01-27 19:59:03 Summary: Merge tool removal from next-stable. Affected #: 33 files diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -4,28 +4,6 @@ : ${HOSTTYPE:=`uname -m`} -# link to HYPHY is arch-dependent -case "$OSTYPE" in - linux-gnu) - kernel=`uname -r | cut -f1,2 -d.` - HYPHY="/galaxy/software/linux$kernel-$HOSTTYPE/hyphy" - ;; - darwin*) - this_minor=`uname -r | awk -F. '{print ($1-4)}'` - machine=`machine` - for minor in `jot - 3 $this_minor 1`; do - HYPHY="/galaxy/software/macosx10.$minor-$machine/hyphy" - [ -d "$HYPHY" ] && break - done - [ ! -d "$HYPHY" ] && unset HYPHY - ;; - solaris2.10) - # For the psu-production builder which is Solaris, but jobs run on a - # Linux cluster - HYPHY="/galaxy/software/linux2.6-x86_64/hyphy" - ;; -esac - LINKS=" /galaxy/data/location/add_scores.loc /galaxy/data/location/all_fasta.loc @@ -121,12 +99,6 @@ ln -sf $link tool-data done - if [ -d "$HYPHY" ]; then - echo "Linking $HYPHY" - rm -f tool-data/HYPHY - ln -sf $HYPHY tool-data/HYPHY - fi - if [ -d "$JARS" ]; then echo "Linking $JARS" rm -f tool-data/shared/jars diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b doc/source/lib/galaxy.tools.util.rst --- a/doc/source/lib/galaxy.tools.util.rst +++ b/doc/source/lib/galaxy.tools.util.rst @@ -9,14 +9,6 @@ :undoc-members: :show-inheritance: -:mod:`hyphy_util` Module ------------------------- - -.. 
automodule:: galaxy.tools.util.hyphy_util - :members: - :undoc-members: - :show-inheritance: - :mod:`maf_utilities` Module --------------------------- diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b lib/galaxy/tools/util/hyphy_util.py --- a/lib/galaxy/tools/util/hyphy_util.py +++ /dev/null @@ -1,1163 +0,0 @@ -#Dan Blankenberg -#Contains file contents and helper methods for HYPHY configurations -import tempfile, os - -def get_filled_temp_filename(contents): - fh = tempfile.NamedTemporaryFile('w') - filename = fh.name - fh.close() - fh = open(filename, 'w') - fh.write(contents) - fh.close() - return filename - -NJ_tree_shared_ibf = """ -COUNT_GAPS_IN_FREQUENCIES = 0; -methodIndex = 1; - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function InferTreeTopology(verbFlag) -{ - distanceMatrix = {ds.species,ds.species}; - - MESSAGE_LOGGING = 0; - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def"); - InitializeDistances (0); - - for (i = 0; i<ds.species; i=i+1) - { - for (j = i+1; j<ds.species; j = j+1) - { - distanceMatrix[i][j] = ComputeDistanceFormula (i,j); - } - } - - MESSAGE_LOGGING = 1; - cladesMade = 1; - - - if (ds.species == 2) - { - d1 = distanceMatrix[0][1]/2; - treeNodes = {{0,1,d1__}, - {1,1,d1__}, - {2,0,0}}; - - cladesInfo = {{2,0}}; - } - else - { - if (ds.species == 3) - { - /* generate least squares estimates here */ - - d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2; - d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2; - d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2; - - treeNodes = {{0,1,d1__}, - {1,1,d2__}, - {2,1,d3__} - {3,0,0}}; - - cladesInfo = {{3,0}}; - } - else - { - njm = (distanceMatrix > methodIndex)>=ds.species; - - treeNodes = {2*(ds.species+1),3}; - cladesInfo = {ds.species-1,2}; - - for (i=Rows(treeNodes)-1; i>=0; i=i-1) - { - treeNodes[i][0] = njm[i][0]; - treeNodes[i][1] = njm[i][1]; - treeNodes[i][2] = njm[i][2]; - } - - for (i=Rows(cladesInfo)-1; i>=0; i=i-1) - { - cladesInfo[i][0] = njm[i][3]; - cladesInfo[i][1] = njm[i][4]; - } - - njm = 0; - } - } - return 1.0; -} - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function TreeMatrix2TreeString (doLengths) -{ - treeString = ""; - p = 0; - k = 0; - m = treeNodes[0][1]; - n = treeNodes[0][0]; - treeString*(Rows(treeNodes)*25); - - while (m) - { - if (m>p) - { - if (p) - { - treeString*","; - } - for (j=p;j<m;j=j+1) - { - treeString*"("; - } - } - else - { - if (m<p) - { - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - } - else - { - treeString*","; - } - } - if (n<ds.species) - { - GetString (nodeName, ds, n); - if (doLengths != 1) - { - treeString*nodeName; - } - else - { - treeString*taxonNameMap[nodeName]; - } - } - if (doLengths>.5) - { - nodeName = ":"+treeNodes[k][2]; - treeString*nodeName; - } - k=k+1; - p=m; - n=treeNodes[k][0]; - m=treeNodes[k][1]; - } - - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - - treeString*0; - return treeString; -} -""" - -def get_NJ_tree (filename): - return """ -DISTANCE_PROMPTS = 1; -ExecuteAFile ("%s"); - -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -/* do sequence to branch map */ - -taxonNameMap = {}; - -for (k=0; k<ds.species; k=k+1) -{ - GetString 
(thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); -} - -DataSetFilter filteredData = CreateFilter (ds,1); -InferTreeTopology (0); -treeString = TreeMatrix2TreeString (1); - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, treeString); -fscanf (stdin, "String", ps_file); - -if (Abs(ps_file)) -{ - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (ps_file, CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); -} -""" % (filename) - -def get_NJ_treeMF (filename): - return """ -ExecuteAFile ("%s"); - -VERBOSITY_LEVEL = -1; -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - -_linesIn = Columns (inLines); -isomorphicTreesBySequenceCount = {}; - -/*---------------------------------------------------------*/ - -_currentGene = 1; -_currentState = 0; -geneSeqs = ""; -geneSeqs * 128; - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN); -treeOutFile = LAST_FILE_PATH; - -fscanf (stdin,"String", ps_file); -if (Abs(ps_file)) -{ - fprintf (ps_file, CLEAR_FILE, KEEP_OPEN); -} - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - - -if (_currentState == 1) -{ - geneSeqs * 0; - if (Abs(geneSeqs)) - { - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - } -} - -fprintf (treeOutFile,CLOSE_FILE); -if (Abs(ps_file)) -{ - fprintf (ps_file,CLOSE_FILE); -} -/*---------------------------------------------------------*/ - -function _processAGene (_geneID, nwk_file, ps_file) -{ - if (ds.species == 1) - { - fprintf (nwk_file, _geneID-1, "\\tNone \\tNone\\n"); - return 0; - - } - - DataSetFilter filteredData = CreateFilter (ds,1); - - /* do sequence to branch map */ - - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}}); - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - DISTANCE_PROMPTS = (_geneID==1); - - InferTreeTopology (0); - baseTree = TreeMatrix2TreeString (0); - UseModel (USE_NO_MODEL); - - Tree baseTop = baseTree; - - /* standardize this top */ - - for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1) - { - testString = (isomorphicTreesBySequenceCount[filteredData.species])[k]; - Tree testTree = testString; - if (testTree == baseTop) - { - baseTree = testString; - break; - } - } - if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species])) - { - if (k==0) - { - 
isomorphicTreesBySequenceCount[filteredData.species] = {}; - } - (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree; - } - - fprintf (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n"); - if (Abs(ps_file)) - { - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (stdout, _geneID, ":", givenTree,"\\n"); - fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); - } - return 0; -} -""" % (filename) - -BranchLengthsMF = """ -VERBOSITY_LEVEL = -1; - -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - - - -_linesIn = Columns (inLines); - - - -/*---------------------------------------------------------*/ - - - -_currentGene = 1; - -_currentState = 0; - -geneSeqs = ""; - -geneSeqs * 128; - - - -for (l=0; l<_linesIn; l=l+1) - -{ - - if (Abs(inLines[l]) == 0) - - { - - if (_currentState == 1) - - { - - geneSeqs * 0; - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - geneSeqs * 128; - - _currentGene = _currentGene + 1; - - } - - } - - else - - { - - if (_currentState == 0) - - { - - _currentState = 1; - - } - - geneSeqs * inLines[l]; - - geneSeqs * "\\n"; - - } - -} - - - -if (_currentState == 1) - -{ - - geneSeqs * 0; - - if (Abs(geneSeqs)) - - { - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - } - -} - - - -fprintf (resultFile,CLOSE_FILE); - - - -/*---------------------------------------------------------*/ - - - -function _processAGene (_geneID) - -{ - - DataSetFilter filteredData = CreateFilter (ds,1); - - if (_currentGene == 1) - - { - - SelectTemplateModel (filteredData); - - - - SetDialogPrompt ("Tree file"); - - fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - - fscanf (stdin, "String", resultFile); - - - - /* do sequence to branch map */ - - - - validNames = {}; - - taxonNameMap = {}; - - - - for (k=0; k<TipCount(givenTree); k=k+1) - - { - - validNames[TipName(givenTree,k)&&1] = 1; - - } - - - - for (k=0; k<BranchCount(givenTree); k=k+1) - - { - - thisName = BranchName(givenTree,k); - - taxonNameMap[thisName&&1] = thisName; - - } - - - - storeValidNames = validNames; - - fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n"); - - } - - else - - { - - HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1); - - validNames = storeValidNames; - - } - - - - for (k=0; k<ds.species; k=k+1) - - { - - GetString (thisName, ds,k); - - shortName = (thisName^{{"\\\\..+",""}})&&1; - - if (validNames[shortName]) - - { - - taxonNameMap[shortName] = thisName; - - validNames - (shortName); - - SetParameter (ds,k,shortName); - - } - - else - - { - - fprintf (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n"); - - return 0; - - } - - } - - - - /* */ - - - - LikelihoodFunction lf = (filteredData,givenTree); - - 
Optimize (res,lf); - - - - timer = Time(0)-timer; - - - - branchNames = BranchName (givenTree,-1); - - branchLengths = BranchLength (givenTree,-1); - - - - - - for (k=0; k<Columns(branchNames)-1; k=k+1) - - { - - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - if (k==0) - - { - - /* compute a scaling factor */ - - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - - scaleFactor = BranchLength (givenTree,0); - - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - - } - - fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); - - } - - - - ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; - - global treeScaler = 1; - - ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); - - COVARIANCE_PARAMETER = "treeScaler"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); - - ClearConstraints (givenTree); - - return 0; - -} -""" - -BranchLengths = """ -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -SelectTemplateModel (filteredData); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); -fscanf (stdin, "String", resultFile); - -/* do sequence to branch map */ - -validNames = {}; -taxonNameMap = {}; - -for (k=0; k<TipCount(givenTree); k=k+1) -{ - validNames[TipName(givenTree,k)&&1] = 1; -} - -for (k=0; k<BranchCount(givenTree); k=k+1) -{ - thisName = BranchName(givenTree,k); - taxonNameMap[thisName&&1] = thisName; -} - -for (k=0; k<ds.species; k=k+1) -{ - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - if (validNames[shortName]) - { - taxonNameMap[shortName] = thisName; - validNames - (shortName); - SetParameter (ds,k,shortName); - } - else - { - fprintf (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree); - return 0; - } -} - -/* */ - -LikelihoodFunction lf = (filteredData,givenTree); - -Optimize (res,lf); - -timer = Time(0)-timer; - -branchNames = BranchName (givenTree,-1); -branchLengths = BranchLength (givenTree,-1); - -fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n"); - -for (k=0; k<Columns(branchNames)-1; k=k+1) -{ - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - if (k==0) - { - /* compute a scaling factor */ - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - scaleFactor = BranchLength (givenTree,0); - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - } - fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); -} - -ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; -global treeScaler = 1; -ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); -COVARIANCE_PARAMETER = "treeScaler"; -COVARIANCE_PRECISION = 0.95; -CovarianceMatrix (cmx,lf); -ClearConstraints (givenTree); -fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); -fprintf (resultFile,CLOSE_FILE); -""" - -SimpleLocalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function 
returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "AC"; - _analysisHeaders[7] = "AT"; - _analysisHeaders[8] = "CG"; - _analysisHeaders[9] = "CT"; - _analysisHeaders[10] = "GT"; - _analysisHeaders[11] = "Tree"; - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")"; - } - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Local"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = treeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = treeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["Tree"] = Format(codonTree,0,1); - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _returnMe ["length("+branchName+")"] = (_cL["Total"])[_biterator]; - _returnMe ["dS("+branchName+")"] = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]); - _returnMe ["dN("+branchName+")"] = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]); - - ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);"); - _returnMe ["omega("+branchName+")"] = _lom; - } - - return _returnMe; -} - -""" - -SimpleGlobalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "omega"; - _analysisHeaders[7] = "omega_range"; - _analysisHeaders[8] = "AC"; - _analysisHeaders[9] = "AT"; - 
_analysisHeaders[10] = "CG"; - _analysisHeaders[11] = "CT"; - _analysisHeaders[12] = "GT"; - _analysisHeaders[13] = "Tree"; - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n"); - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - _nucSites = filteredData.sites; - - if (Abs(treeString)) - { - givenTreeString = treeString; - } - else - { - if (_currentGene==1) - { - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf"); - } - givenTreeString = InferTreeTopology (0); - treeString = ""; - } - - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Global"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = givenTreeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = givenTreeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["Stop_codons"] = (_nucSites-filteredData.sites*3)$3; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["omega"] = R; - COVARIANCE_PARAMETER = "R"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - _returnMe ["omega_range"] = ""+cmx[0]+"-"+cmx[2]; - _returnMe ["Tree"] = Format(codonTree,0,1); - - - return _returnMe; -} -""" - -FastaReader = """ -fscanf (stdin, "String", _coreAnalysis); -fscanf (stdin, "String", _outputDriver); - -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf"); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - -treeBranchNames = BranchName (givenTree,-1); -treeBranchCount = Columns (treeBranchNames)-1; -treeString = Format (givenTree,1,1); - -SetDialogPrompt ("Multiple gene FASTA file"); -fscanf (PROMPT_FOR_FILE, "Lines", inLines); -fscanf (stdin, "String", modelSpecString); -fscanf (stdin, "String", _outPath); - -ExecuteAFile 
(_outputDriver); -ExecuteAFile (_coreAnalysis); - -/*---------------------------------------------------------*/ - -_linesIn = Columns (inLines); -_currentGene = 1; - _currentState = 0; -/* 0 - waiting for a non-empty line */ -/* 1 - reading files */ - -geneSeqs = ""; -geneSeqs * 0; - -_prepareFileOutput (_outPath); - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - -if (_currentState == 1) -{ - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); -} - -_finishFileOutput (0); -""" - -TabWriter = """ -/*---------------------------------------------------------*/ -function _prepareFileOutput (_outPath) -{ - _outputFilePath = _outPath; - - _returnHeaders = returnResultHeaders(0); - - fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]); - } - - - - fprintf (_outputFilePath,"\\n"); - return 0; -} - -/*---------------------------------------------------------*/ - -function _processAGene (valid, _geneID) -{ - if (valid) - { - returnValue = runAGeneFit (_geneID); - fprintf (_outputFilePath, returnValue[_returnHeaders[0]]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]); - } - fprintf (_outputFilePath, "\\n"); - } - /* - else - { - fprintf (_outputFilePath, - _geneID, ", Incorrect number of sequences\\n"); - } - */ - _currentState = 0; - return 0; -} - -/*---------------------------------------------------------*/ -function _finishFileOutput (dummy) -{ - return 0; -} -""" - -def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the DATA READER */ - -_genomeScreenOptions ["0"] = "%s"; - /* which analysis to run on each gene; */ -_genomeScreenOptions ["1"] = "%s"; - /* what output to produce; */ -_genomeScreenOptions ["2"] = "%s"; - /* genetic code */ -_genomeScreenOptions ["3"] = "%s"; - /* tree file */ -_genomeScreenOptions ["4"] = "%s"; - /* alignment file */ -_genomeScreenOptions ["5"] = "%s"; - /* nucleotide bias string; can define any of the 203 models */ -_genomeScreenOptions ["6"] = "%s"; - /* output csv file */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ) - return get_filled_temp_filename(contents) - - -def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the NucDataBranchLengths.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "CUSTOM"; - /* use an arbitrary nucleotide model */ -_genomeScreenOptions ["2"] = "%s"; - /* which 
model to use */ -_genomeScreenOptions ["3"] = "%s"; - /* model options */ -_genomeScreenOptions ["4"] = "Estimated"; - /* rate parameters */ -_genomeScreenOptions ["5"] = "%s"; - /* base frequencies */ -_genomeScreenOptions ["6"] = "%s"; - /* the tree to use; */ -_genomeScreenOptions ["7"] = "%s"; - /* write .csv output to; */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename) - return get_filled_temp_filename(contents) - - -def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTree.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ -_genomeScreenOptions ["2"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["3"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename) - return get_filled_temp_filename(contents) - - -def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTreeMF.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the multiple alignment file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["2"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ -_genomeScreenOptions ["3"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename) - return get_filled_temp_filename(contents) diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tool-data/shared/ucsc/ucsc_build_sites.txt --- a/tool-data/shared/ucsc/ucsc_build_sites.txt +++ b/tool-data/shared/ucsc/ucsc_build_sites.txt @@ -5,4 +5,3 @@ #Harvested from http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn test http://genome-test.cse.ucsc.edu/cgi-bin/hgTracks? 
anoCar1,ce4,ce3,ce2,ce1,loxAfr1,rn2,eschColi_O157H7_1,rn4,droYak1,heliPylo_J99_1,droYak2,dp3,dp2,caeRem2,caeRem1,oryLat1,eschColi_K12_1,homIni13,homIni14,droAna1,droAna2,oryCun1,sacCer1,heliHepa1,droGri1,sc1,dasNov1,choHof1,tupBel1,mm9,mm8,vibrChol1,mm5,mm4,mm7,mm6,mm3,mm2,rn3,venter1,galGal3,galGal2,ornAna1,equCab1,cioSav2,rheMac2,eutHer13,droPer1,droVir2,droVir1,heliPylo_26695_1,euaGli13,calJac1,campJeju1,droSim1,hg13,hg15,hg16,hg17,monDom1,monDom4,droMoj1,petMar1,droMoj2,vibrChol_MO10_1,vibrPara1,gliRes13,vibrVuln_YJ016_1,braFlo1,cioSav1,lauRas13,dm1,canFam1,canFam2,ci1,echTel1,ci2,caePb1,dm3,ponAbe2,falciparum,xenTro1,xenTro2,nonAfr13,fr2,fr1,gasAcu1,dm2,apiMel1,apiMel2,eschColi_O157H7EDL933_1,priPac1,panTro1,hg18,panTro2,campJeju_RM1221_1,canHg12,vibrChol_O395_1,vibrFisc_ES114_1,danRer5,danRer4,danRer3,danRer2,danRer1,tetNig1,afrOth13,bosTau1,eschColi_CFT073_1,bosTau3,bosTau2,bosTau4,rodEnt13,droEre1,priMat13,vibrVuln_CMCP6_1,cb2,cb3,cb1,borEut13,droSec1,felCat3,strPur1,strPur2,otoGar1,catArr1,anoGam1,triCas2 ucla http://epigenomics.mcdb.ucla.edu/cgi-bin/hgTracks? araTha1 -psu bx main http://main.genome-browser.bx.psu.edu/cgi-bin/hgTracks? hg18,hg19,mm8,mm9 diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tool_conf.xml.main --- a/tool_conf.xml.main +++ b/tool_conf.xml.main @@ -4,32 +4,19 @@ <tool file="data_source/upload.xml" /><tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/biomart.xml" /><tool file="data_source/gramene_mart.xml" /> - <tool file="data_source/flymine.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -87,7 +74,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="features" name="Extract Features"><tool file="filters/ucsc_gene_bed_to_exon_bed.xml" /> @@ -111,7 +97,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool file="new_operations/intersect.xml" /> @@ -139,7 +124,6 @@ <tool file="plotting/histogram2.xml" /><tool file="plotting/scatterplot.xml" /><tool file="plotting/boxplot.xml" /> - <tool file="visualization/GMAJ.xml" /><tool file="visualization/build_ucsc_custom_track.xml" /><tool 
file="maf/vcf_to_maf_customtrack.xml" /><tool file="mutation/visualize.xml" /> @@ -170,14 +154,6 @@ <tool file="multivariate_stats/kpca.xml" /><tool file="multivariate_stats/kcca.xml" /></section> - <section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <!-- <tool file="hyphy/hyphy_dnds_wrapper.xml" /> --> - </section> - <section id="motifs" name="Motif Tools"> - <tool file="rgenetics/rgWebLogo3.xml" /> - </section><section id="clustal" name="Multiple Alignments"><tool file="rgenetics/rgClustalw.xml" /></section> @@ -253,10 +229,6 @@ <tool file="gatk/variant_eval.xml" /><tool file="gatk/variant_combine.xml" /></section> - <section id="peak_calling" name="NGS: Peak Calling"> - <tool file="peak_calling/macs_wrapper.xml" /> - <tool file="peak_calling/sicer_wrapper.xml" /> - </section><section id="ngs-rna-tools" name="NGS: RNA Analysis"><label id="rna_seq" text="RNA-seq" /><label id="filtering" text="Filtering" /> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -5,7 +5,6 @@ <tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_test.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/microbial_import.xml" /><tool file="data_source/biomart.xml" /> @@ -13,34 +12,18 @@ <tool file="data_source/cbi_rice_mart.xml" /><tool file="data_source/gramene_mart.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/flymine.xml" /> - <tool file="data_source/flymine_test.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /> - <tool file="data_source/metabolicmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/wormbase_test.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /> - <tool file="data_source/epigraph_import_test.xml" /><tool file="data_source/hbvar.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /><tool file="validation/fix_errors.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /> - <tool file="data_destination/epigraph_test.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -81,7 +64,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="convert" name="Convert Formats"><tool file="filters/axt_to_concat_fasta.xml" /> @@ -124,7 +106,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool 
file="new_operations/intersect.xml" /> @@ -189,9 +170,6 @@ <tool file="multivariate_stats/kcca.xml" /></section><section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <tool file="hyphy/hyphy_dnds_wrapper.xml" /><tool file="evolution/codingSnps.xml" /><tool file="evolution/add_scores.xml" /></section> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_destination/epigraph.xml --- a/tools/data_destination/epigraph.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_export"> - <description> and prediction with EpiGRAPH</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - - diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_destination/epigraph_test.xml --- a/tools/data_destination/epigraph_test.xml +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_test_export"> - <description> and prediction with EpiGRAPH Test</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. 
class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/bx_browser.xml --- a/tools/data_source/bx_browser.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="BX" id="bx_browser" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to BX Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="bx_browser" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="primaryTable" /> - <value galaxy_value="tabular" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/encode_db.xml --- a/tools/data_source/encode_db.xml +++ /dev/null @@ -1,27 +0,0 @@ -<?xml version="1.0"?> - -<tool name="EncodeDB" id="encode_db1"> - - <description> - at NHGRI - </description> - - <command interpreter="python"> - fetch.py "$url" $output - </command> - - <inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> -<!-- <inputs action="http://localhost:9000/prepared"> --> - <display>go to EncodeDB $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" /> - </inputs> - - <uihints minwidth="800"/> - - <outputs> - <data format="bed" name="output" /> - </outputs> - - <options sanitize="False" refresh="True"/> - -</tool> \ No newline at end of file diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> - <description> server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/epigraph_import_test.xml --- a/tools/data_source/epigraph_import_test.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source"> - <description> test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="Flymine" id="flymine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.flymine.org" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/flymine_test.xml --- a/tools/data_source/flymine_test.xml +++ /dev/null @@ -1,31 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Flymine test" id="flymine_test" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/metabolicmine.xml --- a/tools/data_source/metabolicmine.xml +++ /dev/null @@ -1,13 +0,0 @@ -<?xml version="1.0"?> -<tool name="metabolicMine" id="metabolicmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get"> - <display>go to metabolicMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/modmine.xml --- a/tools/data_source/modmine.xml +++ /dev/null @@ -1,19 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="modENCODE modMine" id="modmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> - <display>go to modENCODE modMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/mousemine.xml --- a/tools/data_source/mousemine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="MouseMine" id="mousemine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.mousemine.org/mousemine/begin.do" check_values="false" method="get"> - <display>go to MouseMine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=mousemine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="MouseMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/ratmine.xml --- a/tools/data_source/ratmine.xml +++ /dev/null @@ -1,34 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Ratmine" id="ratmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> - <display>go to Ratmine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/data_source/yeastmine.xml --- a/tools/data_source/yeastmine.xml +++ /dev/null @@ -1,20 +0,0 @@ -<?xml version="1.0"?> -<tool name="YeastMine" id="yeastmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> - <display>go to yeastMine server $GALAXY_URL</display> - </inputs> - <request_param_translation> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/encode/gencode_partition.xml --- a/tools/encode/gencode_partition.xml +++ /dev/null @@ -1,45 +0,0 @@ -<tool id="gencode_partition1" name="Gencode Partition"> - <description>an interval file</description> - <command interpreter="python">split_by_partitions.py ${GALAXY_DATA_INDEX_DIR} $input1 $out_file1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Partition"/> - </inputs> - <outputs> - <data name="out_file1" format="bed"/> - </outputs> - <tests> - <test> - <param name="input1" value="encode_1.bed"/> - <output name="out_file1" file="gencode_partition_out.bed"/> - </test> - </tests> - <help> -For detailed information about partitioning, click here_. - -.. _here: http://genome.imim.es/gencode/wiki/index.php/Collecting_Feature_Sets_from_Al... - -Datasets are partitioned according to the protocol below: - -A partition scheme has been defined that is similar to what has previously been done with TARs/TRANSFRAGs such that any feature can be classified as falling into one of the following 6 categories: - 1. **Coding** -- coding exons defined from the GENCODE experimentally verified coding set (coding in any transcript) - 2. **5UTR** -- 5' UTR exons defined from the GENCODE experimentally verified coding set (5' UTR in some transcript but never coding in any other) - 3. 
**3UTR** -- 3' UTR exons defined from the GENCODE experimentally verified coding set (3' UTR in some transcript but never coding in any other) - 4. **Intronic Proximal** -- intronic and no more than 5kb away from an exon. - 5. **Intergenic Proximal** -- between genes and no more than 5kb away from an exon. - 6. **Intronic Distal** -- intronic and greater than 5kb away from an exon. - 7. **Intergenic Distal** -- between genes and greater than 5kb away from an exon. - ------ - -.. class:: infomark - -**Note:** Features overlapping more than one partition will take the identity of the lower-numbered partition. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file diff -r ad9cb3bde1ee3aace42ed16dca5e40ac537eee6d -r 275a49bb9e1d1ae9771d1207b96c68334509b72b tools/encode/random_intervals.xml --- a/tools/encode/random_intervals.xml +++ /dev/null @@ -1,64 +0,0 @@ -<tool id="random_intervals1" name="Random Intervals"> -<description>create a random set of intervals</description> - <command interpreter="python">random_intervals_no_bits.py $regions $input2 $input1 $out_file1 ${input2.metadata.chromCol} ${input2.metadata.startCol} ${input2.metadata.endCol} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $use_mask $strand_overlaps ${GALAXY_DATA_INDEX_DIR}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Mimick"> - <validator type="unspecified_build" message="Unspecified build, this tool works with data from genome builds hg16 or hg17. Click the pencil icon in your history item to set the genome build."/> - </param> - <param name="input2" type="data" format="interval" label="Intervals to Mask"/> - <param name="use_mask" type="select" label="Use mask"> - <option value="no_mask">No</option> - <option value="use_mask">Yes</option> - </param> - <param name="strand_overlaps" type="select" label="Allow overlaps"> - <option value="all">Any</option> - <option value="strand">Across Strands</option> - <option value="none">None</option> - </param> - <param name="regions" type="select" label="Regions to use"> - <options from_file="regions.loc"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <column name="dbkey" index="0"/> - <filter type="data_meta" ref="input1" key="dbkey" column="0" /> - <validator type="no_options" message="This tool currently only works with ENCODE data from genome builds hg16 or hg17."/> - </options> - </param> - </inputs> - <outputs> - <data name="out_file1" format="input"/> - </outputs> - <help> - -.. class:: warningmark - -This tool currently only works with ENCODE data from genome builds hg16 or hg17. - ------ - -.. class:: infomark - -**Note:** If you do not wish to mask a set of intervals, change the Use Mask option to No, this option will override any Mask files selected. - ------ - -**Syntax** - -This tool will attempt to create a random set of intervals that mimic those found within your source file. You may also specify a set of intervals to mask. - -**Allow overlaps** options - * **Across Strands** - random regions are allowed to overlap only if they are on different strands. 
- * **Any** - all overlaps are allowed. - * **None** - no overlapping regions are allowed. - -**Regions to use** options - * Bounding region of interest based on the dataset build. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/0c6bf744c5b2/ Changeset: 0c6bf744c5b2 Branch: next-stable User: natefoo Date: 2014-01-27 20:00:16 Summary: Merge heads on next-stable Affected #: 33 files diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -4,28 +4,6 @@ : ${HOSTTYPE:=`uname -m`} -# link to HYPHY is arch-dependent -case "$OSTYPE" in - linux-gnu) - kernel=`uname -r | cut -f1,2 -d.` - HYPHY="/galaxy/software/linux$kernel-$HOSTTYPE/hyphy" - ;; - darwin*) - this_minor=`uname -r | awk -F. '{print ($1-4)}'` - machine=`machine` - for minor in `jot - 3 $this_minor 1`; do - HYPHY="/galaxy/software/macosx10.$minor-$machine/hyphy" - [ -d "$HYPHY" ] && break - done - [ ! -d "$HYPHY" ] && unset HYPHY - ;; - solaris2.10) - # For the psu-production builder which is Solaris, but jobs run on a - # Linux cluster - HYPHY="/galaxy/software/linux2.6-x86_64/hyphy" - ;; -esac - LINKS=" /galaxy/data/location/add_scores.loc /galaxy/data/location/all_fasta.loc @@ -121,12 +99,6 @@ ln -sf $link tool-data done - if [ -d "$HYPHY" ]; then - echo "Linking $HYPHY" - rm -f tool-data/HYPHY - ln -sf $HYPHY tool-data/HYPHY - fi - if [ -d "$JARS" ]; then echo "Linking $JARS" rm -f tool-data/shared/jars diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf doc/source/lib/galaxy.tools.util.rst --- a/doc/source/lib/galaxy.tools.util.rst +++ b/doc/source/lib/galaxy.tools.util.rst @@ -9,14 +9,6 @@ :undoc-members: :show-inheritance: -:mod:`hyphy_util` Module ------------------------- - -.. 
automodule:: galaxy.tools.util.hyphy_util - :members: - :undoc-members: - :show-inheritance: - :mod:`maf_utilities` Module --------------------------- diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf lib/galaxy/tools/util/hyphy_util.py --- a/lib/galaxy/tools/util/hyphy_util.py +++ /dev/null @@ -1,1163 +0,0 @@ -#Dan Blankenberg -#Contains file contents and helper methods for HYPHY configurations -import tempfile, os - -def get_filled_temp_filename(contents): - fh = tempfile.NamedTemporaryFile('w') - filename = fh.name - fh.close() - fh = open(filename, 'w') - fh.write(contents) - fh.close() - return filename - -NJ_tree_shared_ibf = """ -COUNT_GAPS_IN_FREQUENCIES = 0; -methodIndex = 1; - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function InferTreeTopology(verbFlag) -{ - distanceMatrix = {ds.species,ds.species}; - - MESSAGE_LOGGING = 0; - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def"); - InitializeDistances (0); - - for (i = 0; i<ds.species; i=i+1) - { - for (j = i+1; j<ds.species; j = j+1) - { - distanceMatrix[i][j] = ComputeDistanceFormula (i,j); - } - } - - MESSAGE_LOGGING = 1; - cladesMade = 1; - - - if (ds.species == 2) - { - d1 = distanceMatrix[0][1]/2; - treeNodes = {{0,1,d1__}, - {1,1,d1__}, - {2,0,0}}; - - cladesInfo = {{2,0}}; - } - else - { - if (ds.species == 3) - { - /* generate least squares estimates here */ - - d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2; - d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2; - d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2; - - treeNodes = {{0,1,d1__}, - {1,1,d2__}, - {2,1,d3__} - {3,0,0}}; - - cladesInfo = {{3,0}}; - } - else - { - njm = (distanceMatrix > methodIndex)>=ds.species; - - treeNodes = {2*(ds.species+1),3}; - cladesInfo = {ds.species-1,2}; - - for (i=Rows(treeNodes)-1; i>=0; i=i-1) - { - treeNodes[i][0] = njm[i][0]; - treeNodes[i][1] = njm[i][1]; - treeNodes[i][2] = njm[i][2]; - } - - for (i=Rows(cladesInfo)-1; i>=0; i=i-1) - { - cladesInfo[i][0] = njm[i][3]; - cladesInfo[i][1] = njm[i][4]; - } - - njm = 0; - } - } - return 1.0; -} - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function TreeMatrix2TreeString (doLengths) -{ - treeString = ""; - p = 0; - k = 0; - m = treeNodes[0][1]; - n = treeNodes[0][0]; - treeString*(Rows(treeNodes)*25); - - while (m) - { - if (m>p) - { - if (p) - { - treeString*","; - } - for (j=p;j<m;j=j+1) - { - treeString*"("; - } - } - else - { - if (m<p) - { - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - } - else - { - treeString*","; - } - } - if (n<ds.species) - { - GetString (nodeName, ds, n); - if (doLengths != 1) - { - treeString*nodeName; - } - else - { - treeString*taxonNameMap[nodeName]; - } - } - if (doLengths>.5) - { - nodeName = ":"+treeNodes[k][2]; - treeString*nodeName; - } - k=k+1; - p=m; - n=treeNodes[k][0]; - m=treeNodes[k][1]; - } - - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - - treeString*0; - return treeString; -} -""" - -def get_NJ_tree (filename): - return """ -DISTANCE_PROMPTS = 1; -ExecuteAFile ("%s"); - -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -/* do sequence to branch map */ - -taxonNameMap = {}; - -for (k=0; k<ds.species; k=k+1) -{ - GetString 
(thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); -} - -DataSetFilter filteredData = CreateFilter (ds,1); -InferTreeTopology (0); -treeString = TreeMatrix2TreeString (1); - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, treeString); -fscanf (stdin, "String", ps_file); - -if (Abs(ps_file)) -{ - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (ps_file, CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); -} -""" % (filename) - -def get_NJ_treeMF (filename): - return """ -ExecuteAFile ("%s"); - -VERBOSITY_LEVEL = -1; -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - -_linesIn = Columns (inLines); -isomorphicTreesBySequenceCount = {}; - -/*---------------------------------------------------------*/ - -_currentGene = 1; -_currentState = 0; -geneSeqs = ""; -geneSeqs * 128; - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN); -treeOutFile = LAST_FILE_PATH; - -fscanf (stdin,"String", ps_file); -if (Abs(ps_file)) -{ - fprintf (ps_file, CLEAR_FILE, KEEP_OPEN); -} - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - - -if (_currentState == 1) -{ - geneSeqs * 0; - if (Abs(geneSeqs)) - { - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - } -} - -fprintf (treeOutFile,CLOSE_FILE); -if (Abs(ps_file)) -{ - fprintf (ps_file,CLOSE_FILE); -} -/*---------------------------------------------------------*/ - -function _processAGene (_geneID, nwk_file, ps_file) -{ - if (ds.species == 1) - { - fprintf (nwk_file, _geneID-1, "\\tNone \\tNone\\n"); - return 0; - - } - - DataSetFilter filteredData = CreateFilter (ds,1); - - /* do sequence to branch map */ - - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}}); - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - DISTANCE_PROMPTS = (_geneID==1); - - InferTreeTopology (0); - baseTree = TreeMatrix2TreeString (0); - UseModel (USE_NO_MODEL); - - Tree baseTop = baseTree; - - /* standardize this top */ - - for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1) - { - testString = (isomorphicTreesBySequenceCount[filteredData.species])[k]; - Tree testTree = testString; - if (testTree == baseTop) - { - baseTree = testString; - break; - } - } - if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species])) - { - if (k==0) - { - 
isomorphicTreesBySequenceCount[filteredData.species] = {}; - } - (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree; - } - - fprintf (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n"); - if (Abs(ps_file)) - { - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (stdout, _geneID, ":", givenTree,"\\n"); - fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); - } - return 0; -} -""" % (filename) - -BranchLengthsMF = """ -VERBOSITY_LEVEL = -1; - -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - - - -_linesIn = Columns (inLines); - - - -/*---------------------------------------------------------*/ - - - -_currentGene = 1; - -_currentState = 0; - -geneSeqs = ""; - -geneSeqs * 128; - - - -for (l=0; l<_linesIn; l=l+1) - -{ - - if (Abs(inLines[l]) == 0) - - { - - if (_currentState == 1) - - { - - geneSeqs * 0; - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - geneSeqs * 128; - - _currentGene = _currentGene + 1; - - } - - } - - else - - { - - if (_currentState == 0) - - { - - _currentState = 1; - - } - - geneSeqs * inLines[l]; - - geneSeqs * "\\n"; - - } - -} - - - -if (_currentState == 1) - -{ - - geneSeqs * 0; - - if (Abs(geneSeqs)) - - { - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - } - -} - - - -fprintf (resultFile,CLOSE_FILE); - - - -/*---------------------------------------------------------*/ - - - -function _processAGene (_geneID) - -{ - - DataSetFilter filteredData = CreateFilter (ds,1); - - if (_currentGene == 1) - - { - - SelectTemplateModel (filteredData); - - - - SetDialogPrompt ("Tree file"); - - fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - - fscanf (stdin, "String", resultFile); - - - - /* do sequence to branch map */ - - - - validNames = {}; - - taxonNameMap = {}; - - - - for (k=0; k<TipCount(givenTree); k=k+1) - - { - - validNames[TipName(givenTree,k)&&1] = 1; - - } - - - - for (k=0; k<BranchCount(givenTree); k=k+1) - - { - - thisName = BranchName(givenTree,k); - - taxonNameMap[thisName&&1] = thisName; - - } - - - - storeValidNames = validNames; - - fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n"); - - } - - else - - { - - HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1); - - validNames = storeValidNames; - - } - - - - for (k=0; k<ds.species; k=k+1) - - { - - GetString (thisName, ds,k); - - shortName = (thisName^{{"\\\\..+",""}})&&1; - - if (validNames[shortName]) - - { - - taxonNameMap[shortName] = thisName; - - validNames - (shortName); - - SetParameter (ds,k,shortName); - - } - - else - - { - - fprintf (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n"); - - return 0; - - } - - } - - - - /* */ - - - - LikelihoodFunction lf = (filteredData,givenTree); - - 
Optimize (res,lf); - - - - timer = Time(0)-timer; - - - - branchNames = BranchName (givenTree,-1); - - branchLengths = BranchLength (givenTree,-1); - - - - - - for (k=0; k<Columns(branchNames)-1; k=k+1) - - { - - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - if (k==0) - - { - - /* compute a scaling factor */ - - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - - scaleFactor = BranchLength (givenTree,0); - - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - - } - - fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); - - } - - - - ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; - - global treeScaler = 1; - - ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); - - COVARIANCE_PARAMETER = "treeScaler"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); - - ClearConstraints (givenTree); - - return 0; - -} -""" - -BranchLengths = """ -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -SelectTemplateModel (filteredData); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); -fscanf (stdin, "String", resultFile); - -/* do sequence to branch map */ - -validNames = {}; -taxonNameMap = {}; - -for (k=0; k<TipCount(givenTree); k=k+1) -{ - validNames[TipName(givenTree,k)&&1] = 1; -} - -for (k=0; k<BranchCount(givenTree); k=k+1) -{ - thisName = BranchName(givenTree,k); - taxonNameMap[thisName&&1] = thisName; -} - -for (k=0; k<ds.species; k=k+1) -{ - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - if (validNames[shortName]) - { - taxonNameMap[shortName] = thisName; - validNames - (shortName); - SetParameter (ds,k,shortName); - } - else - { - fprintf (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree); - return 0; - } -} - -/* */ - -LikelihoodFunction lf = (filteredData,givenTree); - -Optimize (res,lf); - -timer = Time(0)-timer; - -branchNames = BranchName (givenTree,-1); -branchLengths = BranchLength (givenTree,-1); - -fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n"); - -for (k=0; k<Columns(branchNames)-1; k=k+1) -{ - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - if (k==0) - { - /* compute a scaling factor */ - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - scaleFactor = BranchLength (givenTree,0); - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - } - fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); -} - -ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; -global treeScaler = 1; -ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); -COVARIANCE_PARAMETER = "treeScaler"; -COVARIANCE_PRECISION = 0.95; -CovarianceMatrix (cmx,lf); -ClearConstraints (givenTree); -fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); -fprintf (resultFile,CLOSE_FILE); -""" - -SimpleLocalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function 
returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "AC"; - _analysisHeaders[7] = "AT"; - _analysisHeaders[8] = "CG"; - _analysisHeaders[9] = "CT"; - _analysisHeaders[10] = "GT"; - _analysisHeaders[11] = "Tree"; - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")"; - } - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Local"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = treeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = treeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["Tree"] = Format(codonTree,0,1); - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _returnMe ["length("+branchName+")"] = (_cL["Total"])[_biterator]; - _returnMe ["dS("+branchName+")"] = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]); - _returnMe ["dN("+branchName+")"] = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]); - - ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);"); - _returnMe ["omega("+branchName+")"] = _lom; - } - - return _returnMe; -} - -""" - -SimpleGlobalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "omega"; - _analysisHeaders[7] = "omega_range"; - _analysisHeaders[8] = "AC"; - _analysisHeaders[9] = "AT"; - 
_analysisHeaders[10] = "CG"; - _analysisHeaders[11] = "CT"; - _analysisHeaders[12] = "GT"; - _analysisHeaders[13] = "Tree"; - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n"); - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - _nucSites = filteredData.sites; - - if (Abs(treeString)) - { - givenTreeString = treeString; - } - else - { - if (_currentGene==1) - { - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf"); - } - givenTreeString = InferTreeTopology (0); - treeString = ""; - } - - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Global"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = givenTreeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = givenTreeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["Stop_codons"] = (_nucSites-filteredData.sites*3)$3; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["omega"] = R; - COVARIANCE_PARAMETER = "R"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - _returnMe ["omega_range"] = ""+cmx[0]+"-"+cmx[2]; - _returnMe ["Tree"] = Format(codonTree,0,1); - - - return _returnMe; -} -""" - -FastaReader = """ -fscanf (stdin, "String", _coreAnalysis); -fscanf (stdin, "String", _outputDriver); - -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf"); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - -treeBranchNames = BranchName (givenTree,-1); -treeBranchCount = Columns (treeBranchNames)-1; -treeString = Format (givenTree,1,1); - -SetDialogPrompt ("Multiple gene FASTA file"); -fscanf (PROMPT_FOR_FILE, "Lines", inLines); -fscanf (stdin, "String", modelSpecString); -fscanf (stdin, "String", _outPath); - -ExecuteAFile 
(_outputDriver); -ExecuteAFile (_coreAnalysis); - -/*---------------------------------------------------------*/ - -_linesIn = Columns (inLines); -_currentGene = 1; - _currentState = 0; -/* 0 - waiting for a non-empty line */ -/* 1 - reading files */ - -geneSeqs = ""; -geneSeqs * 0; - -_prepareFileOutput (_outPath); - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - -if (_currentState == 1) -{ - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); -} - -_finishFileOutput (0); -""" - -TabWriter = """ -/*---------------------------------------------------------*/ -function _prepareFileOutput (_outPath) -{ - _outputFilePath = _outPath; - - _returnHeaders = returnResultHeaders(0); - - fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]); - } - - - - fprintf (_outputFilePath,"\\n"); - return 0; -} - -/*---------------------------------------------------------*/ - -function _processAGene (valid, _geneID) -{ - if (valid) - { - returnValue = runAGeneFit (_geneID); - fprintf (_outputFilePath, returnValue[_returnHeaders[0]]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]); - } - fprintf (_outputFilePath, "\\n"); - } - /* - else - { - fprintf (_outputFilePath, - _geneID, ", Incorrect number of sequences\\n"); - } - */ - _currentState = 0; - return 0; -} - -/*---------------------------------------------------------*/ -function _finishFileOutput (dummy) -{ - return 0; -} -""" - -def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the DATA READER */ - -_genomeScreenOptions ["0"] = "%s"; - /* which analysis to run on each gene; */ -_genomeScreenOptions ["1"] = "%s"; - /* what output to produce; */ -_genomeScreenOptions ["2"] = "%s"; - /* genetic code */ -_genomeScreenOptions ["3"] = "%s"; - /* tree file */ -_genomeScreenOptions ["4"] = "%s"; - /* alignment file */ -_genomeScreenOptions ["5"] = "%s"; - /* nucleotide bias string; can define any of the 203 models */ -_genomeScreenOptions ["6"] = "%s"; - /* output csv file */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ) - return get_filled_temp_filename(contents) - - -def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the NucDataBranchLengths.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "CUSTOM"; - /* use an arbitrary nucleotide model */ -_genomeScreenOptions ["2"] = "%s"; - /* which 
model to use */ -_genomeScreenOptions ["3"] = "%s"; - /* model options */ -_genomeScreenOptions ["4"] = "Estimated"; - /* rate parameters */ -_genomeScreenOptions ["5"] = "%s"; - /* base frequencies */ -_genomeScreenOptions ["6"] = "%s"; - /* the tree to use; */ -_genomeScreenOptions ["7"] = "%s"; - /* write .csv output to; */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename) - return get_filled_temp_filename(contents) - - -def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTree.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ -_genomeScreenOptions ["2"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["3"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename) - return get_filled_temp_filename(contents) - - -def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTreeMF.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the multiple alignment file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["2"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ -_genomeScreenOptions ["3"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename) - return get_filled_temp_filename(contents) diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tool-data/shared/ucsc/ucsc_build_sites.txt --- a/tool-data/shared/ucsc/ucsc_build_sites.txt +++ b/tool-data/shared/ucsc/ucsc_build_sites.txt @@ -5,4 +5,3 @@ #Harvested from http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn test http://genome-test.cse.ucsc.edu/cgi-bin/hgTracks? 
anoCar1,ce4,ce3,ce2,ce1,loxAfr1,rn2,eschColi_O157H7_1,rn4,droYak1,heliPylo_J99_1,droYak2,dp3,dp2,caeRem2,caeRem1,oryLat1,eschColi_K12_1,homIni13,homIni14,droAna1,droAna2,oryCun1,sacCer1,heliHepa1,droGri1,sc1,dasNov1,choHof1,tupBel1,mm9,mm8,vibrChol1,mm5,mm4,mm7,mm6,mm3,mm2,rn3,venter1,galGal3,galGal2,ornAna1,equCab1,cioSav2,rheMac2,eutHer13,droPer1,droVir2,droVir1,heliPylo_26695_1,euaGli13,calJac1,campJeju1,droSim1,hg13,hg15,hg16,hg17,monDom1,monDom4,droMoj1,petMar1,droMoj2,vibrChol_MO10_1,vibrPara1,gliRes13,vibrVuln_YJ016_1,braFlo1,cioSav1,lauRas13,dm1,canFam1,canFam2,ci1,echTel1,ci2,caePb1,dm3,ponAbe2,falciparum,xenTro1,xenTro2,nonAfr13,fr2,fr1,gasAcu1,dm2,apiMel1,apiMel2,eschColi_O157H7EDL933_1,priPac1,panTro1,hg18,panTro2,campJeju_RM1221_1,canHg12,vibrChol_O395_1,vibrFisc_ES114_1,danRer5,danRer4,danRer3,danRer2,danRer1,tetNig1,afrOth13,bosTau1,eschColi_CFT073_1,bosTau3,bosTau2,bosTau4,rodEnt13,droEre1,priMat13,vibrVuln_CMCP6_1,cb2,cb3,cb1,borEut13,droSec1,felCat3,strPur1,strPur2,otoGar1,catArr1,anoGam1,triCas2 ucla http://epigenomics.mcdb.ucla.edu/cgi-bin/hgTracks? araTha1 -psu bx main http://main.genome-browser.bx.psu.edu/cgi-bin/hgTracks? hg18,hg19,mm8,mm9 diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tool_conf.xml.main --- a/tool_conf.xml.main +++ b/tool_conf.xml.main @@ -4,32 +4,19 @@ <tool file="data_source/upload.xml" /><tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/biomart.xml" /><tool file="data_source/gramene_mart.xml" /> - <tool file="data_source/flymine.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -87,7 +74,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="features" name="Extract Features"><tool file="filters/ucsc_gene_bed_to_exon_bed.xml" /> @@ -111,7 +97,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool file="new_operations/intersect.xml" /> @@ -139,7 +124,6 @@ <tool file="plotting/histogram2.xml" /><tool file="plotting/scatterplot.xml" /><tool file="plotting/boxplot.xml" /> - <tool file="visualization/GMAJ.xml" /><tool file="visualization/build_ucsc_custom_track.xml" /><tool 
file="maf/vcf_to_maf_customtrack.xml" /><tool file="mutation/visualize.xml" /> @@ -170,14 +154,6 @@ <tool file="multivariate_stats/kpca.xml" /><tool file="multivariate_stats/kcca.xml" /></section> - <section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <!-- <tool file="hyphy/hyphy_dnds_wrapper.xml" /> --> - </section> - <section id="motifs" name="Motif Tools"> - <tool file="rgenetics/rgWebLogo3.xml" /> - </section><section id="clustal" name="Multiple Alignments"><tool file="rgenetics/rgClustalw.xml" /></section> @@ -253,10 +229,6 @@ <tool file="gatk/variant_eval.xml" /><tool file="gatk/variant_combine.xml" /></section> - <section id="peak_calling" name="NGS: Peak Calling"> - <tool file="peak_calling/macs_wrapper.xml" /> - <tool file="peak_calling/sicer_wrapper.xml" /> - </section><section id="ngs-rna-tools" name="NGS: RNA Analysis"><label id="rna_seq" text="RNA-seq" /><label id="filtering" text="Filtering" /> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -5,7 +5,6 @@ <tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_test.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/microbial_import.xml" /><tool file="data_source/biomart.xml" /> @@ -13,34 +12,18 @@ <tool file="data_source/cbi_rice_mart.xml" /><tool file="data_source/gramene_mart.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/flymine.xml" /> - <tool file="data_source/flymine_test.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /> - <tool file="data_source/metabolicmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/wormbase_test.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /> - <tool file="data_source/epigraph_import_test.xml" /><tool file="data_source/hbvar.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /><tool file="validation/fix_errors.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /> - <tool file="data_destination/epigraph_test.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -81,7 +64,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="convert" name="Convert Formats"><tool file="filters/axt_to_concat_fasta.xml" /> @@ -124,7 +106,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool 
file="new_operations/intersect.xml" /> @@ -189,9 +170,6 @@ <tool file="multivariate_stats/kcca.xml" /></section><section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <tool file="hyphy/hyphy_dnds_wrapper.xml" /><tool file="evolution/codingSnps.xml" /><tool file="evolution/add_scores.xml" /></section> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_destination/epigraph.xml --- a/tools/data_destination/epigraph.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_export"> - <description> and prediction with EpiGRAPH</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - - diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_destination/epigraph_test.xml --- a/tools/data_destination/epigraph_test.xml +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_test_export"> - <description> and prediction with EpiGRAPH Test</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. 
class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/bx_browser.xml --- a/tools/data_source/bx_browser.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="BX" id="bx_browser" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to BX Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="bx_browser" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="primaryTable" /> - <value galaxy_value="tabular" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/encode_db.xml --- a/tools/data_source/encode_db.xml +++ /dev/null @@ -1,27 +0,0 @@ -<?xml version="1.0"?> - -<tool name="EncodeDB" id="encode_db1"> - - <description> - at NHGRI - </description> - - <command interpreter="python"> - fetch.py "$url" $output - </command> - - <inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> -<!-- <inputs action="http://localhost:9000/prepared"> --> - <display>go to EncodeDB $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" /> - </inputs> - - <uihints minwidth="800"/> - - <outputs> - <data format="bed" name="output" /> - </outputs> - - <options sanitize="False" refresh="True"/> - -</tool> \ No newline at end of file diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> - <description> server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/epigraph_import_test.xml --- a/tools/data_source/epigraph_import_test.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source"> - <description> test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="Flymine" id="flymine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.flymine.org" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/flymine_test.xml --- a/tools/data_source/flymine_test.xml +++ /dev/null @@ -1,31 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Flymine test" id="flymine_test" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/metabolicmine.xml --- a/tools/data_source/metabolicmine.xml +++ /dev/null @@ -1,13 +0,0 @@ -<?xml version="1.0"?> -<tool name="metabolicMine" id="metabolicmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get"> - <display>go to metabolicMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/modmine.xml --- a/tools/data_source/modmine.xml +++ /dev/null @@ -1,19 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="modENCODE modMine" id="modmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> - <display>go to modENCODE modMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/mousemine.xml --- a/tools/data_source/mousemine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="MouseMine" id="mousemine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.mousemine.org/mousemine/begin.do" check_values="false" method="get"> - <display>go to MouseMine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=mousemine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="MouseMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/ratmine.xml --- a/tools/data_source/ratmine.xml +++ /dev/null @@ -1,34 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Ratmine" id="ratmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> - <display>go to Ratmine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/data_source/yeastmine.xml --- a/tools/data_source/yeastmine.xml +++ /dev/null @@ -1,20 +0,0 @@ -<?xml version="1.0"?> -<tool name="YeastMine" id="yeastmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> - <display>go to yeastMine server $GALAXY_URL</display> - </inputs> - <request_param_translation> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/encode/gencode_partition.xml --- a/tools/encode/gencode_partition.xml +++ /dev/null @@ -1,45 +0,0 @@ -<tool id="gencode_partition1" name="Gencode Partition"> - <description>an interval file</description> - <command interpreter="python">split_by_partitions.py ${GALAXY_DATA_INDEX_DIR} $input1 $out_file1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Partition"/> - </inputs> - <outputs> - <data name="out_file1" format="bed"/> - </outputs> - <tests> - <test> - <param name="input1" value="encode_1.bed"/> - <output name="out_file1" file="gencode_partition_out.bed"/> - </test> - </tests> - <help> -For detailed information about partitioning, click here_. - -.. _here: http://genome.imim.es/gencode/wiki/index.php/Collecting_Feature_Sets_from_Al... - -Datasets are partitioned according to the protocol below: - -A partition scheme has been defined that is similar to what has previously been done with TARs/TRANSFRAGs such that any feature can be classified as falling into one of the following 6 categories: - 1. **Coding** -- coding exons defined from the GENCODE experimentally verified coding set (coding in any transcript) - 2. **5UTR** -- 5' UTR exons defined from the GENCODE experimentally verified coding set (5' UTR in some transcript but never coding in any other) - 3. 
**3UTR** -- 3' UTR exons defined from the GENCODE experimentally verified coding set (3' UTR in some transcript but never coding in any other) - 4. **Intronic Proximal** -- intronic and no more than 5kb away from an exon. - 5. **Intergenic Proximal** -- between genes and no more than 5kb away from an exon. - 6. **Intronic Distal** -- intronic and greater than 5kb away from an exon. - 7. **Intergenic Distal** -- between genes and greater than 5kb away from an exon. - ------ - -.. class:: infomark - -**Note:** Features overlapping more than one partition will take the identity of the lower-numbered partition. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file diff -r 3fdf673bdfc9af7653695ced36f274d65748c7f1 -r 0c6bf744c5b25377145df3994198f994193ba0bf tools/encode/random_intervals.xml --- a/tools/encode/random_intervals.xml +++ /dev/null @@ -1,64 +0,0 @@ -<tool id="random_intervals1" name="Random Intervals"> -<description>create a random set of intervals</description> - <command interpreter="python">random_intervals_no_bits.py $regions $input2 $input1 $out_file1 ${input2.metadata.chromCol} ${input2.metadata.startCol} ${input2.metadata.endCol} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $use_mask $strand_overlaps ${GALAXY_DATA_INDEX_DIR}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Mimick"> - <validator type="unspecified_build" message="Unspecified build, this tool works with data from genome builds hg16 or hg17. Click the pencil icon in your history item to set the genome build."/> - </param> - <param name="input2" type="data" format="interval" label="Intervals to Mask"/> - <param name="use_mask" type="select" label="Use mask"> - <option value="no_mask">No</option> - <option value="use_mask">Yes</option> - </param> - <param name="strand_overlaps" type="select" label="Allow overlaps"> - <option value="all">Any</option> - <option value="strand">Across Strands</option> - <option value="none">None</option> - </param> - <param name="regions" type="select" label="Regions to use"> - <options from_file="regions.loc"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <column name="dbkey" index="0"/> - <filter type="data_meta" ref="input1" key="dbkey" column="0" /> - <validator type="no_options" message="This tool currently only works with ENCODE data from genome builds hg16 or hg17."/> - </options> - </param> - </inputs> - <outputs> - <data name="out_file1" format="input"/> - </outputs> - <help> - -.. class:: warningmark - -This tool currently only works with ENCODE data from genome builds hg16 or hg17. - ------ - -.. class:: infomark - -**Note:** If you do not wish to mask a set of intervals, change the Use Mask option to No, this option will override any Mask files selected. - ------ - -**Syntax** - -This tool will attempt to create a random set of intervals that mimic those found within your source file. You may also specify a set of intervals to mask. - -**Allow overlaps** options - * **Across Strands** - random regions are allowed to overlap only if they are on different strands. 
- * **Any** - all overlaps are allowed. - * **None** - no overlapping regions are allowed. - -**Regions to use** options - * Bounding region of interest based on the dataset build. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/2210b9de666e/ Changeset: 2210b9de666e User: natefoo Date: 2014-01-27 20:00:52 Summary: Merge heads in default. Affected #: 33 files diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -4,28 +4,6 @@ : ${HOSTTYPE:=`uname -m`} -# link to HYPHY is arch-dependent -case "$OSTYPE" in - linux-gnu) - kernel=`uname -r | cut -f1,2 -d.` - HYPHY="/galaxy/software/linux$kernel-$HOSTTYPE/hyphy" - ;; - darwin*) - this_minor=`uname -r | awk -F. '{print ($1-4)}'` - machine=`machine` - for minor in `jot - 3 $this_minor 1`; do - HYPHY="/galaxy/software/macosx10.$minor-$machine/hyphy" - [ -d "$HYPHY" ] && break - done - [ ! -d "$HYPHY" ] && unset HYPHY - ;; - solaris2.10) - # For the psu-production builder which is Solaris, but jobs run on a - # Linux cluster - HYPHY="/galaxy/software/linux2.6-x86_64/hyphy" - ;; -esac - LINKS=" /galaxy/data/location/add_scores.loc /galaxy/data/location/all_fasta.loc @@ -121,12 +99,6 @@ ln -sf $link tool-data done - if [ -d "$HYPHY" ]; then - echo "Linking $HYPHY" - rm -f tool-data/HYPHY - ln -sf $HYPHY tool-data/HYPHY - fi - if [ -d "$JARS" ]; then echo "Linking $JARS" rm -f tool-data/shared/jars diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e doc/source/lib/galaxy.tools.util.rst --- a/doc/source/lib/galaxy.tools.util.rst +++ b/doc/source/lib/galaxy.tools.util.rst @@ -9,14 +9,6 @@ :undoc-members: :show-inheritance: -:mod:`hyphy_util` Module ------------------------- - -.. 
automodule:: galaxy.tools.util.hyphy_util - :members: - :undoc-members: - :show-inheritance: - :mod:`maf_utilities` Module --------------------------- diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e lib/galaxy/tools/util/hyphy_util.py --- a/lib/galaxy/tools/util/hyphy_util.py +++ /dev/null @@ -1,1163 +0,0 @@ -#Dan Blankenberg -#Contains file contents and helper methods for HYPHY configurations -import tempfile, os - -def get_filled_temp_filename(contents): - fh = tempfile.NamedTemporaryFile('w') - filename = fh.name - fh.close() - fh = open(filename, 'w') - fh.write(contents) - fh.close() - return filename - -NJ_tree_shared_ibf = """ -COUNT_GAPS_IN_FREQUENCIES = 0; -methodIndex = 1; - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function InferTreeTopology(verbFlag) -{ - distanceMatrix = {ds.species,ds.species}; - - MESSAGE_LOGGING = 0; - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def"); - InitializeDistances (0); - - for (i = 0; i<ds.species; i=i+1) - { - for (j = i+1; j<ds.species; j = j+1) - { - distanceMatrix[i][j] = ComputeDistanceFormula (i,j); - } - } - - MESSAGE_LOGGING = 1; - cladesMade = 1; - - - if (ds.species == 2) - { - d1 = distanceMatrix[0][1]/2; - treeNodes = {{0,1,d1__}, - {1,1,d1__}, - {2,0,0}}; - - cladesInfo = {{2,0}}; - } - else - { - if (ds.species == 3) - { - /* generate least squares estimates here */ - - d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2; - d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2; - d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2; - - treeNodes = {{0,1,d1__}, - {1,1,d2__}, - {2,1,d3__} - {3,0,0}}; - - cladesInfo = {{3,0}}; - } - else - { - njm = (distanceMatrix > methodIndex)>=ds.species; - - treeNodes = {2*(ds.species+1),3}; - cladesInfo = {ds.species-1,2}; - - for (i=Rows(treeNodes)-1; i>=0; i=i-1) - { - treeNodes[i][0] = njm[i][0]; - treeNodes[i][1] = njm[i][1]; - treeNodes[i][2] = njm[i][2]; - } - - for (i=Rows(cladesInfo)-1; i>=0; i=i-1) - { - cladesInfo[i][0] = njm[i][3]; - cladesInfo[i][1] = njm[i][4]; - } - - njm = 0; - } - } - return 1.0; -} - -/*-----------------------------------------------------------------------------------------------------------------------------------------*/ - -function TreeMatrix2TreeString (doLengths) -{ - treeString = ""; - p = 0; - k = 0; - m = treeNodes[0][1]; - n = treeNodes[0][0]; - treeString*(Rows(treeNodes)*25); - - while (m) - { - if (m>p) - { - if (p) - { - treeString*","; - } - for (j=p;j<m;j=j+1) - { - treeString*"("; - } - } - else - { - if (m<p) - { - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - } - else - { - treeString*","; - } - } - if (n<ds.species) - { - GetString (nodeName, ds, n); - if (doLengths != 1) - { - treeString*nodeName; - } - else - { - treeString*taxonNameMap[nodeName]; - } - } - if (doLengths>.5) - { - nodeName = ":"+treeNodes[k][2]; - treeString*nodeName; - } - k=k+1; - p=m; - n=treeNodes[k][0]; - m=treeNodes[k][1]; - } - - for (j=m;j<p;j=j+1) - { - treeString*")"; - } - - treeString*0; - return treeString; -} -""" - -def get_NJ_tree (filename): - return """ -DISTANCE_PROMPTS = 1; -ExecuteAFile ("%s"); - -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -/* do sequence to branch map */ - -taxonNameMap = {}; - -for (k=0; k<ds.species; k=k+1) -{ - GetString 
(thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); -} - -DataSetFilter filteredData = CreateFilter (ds,1); -InferTreeTopology (0); -treeString = TreeMatrix2TreeString (1); - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, treeString); -fscanf (stdin, "String", ps_file); - -if (Abs(ps_file)) -{ - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (ps_file, CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); -} -""" % (filename) - -def get_NJ_treeMF (filename): - return """ -ExecuteAFile ("%s"); - -VERBOSITY_LEVEL = -1; -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - -_linesIn = Columns (inLines); -isomorphicTreesBySequenceCount = {}; - -/*---------------------------------------------------------*/ - -_currentGene = 1; -_currentState = 0; -geneSeqs = ""; -geneSeqs * 128; - -fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN); -treeOutFile = LAST_FILE_PATH; - -fscanf (stdin,"String", ps_file); -if (Abs(ps_file)) -{ - fprintf (ps_file, CLEAR_FILE, KEEP_OPEN); -} - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - - -if (_currentState == 1) -{ - geneSeqs * 0; - if (Abs(geneSeqs)) - { - DataSet ds = ReadFromString (geneSeqs); - _processAGene (_currentGene,treeOutFile,ps_file); - } -} - -fprintf (treeOutFile,CLOSE_FILE); -if (Abs(ps_file)) -{ - fprintf (ps_file,CLOSE_FILE); -} -/*---------------------------------------------------------*/ - -function _processAGene (_geneID, nwk_file, ps_file) -{ - if (ds.species == 1) - { - fprintf (nwk_file, _geneID-1, "\\tNone \\tNone\\n"); - return 0; - - } - - DataSetFilter filteredData = CreateFilter (ds,1); - - /* do sequence to branch map */ - - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}}); - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - DISTANCE_PROMPTS = (_geneID==1); - - InferTreeTopology (0); - baseTree = TreeMatrix2TreeString (0); - UseModel (USE_NO_MODEL); - - Tree baseTop = baseTree; - - /* standardize this top */ - - for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1) - { - testString = (isomorphicTreesBySequenceCount[filteredData.species])[k]; - Tree testTree = testString; - if (testTree == baseTop) - { - baseTree = testString; - break; - } - } - if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species])) - { - if (k==0) - { - 
isomorphicTreesBySequenceCount[filteredData.species] = {}; - } - (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree; - } - - fprintf (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n"); - if (Abs(ps_file)) - { - treeString = TreeMatrix2TreeString (2); - UseModel (USE_NO_MODEL); - Tree givenTree = treeString; - baseHeight = TipCount (givenTree)*28; - TREE_OUTPUT_OPTIONS = {}; - TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14; - baseWidth = 0; - treeAVL = givenTree^0; - drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n"; - for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1) - { - nodeName = (treeAVL[k3])["Name"]; - if(Abs((treeAVL[k3])["Children"]) == 0) - { - mySpecs = {}; - mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter"; - baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]); - } - } - baseWidth = 40*baseWidth; - - fprintf (stdout, _geneID, ":", givenTree,"\\n"); - fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}})); - } - return 0; -} -""" % (filename) - -BranchLengthsMF = """ -VERBOSITY_LEVEL = -1; - -fscanf (PROMPT_FOR_FILE, "Lines", inLines); - - - -_linesIn = Columns (inLines); - - - -/*---------------------------------------------------------*/ - - - -_currentGene = 1; - -_currentState = 0; - -geneSeqs = ""; - -geneSeqs * 128; - - - -for (l=0; l<_linesIn; l=l+1) - -{ - - if (Abs(inLines[l]) == 0) - - { - - if (_currentState == 1) - - { - - geneSeqs * 0; - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - geneSeqs * 128; - - _currentGene = _currentGene + 1; - - } - - } - - else - - { - - if (_currentState == 0) - - { - - _currentState = 1; - - } - - geneSeqs * inLines[l]; - - geneSeqs * "\\n"; - - } - -} - - - -if (_currentState == 1) - -{ - - geneSeqs * 0; - - if (Abs(geneSeqs)) - - { - - DataSet ds = ReadFromString (geneSeqs); - - _processAGene (_currentGene); - - } - -} - - - -fprintf (resultFile,CLOSE_FILE); - - - -/*---------------------------------------------------------*/ - - - -function _processAGene (_geneID) - -{ - - DataSetFilter filteredData = CreateFilter (ds,1); - - if (_currentGene == 1) - - { - - SelectTemplateModel (filteredData); - - - - SetDialogPrompt ("Tree file"); - - fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - - fscanf (stdin, "String", resultFile); - - - - /* do sequence to branch map */ - - - - validNames = {}; - - taxonNameMap = {}; - - - - for (k=0; k<TipCount(givenTree); k=k+1) - - { - - validNames[TipName(givenTree,k)&&1] = 1; - - } - - - - for (k=0; k<BranchCount(givenTree); k=k+1) - - { - - thisName = BranchName(givenTree,k); - - taxonNameMap[thisName&&1] = thisName; - - } - - - - storeValidNames = validNames; - - fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n"); - - } - - else - - { - - HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1); - - validNames = storeValidNames; - - } - - - - for (k=0; k<ds.species; k=k+1) - - { - - GetString (thisName, ds,k); - - shortName = (thisName^{{"\\\\..+",""}})&&1; - - if (validNames[shortName]) - - { - - taxonNameMap[shortName] = thisName; - - validNames - (shortName); - - SetParameter (ds,k,shortName); - - } - - else - - { - - fprintf (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n"); - - return 0; - - } - - } - - - - /* */ - - - - LikelihoodFunction lf = (filteredData,givenTree); - - 
Optimize (res,lf); - - - - timer = Time(0)-timer; - - - - branchNames = BranchName (givenTree,-1); - - branchLengths = BranchLength (givenTree,-1); - - - - - - for (k=0; k<Columns(branchNames)-1; k=k+1) - - { - - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - if (k==0) - - { - - /* compute a scaling factor */ - - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - - scaleFactor = BranchLength (givenTree,0); - - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - - } - - fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); - - } - - - - ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; - - global treeScaler = 1; - - ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); - - COVARIANCE_PARAMETER = "treeScaler"; - - COVARIANCE_PRECISION = 0.95; - - CovarianceMatrix (cmx,lf); - - fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); - - ClearConstraints (givenTree); - - return 0; - -} -""" - -BranchLengths = """ -DataSet ds = ReadDataFile (PROMPT_FOR_FILE); -DataSetFilter filteredData = CreateFilter (ds,1); - -SelectTemplateModel (filteredData); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); -fscanf (stdin, "String", resultFile); - -/* do sequence to branch map */ - -validNames = {}; -taxonNameMap = {}; - -for (k=0; k<TipCount(givenTree); k=k+1) -{ - validNames[TipName(givenTree,k)&&1] = 1; -} - -for (k=0; k<BranchCount(givenTree); k=k+1) -{ - thisName = BranchName(givenTree,k); - taxonNameMap[thisName&&1] = thisName; -} - -for (k=0; k<ds.species; k=k+1) -{ - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - if (validNames[shortName]) - { - taxonNameMap[shortName] = thisName; - validNames - (shortName); - SetParameter (ds,k,shortName); - } - else - { - fprintf (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree); - return 0; - } -} - -/* */ - -LikelihoodFunction lf = (filteredData,givenTree); - -Optimize (res,lf); - -timer = Time(0)-timer; - -branchNames = BranchName (givenTree,-1); -branchLengths = BranchLength (givenTree,-1); - -fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n"); - -for (k=0; k<Columns(branchNames)-1; k=k+1) -{ - COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - if (k==0) - { - /* compute a scaling factor */ - ExecuteCommands ("givenTree."+branchNames[0]+".t=1"); - scaleFactor = BranchLength (givenTree,0); - ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]); - } - fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n"); -} - -ttl = (branchLengths*(Transpose(branchLengths["1"])))[0]; -global treeScaler = 1; -ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree); -COVARIANCE_PARAMETER = "treeScaler"; -COVARIANCE_PRECISION = 0.95; -CovarianceMatrix (cmx,lf); -ClearConstraints (givenTree); -fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n"); -fprintf (resultFile,CLOSE_FILE); -""" - -SimpleLocalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function 
returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "AC"; - _analysisHeaders[7] = "AT"; - _analysisHeaders[8] = "CG"; - _analysisHeaders[9] = "CT"; - _analysisHeaders[10] = "GT"; - _analysisHeaders[11] = "Tree"; - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")"; - _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")"; - } - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Local"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = treeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = treeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["Tree"] = Format(codonTree,0,1); - - for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1) - { - branchName = treeBranchNames[_biterator]; - - _returnMe ["length("+branchName+")"] = (_cL["Total"])[_biterator]; - _returnMe ["dS("+branchName+")"] = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]); - _returnMe ["dN("+branchName+")"] = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]); - - ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);"); - _returnMe ["omega("+branchName+")"] = _lom; - } - - return _returnMe; -} - -""" - -SimpleGlobalFitter = """ -VERBOSITY_LEVEL = -1; -COUNT_GAPS_IN_FREQUENCIES = 0; - -/*---------------------------------------------------------*/ - -function returnResultHeaders (dummy) -{ - _analysisHeaders = {}; - _analysisHeaders[0] = "BLOCK"; - _analysisHeaders[1] = "BP"; - _analysisHeaders[2] = "S_sites"; - _analysisHeaders[3] = "NS_sites"; - _analysisHeaders[4] = "Stop_codons"; - _analysisHeaders[5] = "LogL"; - _analysisHeaders[6] = "omega"; - _analysisHeaders[7] = "omega_range"; - _analysisHeaders[8] = "AC"; - _analysisHeaders[9] = "AT"; - 
_analysisHeaders[10] = "CG"; - _analysisHeaders[11] = "CT"; - _analysisHeaders[12] = "GT"; - _analysisHeaders[13] = "Tree"; - - return _analysisHeaders; -} - -/*---------------------------------------------------------*/ - -function runAGeneFit (myID) -{ - fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n"); - taxonNameMap = {}; - - for (k=0; k<ds.species; k=k+1) - { - GetString (thisName, ds,k); - shortName = (thisName^{{"\\\\..+",""}})&&1; - taxonNameMap[shortName] = thisName; - SetParameter (ds,k,shortName); - } - - DataSetFilter filteredData = CreateFilter (ds,1); - _nucSites = filteredData.sites; - - if (Abs(treeString)) - { - givenTreeString = treeString; - } - else - { - if (_currentGene==1) - { - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf"); - } - givenTreeString = InferTreeTopology (0); - treeString = ""; - } - - DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); - - if (_currentGene==1) - { - _MG94stdinOverload = {}; - _MG94stdinOverload ["0"] = "Global"; - _MG94stdinOverload ["1"] = modelSpecString; - - ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl", - _MG94stdinOverload); - - Tree codonTree = givenTreeString; - } - else - { - HarvestFrequencies (observedFreq,filteredData,3,1,1); - MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); - vectorOfFrequencies = BuildCodonFrequencies (observedFreq); - Model MG94customModel = (MG94custom,vectorOfFrequencies,0); - - Tree codonTree = givenTreeString; - } - - LikelihoodFunction lf = (filteredData,codonTree); - - Optimize (res,lf); - - _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0); - _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree"); - - - _returnMe = {}; - _returnMe ["BLOCK"] = myID; - _returnMe ["LogL"] = res[1][0]; - _returnMe ["BP"] = _snsAVL ["Sites"]; - _returnMe ["S_sites"] = _snsAVL ["SSites"]; - _returnMe ["NS_sites"] = _snsAVL ["NSSites"]; - _returnMe ["Stop_codons"] = (_nucSites-filteredData.sites*3)$3; - _returnMe ["AC"] = AC; - _returnMe ["AT"] = AT; - _returnMe ["CG"] = CG; - _returnMe ["CT"] = CT; - _returnMe ["GT"] = GT; - _returnMe ["omega"] = R; - COVARIANCE_PARAMETER = "R"; - COVARIANCE_PRECISION = 0.95; - CovarianceMatrix (cmx,lf); - _returnMe ["omega_range"] = ""+cmx[0]+"-"+cmx[2]; - _returnMe ["Tree"] = Format(codonTree,0,1); - - - return _returnMe; -} -""" - -FastaReader = """ -fscanf (stdin, "String", _coreAnalysis); -fscanf (stdin, "String", _outputDriver); - -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf"); -ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf"); - -SetDialogPrompt ("Tree file"); -fscanf (PROMPT_FOR_FILE, "Tree", givenTree); - -treeBranchNames = BranchName (givenTree,-1); -treeBranchCount = Columns (treeBranchNames)-1; -treeString = Format (givenTree,1,1); - -SetDialogPrompt ("Multiple gene FASTA file"); -fscanf (PROMPT_FOR_FILE, "Lines", inLines); -fscanf (stdin, "String", modelSpecString); -fscanf (stdin, "String", _outPath); - -ExecuteAFile 
(_outputDriver); -ExecuteAFile (_coreAnalysis); - -/*---------------------------------------------------------*/ - -_linesIn = Columns (inLines); -_currentGene = 1; - _currentState = 0; -/* 0 - waiting for a non-empty line */ -/* 1 - reading files */ - -geneSeqs = ""; -geneSeqs * 0; - -_prepareFileOutput (_outPath); - -for (l=0; l<_linesIn; l=l+1) -{ - if (Abs(inLines[l]) == 0) - { - if (_currentState == 1) - { - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); - geneSeqs * 128; - _currentGene = _currentGene + 1; - } - } - else - { - if (_currentState == 0) - { - _currentState = 1; - } - geneSeqs * inLines[l]; - geneSeqs * "\\n"; - } -} - -if (_currentState == 1) -{ - geneSeqs * 0; - DataSet ds = ReadFromString (geneSeqs); - _processAGene (ds.species == treeBranchCount,_currentGene); -} - -_finishFileOutput (0); -""" - -TabWriter = """ -/*---------------------------------------------------------*/ -function _prepareFileOutput (_outPath) -{ - _outputFilePath = _outPath; - - _returnHeaders = returnResultHeaders(0); - - fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]); - } - - - - fprintf (_outputFilePath,"\\n"); - return 0; -} - -/*---------------------------------------------------------*/ - -function _processAGene (valid, _geneID) -{ - if (valid) - { - returnValue = runAGeneFit (_geneID); - fprintf (_outputFilePath, returnValue[_returnHeaders[0]]); - for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1) - { - fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]); - } - fprintf (_outputFilePath, "\\n"); - } - /* - else - { - fprintf (_outputFilePath, - _geneID, ", Incorrect number of sequences\\n"); - } - */ - _currentState = 0; - return 0; -} - -/*---------------------------------------------------------*/ -function _finishFileOutput (dummy) -{ - return 0; -} -""" - -def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the DATA READER */ - -_genomeScreenOptions ["0"] = "%s"; - /* which analysis to run on each gene; */ -_genomeScreenOptions ["1"] = "%s"; - /* what output to produce; */ -_genomeScreenOptions ["2"] = "%s"; - /* genetic code */ -_genomeScreenOptions ["3"] = "%s"; - /* tree file */ -_genomeScreenOptions ["4"] = "%s"; - /* alignment file */ -_genomeScreenOptions ["5"] = "%s"; - /* nucleotide bias string; can define any of the 203 models */ -_genomeScreenOptions ["6"] = "%s"; - /* output csv file */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ) - return get_filled_temp_filename(contents) - - -def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the NucDataBranchLengths.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "CUSTOM"; - /* use an arbitrary nucleotide model */ -_genomeScreenOptions ["2"] = "%s"; - /* which 
model to use */ -_genomeScreenOptions ["3"] = "%s"; - /* model options */ -_genomeScreenOptions ["4"] = "Estimated"; - /* rate parameters */ -_genomeScreenOptions ["5"] = "%s"; - /* base frequencies */ -_genomeScreenOptions ["6"] = "%s"; - /* the tree to use; */ -_genomeScreenOptions ["7"] = "%s"; - /* write .csv output to; */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename) - return get_filled_temp_filename(contents) - - -def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTree.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ -_genomeScreenOptions ["2"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["3"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename) - return get_filled_temp_filename(contents) - - -def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename): - contents = """ -_genomeScreenOptions = {}; - -/* all paths are either absolute or relative -to the BuildNJTreeMF.bf */ - -_genomeScreenOptions ["0"] = "%s"; - /* the multiple alignment file to analyze; */ -_genomeScreenOptions ["1"] = "%s"; - /* write Newick tree output to; */ -_genomeScreenOptions ["2"] = "%s"; - /* write a postscript tree file to this file; leave blank to not write a tree */ -_genomeScreenOptions ["3"] = "%s"; - /* pick which distance metric to use; TN93 is a good default */ - -ExecuteAFile ("%s", _genomeScreenOptions); -""" % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename) - return get_filled_temp_filename(contents) diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tool-data/shared/ucsc/ucsc_build_sites.txt --- a/tool-data/shared/ucsc/ucsc_build_sites.txt +++ b/tool-data/shared/ucsc/ucsc_build_sites.txt @@ -5,4 +5,3 @@ #Harvested from http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn test http://genome-test.cse.ucsc.edu/cgi-bin/hgTracks? 
anoCar1,ce4,ce3,ce2,ce1,loxAfr1,rn2,eschColi_O157H7_1,rn4,droYak1,heliPylo_J99_1,droYak2,dp3,dp2,caeRem2,caeRem1,oryLat1,eschColi_K12_1,homIni13,homIni14,droAna1,droAna2,oryCun1,sacCer1,heliHepa1,droGri1,sc1,dasNov1,choHof1,tupBel1,mm9,mm8,vibrChol1,mm5,mm4,mm7,mm6,mm3,mm2,rn3,venter1,galGal3,galGal2,ornAna1,equCab1,cioSav2,rheMac2,eutHer13,droPer1,droVir2,droVir1,heliPylo_26695_1,euaGli13,calJac1,campJeju1,droSim1,hg13,hg15,hg16,hg17,monDom1,monDom4,droMoj1,petMar1,droMoj2,vibrChol_MO10_1,vibrPara1,gliRes13,vibrVuln_YJ016_1,braFlo1,cioSav1,lauRas13,dm1,canFam1,canFam2,ci1,echTel1,ci2,caePb1,dm3,ponAbe2,falciparum,xenTro1,xenTro2,nonAfr13,fr2,fr1,gasAcu1,dm2,apiMel1,apiMel2,eschColi_O157H7EDL933_1,priPac1,panTro1,hg18,panTro2,campJeju_RM1221_1,canHg12,vibrChol_O395_1,vibrFisc_ES114_1,danRer5,danRer4,danRer3,danRer2,danRer1,tetNig1,afrOth13,bosTau1,eschColi_CFT073_1,bosTau3,bosTau2,bosTau4,rodEnt13,droEre1,priMat13,vibrVuln_CMCP6_1,cb2,cb3,cb1,borEut13,droSec1,felCat3,strPur1,strPur2,otoGar1,catArr1,anoGam1,triCas2 ucla http://epigenomics.mcdb.ucla.edu/cgi-bin/hgTracks? araTha1 -psu bx main http://main.genome-browser.bx.psu.edu/cgi-bin/hgTracks? hg18,hg19,mm8,mm9 diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tool_conf.xml.main --- a/tool_conf.xml.main +++ b/tool_conf.xml.main @@ -4,32 +4,19 @@ <tool file="data_source/upload.xml" /><tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/biomart.xml" /><tool file="data_source/gramene_mart.xml" /> - <tool file="data_source/flymine.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -87,7 +74,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="features" name="Extract Features"><tool file="filters/ucsc_gene_bed_to_exon_bed.xml" /> @@ -111,7 +97,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool file="new_operations/intersect.xml" /> @@ -139,7 +124,6 @@ <tool file="plotting/histogram2.xml" /><tool file="plotting/scatterplot.xml" /><tool file="plotting/boxplot.xml" /> - <tool file="visualization/GMAJ.xml" /><tool file="visualization/build_ucsc_custom_track.xml" /><tool 
file="maf/vcf_to_maf_customtrack.xml" /><tool file="mutation/visualize.xml" /> @@ -170,14 +154,6 @@ <tool file="multivariate_stats/kpca.xml" /><tool file="multivariate_stats/kcca.xml" /></section> - <section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <!-- <tool file="hyphy/hyphy_dnds_wrapper.xml" /> --> - </section> - <section id="motifs" name="Motif Tools"> - <tool file="rgenetics/rgWebLogo3.xml" /> - </section><section id="clustal" name="Multiple Alignments"><tool file="rgenetics/rgClustalw.xml" /></section> @@ -253,10 +229,6 @@ <tool file="gatk/variant_eval.xml" /><tool file="gatk/variant_combine.xml" /></section> - <section id="peak_calling" name="NGS: Peak Calling"> - <tool file="peak_calling/macs_wrapper.xml" /> - <tool file="peak_calling/sicer_wrapper.xml" /> - </section><section id="ngs-rna-tools" name="NGS: RNA Analysis"><label id="rna_seq" text="RNA-seq" /><label id="filtering" text="Filtering" /> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tool_conf.xml.sample --- a/tool_conf.xml.sample +++ b/tool_conf.xml.sample @@ -5,7 +5,6 @@ <tool file="data_source/ucsc_tablebrowser.xml" /><tool file="data_source/ucsc_tablebrowser_test.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /> - <tool file="data_source/bx_browser.xml" /><tool file="data_source/ebi_sra.xml" /><tool file="data_source/microbial_import.xml" /><tool file="data_source/biomart.xml" /> @@ -13,34 +12,18 @@ <tool file="data_source/cbi_rice_mart.xml" /><tool file="data_source/gramene_mart.xml" /><tool file="data_source/fly_modencode.xml" /> - <tool file="data_source/flymine.xml" /> - <tool file="data_source/flymine_test.xml" /> - <tool file="data_source/modmine.xml" /> - <tool file="data_source/mousemine.xml" /> - <tool file="data_source/ratmine.xml" /> - <tool file="data_source/yeastmine.xml" /> - <tool file="data_source/metabolicmine.xml" /><tool file="data_source/worm_modencode.xml" /><tool file="data_source/wormbase.xml" /><tool file="data_source/wormbase_test.xml" /><tool file="data_source/eupathdb.xml" /> - <tool file="data_source/encode_db.xml" /> - <tool file="data_source/epigraph_import.xml" /> - <tool file="data_source/epigraph_import_test.xml" /><tool file="data_source/hbvar.xml" /><tool file="genomespace/genomespace_file_browser_prod.xml" /><tool file="genomespace/genomespace_importer.xml" /><tool file="validation/fix_errors.xml" /></section><section id="send" name="Send Data"> - <tool file="data_destination/epigraph.xml" /> - <tool file="data_destination/epigraph_test.xml" /><tool file="genomespace/genomespace_exporter.xml" /></section> - <section id="EncodeTools" name="ENCODE Tools"> - <tool file="encode/gencode_partition.xml" /> - <tool file="encode/random_intervals.xml" /> - </section><section id="liftOver" name="Lift-Over"><tool file="extract/liftOver_wrapper.xml" /></section> @@ -81,7 +64,6 @@ <tool file="filters/compare.xml" /><tool file="new_operations/subtract_query.xml" /><tool file="stats/grouping.xml" /> - <tool file="new_operations/column_join.xml" /></section><section id="convert" name="Convert Formats"><tool file="filters/axt_to_concat_fasta.xml" /> @@ -124,7 +106,6 @@ <section id="scores" name="Get Genomic Scores"><tool file="stats/wiggle_to_simple.xml" /><tool file="stats/aggregate_binned_scores_in_intervals.xml" /> - <tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section id="bxops" name="Operate on Genomic Intervals"><tool 
file="new_operations/intersect.xml" /> @@ -189,9 +170,6 @@ <tool file="multivariate_stats/kcca.xml" /></section><section id="hyphy" name="Evolution"> - <tool file="hyphy/hyphy_branch_lengths_wrapper.xml" /> - <tool file="hyphy/hyphy_nj_tree_wrapper.xml" /> - <tool file="hyphy/hyphy_dnds_wrapper.xml" /><tool file="evolution/codingSnps.xml" /><tool file="evolution/add_scores.xml" /></section> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_destination/epigraph.xml --- a/tools/data_destination/epigraph.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_export"> - <description> and prediction with EpiGRAPH</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_destination/epigraph_test.xml --- a/tools/data_destination/epigraph_test.xml +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0"?> -<tool name="Perform genome analysis" id="epigraph_test_export"> - <description> and prediction with EpiGRAPH Test</description> - <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params> - <inputs> - <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH"> - <validator type="unspecified_build" /> - </param> - <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" /> - <param name="DATA_URL" type="baseurl" value="/datasets" /> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <outputs/> - <help> - -.. 
class:: warningmark - -After clicking the **Execute** button, you will be redirected to the EpiGRAPH test website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance. - ------ - -.. class:: infomark - -**What it does** - -This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods. - ------ - -.. class:: infomark - -**EpiGRAPH outline** - -The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties. - -.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/ - - </help> -</tool> - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/bx_browser.xml --- a/tools/data_source/bx_browser.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="BX" id="bx_browser" tool_type="data_source"> - <description>table browser</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://main.genome-browser.bx.psu.edu/cgi-bin/hgTables" check_values="false" method="get"> - <display>go to BX Browser $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner" /> - <param name="tool_id" type="hidden" value="bx_browser" /> - <param name="sendToGalaxy" type="hidden" value="1" /> - <param name="hgta_compressType" type="hidden" value="none" /> - <param name="hgta_outputType" type="hidden" value="bed" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="org" missing="unknown species" /> - <request_param galaxy_name="table" remote_name="hgta_table" missing="unknown table" /> - <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" /> - <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="tabular" > - <value_translation> - <value galaxy_value="tabular" remote_value="primaryTable" /> - <value galaxy_value="tabular" remote_value="selectedFields" /> - <value galaxy_value="wig" remote_value="wigData" /> - <value galaxy_value="interval" remote_value="tab" /> - <value galaxy_value="html" remote_value="hyperlinks" /> - <value galaxy_value="fasta" remote_value="sequence" /> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="tabular" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/encode_db.xml --- a/tools/data_source/encode_db.xml +++ /dev/null @@ -1,27 +0,0 @@ -<?xml version="1.0"?> - -<tool name="EncodeDB" id="encode_db1"> - - <description> - at NHGRI - </description> - - <command interpreter="python"> - fetch.py "$url" $output - </command> - - <inputs action="http://research.nhgri.nih.gov/projects/ENCODEdb/cgi-bin/power_query.cgi" target="_top"> -<!-- <inputs action="http://localhost:9000/prepared"> --> - <display>go to EncodeDB $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/async/encode_db1" /> - </inputs> - - <uihints minwidth="800"/> - - <outputs> - <data format="bed" name="output" /> - </outputs> - - <options sanitize="False" refresh="True"/> - -</tool> \ No newline at end of file diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/epigraph_import.xml --- a/tools/data_source/epigraph_import.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source"> - <description> server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/epigraph_import_test.xml --- a/tools/data_source/epigraph_import_test.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="EpiGRAPH" id="epigraph_import_test" tool_type="data_source"> - <description> test server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get"> - <display>go to EpiGRAPH server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import_test" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="get" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" /> - <request_param galaxy_name="info" remote_name="INFO" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/flymine.xml --- a/tools/data_source/flymine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="Flymine" id="flymine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.flymine.org" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/flymine_test.xml --- a/tools/data_source/flymine_test.xml +++ /dev/null @@ -1,31 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Flymine test" id="flymine_test" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get"> - <display>go to Flymine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" /> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/metabolicmine.xml --- a/tools/data_source/metabolicmine.xml +++ /dev/null @@ -1,13 +0,0 @@ -<?xml version="1.0"?> -<tool name="metabolicMine" id="metabolicmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.metabolicmine.org/beta/begin.do" check_values="false" method="get"> - <display>go to metabolicMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/modmine.xml --- a/tools/data_source/modmine.xml +++ /dev/null @@ -1,19 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="modENCODE modMine" id="modmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://intermine.modencode.org/" check_values="false" method="get"> - <display>go to modENCODE modMine server $GALAXY_URL</display> - </inputs> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/mousemine.xml --- a/tools/data_source/mousemine.xml +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. 
---> -<tool name="MouseMine" id="mousemine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://www.mousemine.org/mousemine/begin.do" check_values="false" method="get"> - <display>go to MouseMine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=mousemine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" /> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="MouseMine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> - diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/ratmine.xml --- a/tools/data_source/ratmine.xml +++ /dev/null @@ -1,34 +0,0 @@ -<?xml version="1.0"?> -<!-- - If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in - the initial response. If value of 'URL_method' is 'post', any additional params coming back in the - initial response ( in addition to 'URL' ) will be encoded and appended to URL and a post will be performed. ---> -<tool name="Ratmine" id="ratmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://ratmine.mcw.edu/ratmine/begin.do" check_values="false" method="get"> - <display>go to Ratmine server $GALAXY_URL</display> - <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=ratmine" /> - </inputs> - <request_param_translation> - <request_param galaxy_name="URL_method" remote_name="URL_method" missing="post" /> - <request_param galaxy_name="URL" remote_name="URL" missing="" /> - <request_param galaxy_name="dbkey" remote_name="db" missing="?" 
/> - <request_param galaxy_name="organism" remote_name="organism" missing="" /> - <request_param galaxy_name="table" remote_name="table" missing="" /> - <request_param galaxy_name="description" remote_name="description" missing="" /> - <request_param galaxy_name="name" remote_name="name" missing="Ratmine query" /> - <request_param galaxy_name="info" remote_name="info" missing="" /> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/data_source/yeastmine.xml --- a/tools/data_source/yeastmine.xml +++ /dev/null @@ -1,20 +0,0 @@ -<?xml version="1.0"?> -<tool name="YeastMine" id="yeastmine" tool_type="data_source"> - <description>server</description> - <command interpreter="python">data_source.py $output $__app__.config.output_size_limit</command> - <inputs action="http://yeastmine.yeastgenome.org/yeastmine/begin.do" check_values="false" method="get"> - <display>go to yeastMine server $GALAXY_URL</display> - </inputs> - <request_param_translation> - <request_param galaxy_name="data_type" remote_name="data_type" missing="auto" > - <value_translation> - <value galaxy_value="auto" remote_value="txt" /><!-- intermine currently always provides 'txt', make this auto detect --> - </value_translation> - </request_param> - </request_param_translation> - <uihints minwidth="800"/> - <outputs> - <data name="output" format="txt" /> - </outputs> - <options sanitize="False" refresh="True"/> -</tool> diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/encode/gencode_partition.xml --- a/tools/encode/gencode_partition.xml +++ /dev/null @@ -1,45 +0,0 @@ -<tool id="gencode_partition1" name="Gencode Partition"> - <description>an interval file</description> - <command interpreter="python">split_by_partitions.py ${GALAXY_DATA_INDEX_DIR} $input1 $out_file1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Partition"/> - </inputs> - <outputs> - <data name="out_file1" format="bed"/> - </outputs> - <tests> - <test> - <param name="input1" value="encode_1.bed"/> - <output name="out_file1" file="gencode_partition_out.bed"/> - </test> - </tests> - <help> -For detailed information about partitioning, click here_. - -.. _here: http://genome.imim.es/gencode/wiki/index.php/Collecting_Feature_Sets_from_Al... - -Datasets are partitioned according to the protocol below: - -A partition scheme has been defined that is similar to what has previously been done with TARs/TRANSFRAGs such that any feature can be classified as falling into one of the following 6 categories: - 1. **Coding** -- coding exons defined from the GENCODE experimentally verified coding set (coding in any transcript) - 2. **5UTR** -- 5' UTR exons defined from the GENCODE experimentally verified coding set (5' UTR in some transcript but never coding in any other) - 3. 
**3UTR** -- 3' UTR exons defined from the GENCODE experimentally verified coding set (3' UTR in some transcript but never coding in any other) - 4. **Intronic Proximal** -- intronic and no more than 5kb away from an exon. - 5. **Intergenic Proximal** -- between genes and no more than 5kb away from an exon. - 6. **Intronic Distal** -- intronic and greater than 5kb away from an exon. - 7. **Intergenic Distal** -- between genes and greater than 5kb away from an exon. - ------ - -.. class:: infomark - -**Note:** Features overlapping more than one partition will take the identity of the lower-numbered partition. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file diff -r 37cf56c3f0e4605e15f5988764f147bcb186f40f -r 2210b9de666ed9baa48304de39ba745e081b329e tools/encode/random_intervals.xml --- a/tools/encode/random_intervals.xml +++ /dev/null @@ -1,64 +0,0 @@ -<tool id="random_intervals1" name="Random Intervals"> -<description>create a random set of intervals</description> - <command interpreter="python">random_intervals_no_bits.py $regions $input2 $input1 $out_file1 ${input2.metadata.chromCol} ${input2.metadata.startCol} ${input2.metadata.endCol} ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} ${input1.metadata.strandCol} $use_mask $strand_overlaps ${GALAXY_DATA_INDEX_DIR}</command> - <inputs> - <param name="input1" type="data" format="interval" label="File to Mimick"> - <validator type="unspecified_build" message="Unspecified build, this tool works with data from genome builds hg16 or hg17. Click the pencil icon in your history item to set the genome build."/> - </param> - <param name="input2" type="data" format="interval" label="Intervals to Mask"/> - <param name="use_mask" type="select" label="Use mask"> - <option value="no_mask">No</option> - <option value="use_mask">Yes</option> - </param> - <param name="strand_overlaps" type="select" label="Allow overlaps"> - <option value="all">Any</option> - <option value="strand">Across Strands</option> - <option value="none">None</option> - </param> - <param name="regions" type="select" label="Regions to use"> - <options from_file="regions.loc"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <column name="dbkey" index="0"/> - <filter type="data_meta" ref="input1" key="dbkey" column="0" /> - <validator type="no_options" message="This tool currently only works with ENCODE data from genome builds hg16 or hg17."/> - </options> - </param> - </inputs> - <outputs> - <data name="out_file1" format="input"/> - </outputs> - <help> - -.. class:: warningmark - -This tool currently only works with ENCODE data from genome builds hg16 or hg17. - ------ - -.. class:: infomark - -**Note:** If you do not wish to mask a set of intervals, change the Use Mask option to No, this option will override any Mask files selected. - ------ - -**Syntax** - -This tool will attempt to create a random set of intervals that mimic those found within your source file. You may also specify a set of intervals to mask. - -**Allow overlaps** options - * **Across Strands** - random regions are allowed to overlap only if they are on different strands. 
- * **Any** - all overlaps are allowed. - * **None** - no overlapping regions are allowed. - -**Regions to use** options - * Bounding region of interest based on the dataset build. - ------- - -**Citation** - -If you use this tool, please cite `Blankenberg D, Taylor J, Schenck I, He J, Zhang Y, Ghent M, Veeraraghavan N, Albert I, Miller W, Makova KD, Hardison RC, Nekrutenko A. A framework for collaborative analysis of ENCODE data: making large-scale analyses biologist-friendly. Genome Res. 2007 Jun;17(6):960-4. <http://www.ncbi.nlm.nih.gov/pubmed/17568012>`_ - - </help> -</tool> \ No newline at end of file This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/8ea87f7bc63a/ Changeset: 8ea87f7bc63a User: natefoo Date: 2014-01-27 20:01:21 Summary: Merge merge from next-stable to default Affected #: 0 files Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
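For context on the configuration file that most of these changesets edit: tool_conf.xml (and tool_conf.xml.sample / tool_conf.xml.main) groups tools into panel sections, and removing a tool amounts to deleting its <tool file=.../> line, plus the enclosing <section> element if it becomes empty, as the hunks above show. A minimal, illustrative entry follows; the section id, name, and file path here are hypothetical, not taken from the repository:

  <!-- Illustrative sketch only: ids and the file path are made up for this example. -->
  <section id="example_tools" name="Example Tools">
    <!-- Each tool entry points at a tool definition XML file relative to the tools/ directory. -->
    <tool file="example/example_tool.xml" />
  </section>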