commit/galaxy-central: inithello: Added script to update NCBI builds list, replaced Broad data source with direct URL, cleaned up help text.
1 new commit in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/changeset/df99df26d92f/ changeset: df99df26d92f user: inithello date: 2012-08-23 22:36:58 summary: Added script to update NCBI builds list, replaced Broad data source with direct URL, cleaned up help text. affected #: 4 files diff -r 429c1054c13e9c2206ccd4bc3dfd1ca1fa75973c -r df99df26d92fa8a5e3eaef4fd1eef4b6ee18f86d cron/get_ncbi.py --- /dev/null +++ b/cron/get_ncbi.py @@ -0,0 +1,93 @@ +import urllib, pkg_resources, os +pkg_resources.require( 'elementtree' ) +from elementtree import ElementTree, ElementInclude +from xml.parsers.expat import ExpatError as XMLParseErrorThing +import sys + +import pkg_resources + +class GetListing: + def __init__( self, data ): + self.tree = ElementTree.parse( data ) + self.root = self.tree.getroot() + ElementInclude.include(self.root) + + def xml_text(self, name=None): + """Returns the text inside an element""" + root = self.root + if name is not None: + # Try attribute first + val = root.get(name) + if val: + return val + # Then try as element + elem = root.find(name) + else: + elem = root + if elem is not None and elem.text: + text = ''.join(elem.text.splitlines()) + return text.strip() + # No luck, return empty string + return '' + +def dlcachefile( webenv, querykey, i, results ): + url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nuccore&usehistory=y&term=nuccore_assembly[filter]%20AND%20refseq[filter]' + fp = urllib.urlopen( url ) + search = GetListing( fp ) + fp.close() + webenv = search.xml_text( 'WebEnv' ) + querykey = search.xml_text( 'QueryKey' ) + url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=nuccore&WebEnv=%s&query_key=%s&retstart=%d&retmax=%d' % ( webenv, querykey, i, results ) + fp = urllib.urlopen( url ) + cachefile = os.tmpfile() + for line in fp: + cachefile.write( line ) + fp.close() + cachefile.flush() + cachefile.seek(0) + return cachefile + + +url = 
'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nuccore&usehistory=y&term=nuccore_assembly[filter]%20AND%20refseq[filter]' +fp = urllib.urlopen( url ) +results = GetListing( fp ) +fp.close() + +webenv = results.xml_text( 'WebEnv' ) +querykey = results.xml_text( 'QueryKey' ) +counts = int( results.xml_text( 'Count' ) ) +results = 10000 +found = 0 + +for i in range(0, counts + results, results): + rets = dict() + cache = dlcachefile( webenv, querykey, i, results ) + try: + xmldoc = GetListing( cache ) + except (IOError, XMLParseErrorThing): + cache = dlcachefile( webenv, querykey, i, results ) + try: + xmldoc = GetListing( cache ) + except (IOError, XMLParseErrorThing): + cache.close() + exit() + pass + finally: + cache.close() + entries = xmldoc.root.findall( 'DocSum' ) + for entry in entries: + dbkey = None + children = entry.findall('Item') + for item in children: + rets[ item.get('Name') ] = item.text + if not rets['Caption'].startswith('NC_'): + continue + + for ret in rets['Extra'].split('|'): + if not ret.startswith('NC_'): + continue + else: + dbkey = ret + break + if dbkey is not None: + print '\t'.join( [ dbkey, rets['Title'] ] ) diff -r 429c1054c13e9c2206ccd4bc3dfd1ca1fa75973c -r df99df26d92fa8a5e3eaef4fd1eef4b6ee18f86d cron/updatencbi.sh.sample --- /dev/null +++ b/cron/updatencbi.sh.sample @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Script to update NCBI shared data tables. The idea is to update, but if +# the update fails, not replace current data/tables with error +# messages. + +# Edit this line to refer to galaxy's path: +GALAXY=/path/to/galaxy +PYTHONPATH=${GALAXY}/lib +export PYTHONPATH + +# setup directories +echo "Creating required directories." +DIRS=" +${GALAXY}/tool-data/shared/ncbi +${GALAXY}/tool-data/shared/ncbi/new +" +for dir in $DIRS; do + if [ ! -d $dir ]; then + echo "Creating $dir" + mkdir $dir + else + echo "$dir already exists, continuing." + fi +done + +date +echo "Updating NCBI shared data tables." 
+ +# Try to build "builds.txt" +echo "Updating builds.txt" +python ${GALAXY}/cron/get_ncbi.py > ${GALAXY}/tool-data/shared/ncbi/new/builds.txt +if [ $? -eq 0 ] +then + diff ${GALAXY}/tool-data/shared/ncbi/new/builds.txt ${GALAXY}/tool-data/shared/ncbi/builds.txt > /dev/null 2>&1 + if [ $? -ne 0 ] + then + cp -f ${GALAXY}/tool-data/shared/ncbi/new/builds.txt ${GALAXY}/tool-data/shared/ncbi/builds.txt + fi +else + echo "Failed to update builds.txt" >&2 +fi diff -r 429c1054c13e9c2206ccd4bc3dfd1ca1fa75973c -r df99df26d92fa8a5e3eaef4fd1eef4b6ee18f86d lib/galaxy/web/controllers/data_admin.py --- a/lib/galaxy/web/controllers/data_admin.py +++ b/lib/galaxy/web/controllers/data_admin.py @@ -75,7 +75,7 @@ @web.require_admin def add_genome( self, trans, **kwd ): if trans.app.config.get_bool( 'enable_beta_job_managers', False ) == False: - return trans.fill_template( '/admin/data_admin/betajob.mako' ) + return trans.fill_template( '/admin/data_admin/generic_error.mako', message='This feature requires that enable_beta_job_managers be set to True in your Galaxy configuration.' ) dbkeys = trans.ucsc_builds ensemblkeys = trans.ensembl_builds ncbikeys = trans.ncbi_builds @@ -137,9 +137,10 @@ dbkey = build.split( ': ' )[0] longname = build.split( ': ' )[-1] url = 'http://togows.dbcls.jp/entry/ncbi-nucleotide/%s.fasta' % dbkey - elif source == 'Broad': - dbkey = params.get('broad_dbkey', '')[0] - url = 'ftp://ftp.broadinstitute.org/pub/seq/references/%s.fasta' % dbkey + elif source == 'URL': + dbkey = params.get( 'url_dbkey', '' ) + url = params.get( 'url', None ) + longname = params.get( 'longname', None ) elif source == 'UCSC': longname = None for build in trans.ucsc_builds: @@ -198,7 +199,9 @@ break url = 'ftp://ftp.ensembl.org/pub/release-%s/fasta/%s/dna/%s.%s.%s.dna.toplevel.fa.gz' % ( release, pathname.lower(), pathname, dbkey, release ) else: - raise ValueError, 'Somehow an invalid data source was specified.' 
+ return trans.fill_template( '/admin/data_admin/generic_error.mako', message='Somehow an invalid data source was specified.' ) + if url is None: + return trans.fill_template( '/admin/data_admin/generic_error.mako', message='Unable to generate a valid URL with the specified parameters.' ) params = dict( protocol='http', name=dbkey, datatype='fasta', url=url, user=trans.user.id ) jobid = trans.app.job_manager.deferred_job_queue.plugins['GenomeTransferPlugin'].create_job( trans, url, dbkey, longname, indexers ) chainjob = [] diff -r 429c1054c13e9c2206ccd4bc3dfd1ca1fa75973c -r df99df26d92fa8a5e3eaef4fd1eef4b6ee18f86d templates/admin/data_admin/data_form.mako --- a/templates/admin/data_admin/data_form.mako +++ b/templates/admin/data_admin/data_form.mako @@ -53,7 +53,7 @@ <label for="source">Data Source</label><select id="datasource" name="source" label="Data Source"><option value="UCSC">UCSC</option> - <option value="Broad">Broad Institute</option> + <option value="URL">Direct Link</option><option value="NCBI">NCBI</option><option value="Ensembl">EnsemblGenome</option></select> @@ -75,22 +75,31 @@ </div></div><h2>Parameters</h2> - <div id="params_Broad" class="params-block"> + <div id="params_URL" class="params-block"><div class="form-row"> - <label for="longname">Internal Name</label> - <input name="longname" type="text" label="Internal Name" /> + <label for="longname">Long Name</label> + <input name="longname" type="text" label="Long Name" /><div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + A descriptive name for this build. + </div></div><div class="form-row"> - <label for="uniqid">Internal Unique Identifier</label> - <input name="uniqid" type="text" label="Internal Identifier" /> + <label for="url_dbkey">DB Key</label> + <input name="url_dbkey" type="text" label="DB Key" /><div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + The internal DB key for this build. 
WARNING: Using a value that already exists in one or more .loc files may have unpredictable results. + </div></div><div id="dlparams"><div class="form-row"> - <label for="broad_dbkey">External Name</label> - <input name="broad_dbkey" type="text" label="Genome Unique Name" /> + <label for="url">URL</label> + <input name="url" type="text" label="URL" /><div style="clear: both;"> </div> + <div class="toolParamHelp" style="clear: both;"> + The URL to download this build from. + </div></div></div></div> @@ -101,7 +110,9 @@ <input type="text" class="text-and-autocomplete-select ac_input" size="40" name="ncbi_name" id="ncbi_name" value="" /></div><div class="toolParamHelp" style="clear: both;"> - If you can't find the build you want in this list, <insert link to instructions here> + If you can't find the build you want in this list, open a terminal and execute + <pre>sh cron/updatencbi.sh</pre> + in your galaxy root directory. </div></div></div> @@ -116,7 +127,9 @@ </select></div><div class="toolParamHelp" style="clear: both;"> - If you can't find the build you want in this list, <insert link to instructions here> + If you can't find the build you want in this list, open a terminal and execute + <pre>sh cron/updateensembl.sh</pre> + in your galaxy root directory. </div></div></div> @@ -135,7 +148,9 @@ </select></div><div class="toolParamHelp" style="clear: both;"> - If you can't find the build you want in this list, <insert link to instructions here> + If you can't find the build you want in this list, open a terminal and execute + <pre>sh cron/updateucsc.sh</pre> + in your galaxy root directory. </div></div></div> @@ -145,7 +160,6 @@ <script type="text/javascript"> $(document).ready(function() { checkDataSource(); - // Replace dbkey select with search+select. }); $('#datasource').change(function() { checkDataSource(); Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. 
You are receiving this because you have the commit notification service enabled and are listed as the recipient of this email.
participants (1)
-
Bitbucket