3 new commits in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/2228c58015cb/
Changeset:   2228c58015cb
User:        martenson
Date:        2015-01-27 00:45:33+00:00
Summary:     query parser experiments, better formatted time delta
Affected #:  1 file

diff -r 0733c87123cbc2f7009a125c5e57d630bb8296a6 -r 2228c58015cb928e8c2819614e2a69b70ea4f448 lib/galaxy/webapps/tool_shed/search/repo_search.py
--- a/lib/galaxy/webapps/tool_shed/search/repo_search.py
+++ b/lib/galaxy/webapps/tool_shed/search/repo_search.py
@@ -1,11 +1,11 @@
 """Module for searching the toolshed repositories"""
+import datetime
+import dateutil.relativedelta
 from galaxy import exceptions
 from galaxy import eggs
 from galaxy.web.base.controller import BaseAPIController
 from galaxy.webapps.tool_shed import model
 from tool_shed.util.shed_util_common import generate_sharable_link_for_repository_in_tool_shed
-
-
 import logging
 log = logging.getLogger( __name__ )
@@ -34,6 +34,7 @@
     search_ready = False
     schema = None

+
 class RepoSearch ( object ):

     def search( self, trans, search_term, **kwd ):
@@ -72,9 +73,15 @@
                 'remote_repository_url',
                 'repo_owner_username' ], schema=schema )
-            hits = searcher.search( parser.parse( '*' + search_term + '*' ), terms = True )
+            # user_query = parser.parse( search_term )
+            user_query = parser.parse( '*' + search_term + '*' )
+
+            # hits = searcher.search( user_query, terms = True )
+            hits = searcher.search_page( user_query, 1, pagelen = 1, terms = True )
+            log.debug( 'total hits: ' + str( len( hits ) ) )
+            log.debug( 'scored hits: ' + str( hits.scored_length() ) )
             results = {}
-            results[ 'length'] = len( hits )
+            results[ 'total_results'] = len( hits )
             results[ 'hits' ] = []
             for hit in hits:
                 repo = trans.sa_session.query( model.Repository ).filter_by( id=hit[ 'id' ] ).one()
@@ -85,7 +92,20 @@
                         break
                 hit_dict = repo.to_dict( view='element', value_mapper={ 'id': trans.security.encode_id, 'user_id': trans.security.encode_id } )
                 hit_dict[ 'url'] = generate_sharable_link_for_repository_in_tool_shed( repo )
-                hit_dict[ 'last_updated' ] = repo.update_time.strftime( "%Y-%m-%d %I:%M %p" )
+
+                # Format the time since last update to be nicely readable.
+                dt1 = repo.update_time
+                dt2 = datetime.datetime.now()
+                rd = dateutil.relativedelta.relativedelta( dt2, dt1 )
+                time_ago = ''
+                if rd.years > 0:
+                    time_ago += str( rd.years ) + ' years'
+                if rd.months > 0:
+                    time_ago += str( rd.months ) + ' months'
+                if rd.days > 0:
+                    time_ago += str( rd.days ) + ' days ago'
+                hit_dict[ 'last_updated' ] = time_ago
+
                 hit_dict[ 'times_downloaded' ] = repo.times_downloaded
                 hit_dict[ 'approved' ] = approved
                 results[ 'hits' ].append( {'repository': hit_dict, 'matched_terms': hit.matched_terms() } )
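
A note on the time-delta block this changeset adds: dateutil.relativedelta yields calendar-aware years/months/days rather than a raw timedelta, but as committed the string only gains the trailing 'ago' when rd.days > 0. A tidied standalone sketch of the same idea (illustrative only, not part of the changeset; assumes python-dateutil is installed):

    import datetime
    import dateutil.relativedelta

    def format_time_ago( update_time, now=None ):
        # Build a coarse 'N years N months N days ago' string from two datetimes.
        now = now or datetime.datetime.now()
        rd = dateutil.relativedelta.relativedelta( now, update_time )
        parts = [ '%d %s' % ( value, unit )
                  for value, unit in ( ( rd.years, 'years' ), ( rd.months, 'months' ), ( rd.days, 'days' ) )
                  if value > 0 ]
        return ' '.join( parts ) + ' ago' if parts else 'just now'

    print format_time_ago( datetime.datetime( 2014, 11, 20 ) )
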
https://bitbucket.org/galaxy/galaxy-central/commits/5877dc562874/
Changeset:   5877dc562874
User:        martenson
Date:        2015-01-27 22:15:18+00:00
Summary:     adjust schema, start dynamic scoring
Affected #:  2 files

diff -r 2228c58015cb928e8c2819614e2a69b70ea4f448 -r 5877dc562874b894a83393bc54ba671d397909c9 lib/galaxy/webapps/tool_shed/search/repo_search.py
--- a/lib/galaxy/webapps/tool_shed/search/repo_search.py
+++ b/lib/galaxy/webapps/tool_shed/search/repo_search.py
@@ -15,27 +15,47 @@
 try:
     eggs.require( "Whoosh" )
     import whoosh.index
-    from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
+    from whoosh import scoring
+    from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT, STORED
     from whoosh.scoring import BM25F
     from whoosh.qparser import MultifieldParser
     from whoosh.index import Index
     search_ready = True

     schema = Schema(
-        id=STORED,
-        name=TEXT,
-        description=TEXT,
-        long_description=TEXT,
-        repo_type=TEXT,
-        homepage_url=TEXT,
-        remote_repository_url=TEXT,
-        repo_owner_username=TEXT )
+        id = STORED,
+        name = TEXT( field_boost = 1.7 ),
+        description = TEXT( field_boost = 1.5 ),
+        long_description = TEXT,
+        repo_type = TEXT,
+        homepage_url = TEXT,
+        remote_repository_url = TEXT,
+        repo_owner_username = TEXT,
+        times_downloaded = STORED )
+
 except ImportError, e:
     search_ready = False
     schema = None

+
+class RepoWeighting( scoring.BM25F ):
+    """
+    Affect the BM25F scoring model through the final method.
+    source: https://groups.google.com/forum/#!msg/whoosh/1AKNbW8R_l8/XySW0OecH6gJ
+    """
+    use_final = True

-class RepoSearch ( object ):
+    def final( self, searcher, docnum, score ):
+
+        maxhits = 300
+        hitcount = searcher.stored_fields( docnum )[ "times_downloaded" ]
+        log.debug( 'hitcount: ' + str( hitcount ) )
+
+        # Multiply the computed score for this document by the popularity
+        return score * ( hitcount / maxhits )
+
+
+class RepoSearch( object ):

     def search( self, trans, search_term, **kwd ):
         """
@@ -43,8 +63,7 @@

         :param search_term: unicode encoded string with the search term(s)

-        :returns results: dictionary containing number of hits,
-            hits themselves and matched terms for each
+        :returns results: dictionary containing number of hits, hits themselves and matched terms for each
         """
         if search_ready:
             toolshed_whoosh_index_dir = trans.app.config.toolshed_whoosh_index_dir
@@ -55,42 +74,46 @@
             # Some literature about BM25F:
             # http://trec.nist.gov/pubs/trec13/papers/microsoft-cambridge.web.hard.pdf
             # http://en.wikipedia.org/wiki/Okapi_BM25
-            # Basically the higher number the bigger weight.
-            searcher = index.searcher( weighting=BM25F( field_B={
-                'name_B' : 0.9,
-                'description_B' : 0.6,
-                'long_description_B' : 0.5,
-                'homepage_url_B' : 0.3,
-                'remote_repository_url_B' : 0.2,
-                'repo_owner_username' : 0.3,
-                'repo_type_B' : 0.1 } ) )
+            # __Basically__ the higher number the bigger weight.
+            repo_weighting = RepoWeighting( field_B = { 'name_B' : 0.9,
+                                                        'description_B' : 0.6,
+                                                        'long_description_B' : 0.5,
+                                                        'homepage_url_B' : 0.3,
+                                                        'remote_repository_url_B' : 0.2,
+                                                        'repo_owner_username' : 0.3,
+                                                        'repo_type_B' : 0.1 } )
+
+            # log.debug(repo_weighting.__dict__)
+            searcher = index.searcher( weighting = repo_weighting )
+
             parser = MultifieldParser( [
-                'name',
-                'description',
-                'long_description',
-                'repo_type',
-                'homepage_url',
-                'remote_repository_url',
-                'repo_owner_username' ], schema=schema )
+                'name',
+                'description',
+                'long_description',
+                'repo_type',
+                'homepage_url',
+                'remote_repository_url',
+                'repo_owner_username' ], schema = schema )

             # user_query = parser.parse( search_term )
             user_query = parser.parse( '*' + search_term + '*' )

-            # hits = searcher.search( user_query, terms = True )
-            hits = searcher.search_page( user_query, 1, pagelen = 1, terms = True )
+            hits = searcher.search( user_query, terms = True )
+            # hits = searcher.search_page( user_query, 1, pagelen = 1, terms = True )
+            log.debug( 'searching for: #' + str( search_term ) )
             log.debug( 'total hits: ' + str( len( hits ) ) )
             log.debug( 'scored hits: ' + str( hits.scored_length() ) )
             results = {}
             results[ 'total_results'] = len( hits )
             results[ 'hits' ] = []
             for hit in hits:
-                repo = trans.sa_session.query( model.Repository ).filter_by( id=hit[ 'id' ] ).one()
+                repo = trans.sa_session.query( model.Repository ).filter_by( id = hit[ 'id' ] ).one()
                 approved = 'no'
                 for review in repo.reviews:
                     if review.approved == 'yes':
                         approved = 'yes'
                         break
-                hit_dict = repo.to_dict( view='element', value_mapper={ 'id': trans.security.encode_id, 'user_id': trans.security.encode_id } )
+                hit_dict = repo.to_dict( view = 'element', value_mapper = { 'id': trans.security.encode_id, 'user_id': trans.security.encode_id } )
                 hit_dict[ 'url'] = generate_sharable_link_for_repository_in_tool_shed( repo )

                 # Format the time since last update to be nicely readable.
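
A caveat on RepoWeighting.final() above: with maxhits = 300 as a plain int, hitcount / maxhits is integer division under Python 2, so any repository with fewer than 300 downloads multiplies its BM25F score by zero and effectively vanishes from the ranking, while heavily downloaded ones scale without bound. A hedged variant keeping the popularity factor fractional and bounded (an illustration only; the 0.1 floor and the float cap are assumptions, not values from the changeset):

    from whoosh import scoring

    class BoundedRepoWeighting( scoring.BM25F ):
        use_final = True

        def final( self, searcher, docnum, score ):
            maxhits = 300.0
            hitcount = searcher.stored_fields( docnum )[ 'times_downloaded' ]
            # Popularity in [0.1, 1.0]: float math avoids integer division and
            # the floor keeps never-downloaded repositories discoverable.
            popularity = 0.1 + 0.9 * min( hitcount / maxhits, 1.0 )
            return score * popularity
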
diff -r 2228c58015cb928e8c2819614e2a69b70ea4f448 -r 5877dc562874b894a83393bc54ba671d397909c9 scripts/tool_shed/build_ts_whoosh_index.py
--- a/scripts/tool_shed/build_ts_whoosh_index.py
+++ b/scripts/tool_shed/build_ts_whoosh_index.py
@@ -17,7 +17,7 @@
     import whoosh.index
     import galaxy.webapps.tool_shed.model.mapping
     from whoosh.filedb.filestore import FileStorage
-    from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
+    from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT, STORED
     from whoosh.scoring import BM25F
     from whoosh.qparser import MultifieldParser
     from whoosh.index import Index
@@ -25,14 +25,15 @@
     whoosh_ready = True

     schema = Schema(
-        id=STORED,
-        name=TEXT,
-        description=TEXT,
-        long_description=TEXT,
-        repo_type=TEXT,
-        homepage_url=TEXT,
-        remote_repository_url=TEXT,
-        repo_owner_username=TEXT )
+        id = STORED,
+        name = TEXT( field_boost = 1.7 ),
+        description = TEXT( field_boost = 1.5 ),
+        long_description = TEXT,
+        repo_type = TEXT,
+        homepage_url = TEXT,
+        remote_repository_url = TEXT,
+        repo_owner_username = TEXT,
+        times_downloaded = STORED )

 except ImportError, e:
     print 'import error'
@@ -50,15 +51,16 @@
         return a_basestr

     repos_indexed = 0
-    for id, name, description, long_description, repo_type, homepage_url, remote_repository_url, repo_owner_username in get_repos( sa_session ):
-        writer.add_document( id=id,
-                             name=to_unicode( name ),
-                             description=to_unicode( description ),
-                             long_description=to_unicode( long_description ),
-                             repo_type=to_unicode( repo_type ),
-                             homepage_url=to_unicode( homepage_url ),
-                             remote_repository_url=to_unicode( remote_repository_url ),
-                             repo_owner_username=to_unicode( repo_owner_username ) )
+    for id, name, description, long_description, repo_type, homepage_url, remote_repository_url, repo_owner_username, times_downloaded in get_repos( sa_session ):
+        writer.add_document( id = id,
+                             name = to_unicode( name ),
+                             description = to_unicode( description ),
+                             long_description = to_unicode( long_description ),
+                             repo_type = to_unicode( repo_type ),
+                             homepage_url = to_unicode( homepage_url ),
+                             remote_repository_url = to_unicode( remote_repository_url ),
+                             repo_owner_username = to_unicode( repo_owner_username ),
+                             times_downloaded = times_downloaded )
         repos_indexed += 1
     writer.commit()
     print "Number of repos indexed: ", repos_indexed
@@ -72,13 +74,14 @@
             repo_type = repo.type
             homepage_url = repo.homepage_url
             remote_repository_url = repo.remote_repository_url
+            times_downloaded = repo.times_downloaded

             repo_owner_username = ""
             if repo.user_id is not None:
                 user = sa_session.query( model.User ).filter( model.User.id == repo.user_id ).one()
                 repo_owner_username = user.username

-            yield id, name, description, long_description, repo_type, homepage_url, remote_repository_url, repo_owner_username
+            yield id, name, description, long_description, repo_type, homepage_url, remote_repository_url, repo_owner_username, times_downloaded

 def get_sa_session_and_needed_config_settings( ini_file ):
     conf_parser = ConfigParser.ConfigParser( { 'here' : os.getcwd() } )
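
Two details of the schema change in this changeset are worth spelling out: the STORED times_downloaded field is never tokenized or searched, it is simply saved with each document so RepoWeighting.final() can read it back at scoring time via searcher.stored_fields(), while field_boost raises the weight of term matches in name and description at index time. A self-contained sketch of the same pattern against an in-memory index (field values are hypothetical):

    from whoosh.fields import Schema, STORED, TEXT
    from whoosh.filedb.filestore import RamStorage

    schema = Schema( name = TEXT( field_boost = 1.7, stored = True ),
                     description = TEXT( field_boost = 1.5 ),
                     times_downloaded = STORED )
    ix = RamStorage().create_index( schema )
    writer = ix.writer()
    writer.add_document( name = u'bwa_wrapper',
                         description = u'maps reads against a reference with BWA',
                         times_downloaded = 1234 )
    writer.commit()

    with ix.searcher() as searcher:
        # Stored fields come back per document number, boosts apply at query time.
        print searcher.stored_fields( 0 )
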
https://bitbucket.org/galaxy/galaxy-central/commits/73a261a75185/
Changeset:   73a261a75185
User:        martenson
Date:        2015-01-27 22:19:07+00:00
Summary:     Merge
Affected #:  7 files

diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 config/galaxy.ini.sample
--- a/config/galaxy.ini.sample
+++ b/config/galaxy.ini.sample
@@ -764,6 +764,25 @@
 # Enable Galaxy to communicate directly with a sequencer
 #enable_sequencer_communication = False

+
+# Enable beta workflow modules that should not yet be considered part of Galaxy's
+# stable API.
+#enable_beta_workflow_modules = False
+
+# Force usage of Galaxy's beta workflow scheduler under certain circumstances -
+# this workflow scheduling forces Galaxy to schedule workflows in the background
+# so initial submission of the workflows is significantly sped up. This does
+# however force the user to refresh their history manually to see newly scheduled
+# steps (for "normal" workflows - steps are still scheduled far in advance of
+# them being queued and scheduling here doesn't refer to actual cluster job
+# scheduling).
+# Workflows containing more than the specified number of steps will always use
+# Galaxy's beta workflow scheduling.
+#force_beta_workflow_scheduled_min_steps=250
+# Switch to using Galaxy's beta workflow scheduling for all workflows involving
+# collections.
+#force_beta_workflow_scheduled_for_collections=False
+
 # Enable authentication via OpenID. Allows users to log in to their Galaxy
 # account by authenticating with an OpenID provider.
 #enable_openid = False

diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 config/tool_data_table_conf.xml.sample
--- a/config/tool_data_table_conf.xml.sample
+++ b/config/tool_data_table_conf.xml.sample
@@ -60,4 +60,9 @@
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/mosaik_index.loc" />
     </table>
+    <!-- Locations of indexes in the 2bit format -->
+    <table name="twobit" comment_char="#">
+        <columns>value, path</columns>
+        <file path="tool-data/twobit.loc" />
+    </table>
 </tables>
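
The new table entry above tells Galaxy to read two whitespace-separated columns (value, i.e. the dbkey, and path) from tool-data/twobit.loc, skipping lines that start with the configured comment_char. A hypothetical twobit.loc entry (the path is illustrative only) would look like:

    # <value (dbkey)>    <path to .2bit file>
    hg19    /data/genomes/hg19/seq/hg19.2bit
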
diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 lib/galaxy/visualization/genomes.py
--- a/lib/galaxy/visualization/genomes.py
+++ b/lib/galaxy/visualization/genomes.py
@@ -179,33 +179,59 @@
     """

     def __init__( self, app ):
+        self.app = app
         # Create list of genomes from app.genome_builds
         self.genomes = {}
-        for key, description in app.genome_builds.get_genome_build_names():
+        # Store internal versions of data tables for twobit and __dbkey__
+        self._table_versions = { 'twobit': None, '__dbkeys__': None }
+        self.reload_genomes()
+
+    def reload_genomes( self ):
+        self.genomes = {}
+        # Store table versions for later
+        for table_name in self._table_versions.keys():
+            table = self.app.tool_data_tables.get( table_name, None )
+            if table is not None:
+                self._table_versions[ table_name ] = table._loaded_content_version
+
+        twobit_table = self.app.tool_data_tables.get( 'twobit', None )
+        twobit_fields = {}
+        if twobit_table is None:
+            # Add genome data (twobit files) to genomes, directly from twobit.loc
+            try:
+                for line in open( os.path.join( self.app.config.tool_data_path, "twobit.loc" ) ):
+                    if line.startswith("#"): continue
+                    val = line.split()
+                    if len( val ) == 2:
+                        key, path = val
+                        twobit_fields[ key ] = path
+            except IOError, e:
+                # Thrown if twobit.loc does not exist.
+                log.exception( "Error reading twobit.loc: %s", e )
+        for key, description in self.app.genome_builds.get_genome_build_names():
             self.genomes[ key ] = Genome( key, description )
+            # Add len files to genomes.
+            self.genomes[ key ].len_file = self.app.genome_builds.get_chrom_info( key )[0]
+            if self.genomes[ key ].len_file:
+                if not os.path.exists( self.genomes[ key ].len_file ):
+                    self.genomes[ key ].len_file = None
+            # Add genome data (twobit files) to genomes.
+            if twobit_table is not None:
+                self.genomes[ key ].twobit_file = twobit_table.get_entry( 'value', key, 'path', default=None )
+            elif key in twobit_fields:
+                self.genomes[ key ].twobit_file = twobit_fields[ key ]
+
-        # Add len files to genomes.
-        len_files = glob.glob( os.path.join( app.config.len_file_path, "*.len" ) )
-        for f in len_files:
-            key = os.path.split( f )[1].split( ".len" )[0]
-            if key in self.genomes:
-                self.genomes[ key ].len_file = f
-
-        # Add genome data (twobit files) to genomes.
-        try:
-            for line in open( os.path.join( app.config.tool_data_path, "twobit.loc" ) ):
-                if line.startswith("#"): continue
-                val = line.split()
-                if len( val ) == 2:
-                    key, path = val
-                    if key in self.genomes:
-                        self.genomes[ key ].twobit_file = path
-        except IOError, e:
-            # Thrown if twobit.loc does not exist.
-            log.exception( str( e ) )
+    def check_and_reload( self ):
+        # Check if tables have been modified, if so reload
+        for table_name, table_version in self._table_versions.iteritems():
+            table = self.app.tool_data_tables.get( table_name, None )
+            if table is not None and not table.is_current_version( table_version ):
+                return self.reload_genomes()

     def get_build( self, dbkey ):
         """ Returns build for the given key. """
+        self.check_and_reload()
         rval = None
         if dbkey in self.genomes:
             rval = self.genomes[ dbkey ]
@@ -214,6 +240,7 @@

     def get_dbkeys( self, trans, chrom_info=False, **kwd ):
         """ Returns all known dbkeys. If chrom_info is True, only dbkeys with chromosome lengths are returned. """
+        self.check_and_reload()
         dbkeys = []

         # Add user's custom keys to dbkeys.
@@ -241,7 +268,7 @@
         Returns a naturally sorted list of chroms/contigs for a given dbkey.
         Use either chrom or low to specify the starting chrom in the return list.
         """
-
+        self.check_and_reload()
         # If there is no dbkey owner, default to current user.
         dbkey_owner, dbkey = decode_dbkey( dbkey )
         if dbkey_owner:
@@ -303,6 +330,7 @@
         Returns true if there is reference data for the specified dbkey.
         If dbkey is custom, dbkey_owner is needed to determine if there is reference data.
         """
+        self.check_and_reload()
         # Look for key in built-in builds.
         if dbkey in self.genomes and self.genomes[ dbkey ].twobit_file:
             # There is built-in reference data.
@@ -323,7 +351,7 @@
         """
         Return reference data for a build.
         """
-
+        self.check_and_reload()
         # If there is no dbkey owner, default to current user.
         dbkey_owner, dbkey = decode_dbkey( dbkey )
         if dbkey_owner:
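
The genomes.py rewrite above replaces eager, one-shot loading of .len files and twobit.loc with a version-checked cache: every public accessor first calls check_and_reload(), which compares the remembered _loaded_content_version of each backing data table via is_current_version() and rebuilds the genome cache only on a mismatch, so data table edits show up without a server restart. The shape of that pattern, reduced to a self-contained sketch (hypothetical class, not Galaxy code):

    class VersionCheckedCache( object ):

        def __init__( self, source ):
            self.source = source      # anything exposing .version and .load()
            self._version = None
            self._data = None

        def get( self ):
            # Rebuild only when the source reports a version we have not seen.
            if self._data is None or self.source.version != self._version:
                self._version = self.source.version
                self._data = self.source.load()
            return self._data
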
diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 lib/galaxy/webapps/galaxy/controllers/history.py
--- a/lib/galaxy/webapps/galaxy/controllers/history.py
+++ b/lib/galaxy/webapps/galaxy/controllers/history.py
@@ -1351,31 +1351,46 @@
         id = galaxy.util.listify( id )
         name = galaxy.util.listify( name )
         histories = []
-        cur_names = []
+
        for history_id in id:
             history = self.history_manager.get_owned( trans, self.decode_id( history_id ), trans.user )
             if history and history.user_id == user.id:
                 histories.append( history )
-                cur_names.append( history.get_display_name() )
         if not name or len( histories ) != len( name ):
             return trans.fill_template( "/history/rename.mako", histories=histories )

-        change_msg = ""
-        for i in range(len(histories)):
-            if histories[i].user_id == user.id:
-                if name[i] == histories[i].get_display_name():
-                    change_msg = change_msg + "<p>History: "+cur_names[i]+" is already named: "+name[i]+"</p>"
-                elif name[i] not in [None,'',' ']:
-                    name[i] = escape(name[i])
-                    histories[i].name = sanitize_html( name[i] )
-                    trans.sa_session.add( histories[i] )
-                    trans.sa_session.flush()
-                    change_msg = change_msg + "<p>History: "+cur_names[i]+" renamed to: "+name[i]+"</p>"
-                    trans.log_event( "History renamed: id: %s, renamed to: '%s'" % (str(histories[i].id), name[i] ) )
-                else:
-                    change_msg = change_msg + "<p>You must specify a valid name for History: "+cur_names[i]+"</p>"
-            else:
-                change_msg = change_msg + "<p>History: "+cur_names[i]+" does not appear to belong to you.</p>"
-        return trans.show_message( "<p>%s" % change_msg, refresh_frames=['history'] )
+
+        change_msgs = []
+        for i in range( len( histories ) ):
+            cur_name = histories[i].get_display_name()
+            new_name = name[i]
+
+            # skip if name is empty
+            if not isinstance( new_name, basestring ) or not new_name.strip():
+                change_msgs.append( "You must specify a valid name for History: " + cur_name )
+                continue
+
+            # skip if not the owner
+            #??: isn't this already handled in get_history/if statement above?
+            if histories[i].user_id != user.id:
+                change_msgs.append( "History: " + cur_name + " does not appear to belong to you." )
+                continue
+
+            # skip if it wouldn't be a change
+            if new_name == cur_name:
+                change_msgs.append( "History: " + cur_name + " is already named: " + new_name )
+                continue
+
+            # escape, sanitize, set, and log the change
+            new_name = escape( new_name )
+            histories[i].name = sanitize_html( new_name )
+            trans.sa_session.add( histories[i] )
+            trans.sa_session.flush()
+
+            trans.log_event( "History renamed: id: %s, renamed to: '%s'" % ( str( histories[i].id ), new_name ) )
+            change_msgs.append( "History: " + cur_name + " renamed to: " + new_name )
+
+        change_msg = '<br />'.join( change_msgs )
+        return trans.show_message( change_msg, refresh_frames=['history'] )

     @web.expose
     @web.require_login( "copy shared Galaxy history" )
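
One behavioral fix hiding in the rename rewrite above: the old code rejected empty names with name[i] not in [None, '', ' '], which let any all-whitespace name of two or more characters slip through, while the new guard strips before testing. The two checks compared directly (standalone illustration, Python 2):

    def old_is_valid( new_name ):
        return new_name not in [ None, '', ' ' ]

    def new_is_valid( new_name ):
        return isinstance( new_name, basestring ) and bool( new_name.strip() )

    print old_is_valid( '   ' ), new_is_valid( '   ' )                       # True False
    print old_is_valid( 'RNA-seq run 3' ), new_is_valid( 'RNA-seq run 3' )   # True True
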
diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 templates/admin/jobs.mako
--- a/templates/admin/jobs.mako
+++ b/templates/admin/jobs.mako
@@ -55,11 +55,7 @@
         </tr>
         %for job in jobs:
                 <td>
-                    %if job.state == 'upload':
-
-                    %else:
-                        <input type="checkbox" name="stop" value="${job.id}"/>
-                    %endif
+                    <input type="checkbox" name="stop" value="${job.id}"/>
                 </td>
                 <td>${job.id}</td>
                 %if job.history and job.history.user:

diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 templates/user/register.mako
--- a/templates/user/register.mako
+++ b/templates/user/register.mako
@@ -17,7 +17,7 @@

 ## An admin user may be creating a new user account, in which case we want to display the registration form.
 ## But if the current user is not an admin user, then don't display the registration form.
-%if trans.user_is_admin() or not trans.user:
+%if ( cntrller=='admin' and trans.user_is_admin() ) or not trans.user:
     ${render_registration_form()}

     %if trans.app.config.get( 'terms_url', None ) is not None:

diff -r 5877dc562874b894a83393bc54ba671d397909c9 -r 73a261a75185704a35a05350a31a7d5169cbc9b0 test/api/test_tools.py
--- a/test/api/test_tools.py
+++ b/test/api/test_tools.py
@@ -201,6 +201,27 @@
         response = self._run( "validation_default", history_id, inputs )
         self._assert_status_code_is( response, 400 )

+    @skip_without_tool( "multi_select" )
+    def test_select_legal_values( self ):
+        history_id = self.dataset_populator.new_history()
+        inputs = {
+            'select_ex': 'not_option',
+        }
+        response = self._run( "multi_select", history_id, inputs )
+        self._assert_status_code_is( response, 400 )
+
+    @skip_without_tool( "column_param" )
+    def test_column_legal_values( self ):
+        history_id = self.dataset_populator.new_history()
+        new_dataset1 = self.dataset_populator.new_dataset( history_id, content='#col1\tcol2' )
+        inputs = {
+            'input1': { "src": "hda", "id": new_dataset1["id"] },
+            'col': "' ; echo 'moo",
+        }
+        response = self._run( "column_param", history_id, inputs )
+        # TODO: make this test pass...
+        self._assert_status_code_is( response, 400 )
+
     @skip_without_tool( "collection_paired_test" )
     def test_collection_parameter( self ):
         history_id = self.dataset_populator.new_history()

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--
This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.