[hg] galaxy 1685: Script to enumerate GOPS JOIN jobs that could ...
details: http://www.bx.psu.edu/hg/galaxy/rev/82886ba9323b changeset: 1685:82886ba9323b user: guru date: Mon Dec 22 13:35:01 2008 -0500 description: Script to enumerate GOPS JOIN jobs that could have returned an incorrect result before the issue with minimum overlap was fixed last week. 3 file(s) affected in this change: scripts/others/incorrect_gops_jobs.py scripts/others/incorrect_gops_join_jobs.py scripts/others/incorrect_gops_join_jobs.sh diffs (126 lines): diff -r c1d3004f0613 -r 82886ba9323b scripts/others/incorrect_gops_jobs.py --- a/scripts/others/incorrect_gops_jobs.py Mon Dec 22 12:15:02 2008 -0500 +++ b/scripts/others/incorrect_gops_jobs.py Mon Dec 22 13:35:01 2008 -0500 @@ -76,7 +76,10 @@ else: new_cmd_line = " ".join(map(str,cmd_line.split()[:3])) + " " + new_output.name + " " + " ".join(map(str,cmd_line.split()[4:])) job_output = cmd_line.split()[3] - os.system(new_cmd_line) + try: + os.system(new_cmd_line) + except: + pass diff_status = os.system('diff %s %s >> /dev/null' %(new_output.name, job_output)) if diff_status == 0: continue diff -r c1d3004f0613 -r 82886ba9323b scripts/others/incorrect_gops_join_jobs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/others/incorrect_gops_join_jobs.py Mon Dec 22 13:35:01 2008 -0500 @@ -0,0 +1,99 @@ +#!/usr/bin/env python +""" +Fetch gops_join wherein the use specified minimum coverage is not 1. +""" + +from galaxy import eggs +import sys, os, ConfigParser, tempfile +import galaxy.app +import galaxy.model.mapping +import pkg_resources + +pkg_resources.require( "SQLAlchemy >= 0.4" ) +import sqlalchemy as sa + +assert sys.version_info[:2] >= ( 2, 4 ) + +class TestApplication( object ): + """Encapsulates the state of a Universe application""" + def __init__( self, database_connection=None, file_path=None ): + print >> sys.stderr, "python path is: " + ", ".join( sys.path ) + if database_connection is None: + raise Exception( "CleanupDatasetsApplication requires a database_connection value" ) + if file_path is None: + raise Exception( "CleanupDatasetsApplication requires a file_path value" ) + self.database_connection = database_connection + self.file_path = file_path + # Setup the database engine and ORM + self.model = galaxy.model.mapping.init( self.file_path, self.database_connection, engine_options={}, create_tables=False ) + +def main(): + ini_file = sys.argv[1] + conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} ) + conf_parser.read( ini_file ) + configuration = {} + for key, value in conf_parser.items( "app:main" ): + configuration[key] = value + database_connection = configuration['database_connection'] + file_path = configuration['file_path'] + app = TestApplication( database_connection=database_connection, file_path=file_path ) + jobs = {} + try: + for job in app.model.Job.filter( sa.and_( app.model.Job.table.c.create_time < '2008-12-16', + app.model.Job.table.c.state == 'ok', + app.model.Job.table.c.tool_id == 'gops_join_1', + sa.not_( app.model.Job.table.c.command_line.like( '%-m 1 %' ) ) + ) + ).all(): + print "# processing job id %s" % str( job.id ) + for jtoda in job.output_datasets: + print "# --> processing JobToOutputDatasetAssociation id %s" % str( jtoda.id ) + hda = app.model.HistoryDatasetAssociation.get( jtoda.dataset_id ) + print "# ----> processing HistoryDatasetAssociation id %s" % str( hda.id ) + if not hda.deleted: + # Probably don't need this check, since the job state should suffice, but... + if hda.dataset.state == 'ok': + history = app.model.History.get( hda.history_id ) + print "# ------> processing history id %s" % str( history.id ) + if history.user_id: + cmd_line = str( job.command_line ) + new_output = tempfile.NamedTemporaryFile('w') + new_cmd_line = " ".join(map(str,cmd_line.split()[:4])) + " " + new_output.name + " " + " ".join(map(str,cmd_line.split()[5:])) + job_output = cmd_line.split()[4] + try: + os.system(new_cmd_line) + except: + pass + diff_status = os.system('diff %s %s >> /dev/null' %(new_output.name, job_output)) + if diff_status == 0: + continue + print "# --------> Outputs differ" + user = app.model.User.get( history.user_id ) + jobs[ job.id ] = {} + jobs[ job.id ][ 'hda_id' ] = hda.id + jobs[ job.id ][ 'hda_name' ] = hda.name + jobs[ job.id ][ 'hda_info' ] = hda.info + jobs[ job.id ][ 'history_id' ] = history.id + jobs[ job.id ][ 'history_name' ] = history.name + jobs[ job.id ][ 'history_update_time' ] = history.update_time + jobs[ job.id ][ 'user_email' ] = user.email + except Exception, e: + print "# caught exception: %s" % str( e ) + + print "\n\n# Number of incorrect Jobs: %d\n\n" % ( len( jobs ) ) + print "#job_id\thda_id\thda_name\thda_info\thistory_id\thistory_name\thistory_update_time\tuser_email" + for jid in jobs: + print '%s\t%s\t"%s"\t"%s"\t%s\t"%s"\t"%s"\t%s' % \ + ( str( jid ), + str( jobs[ jid ][ 'hda_id' ] ), + jobs[ jid ][ 'hda_name' ], + jobs[ jid ][ 'hda_info' ], + str( jobs[ jid ][ 'history_id' ] ), + jobs[ jid ][ 'history_name' ], + jobs[ jid ][ 'history_update_time' ], + jobs[ jid ][ 'user_email' ] + ) + sys.exit(0) + +if __name__ == "__main__": + main() diff -r c1d3004f0613 -r 82886ba9323b scripts/others/incorrect_gops_join_jobs.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/others/incorrect_gops_join_jobs.sh Mon Dec 22 13:35:01 2008 -0500 @@ -0,0 +1,4 @@ +#!/bin/sh + +cd `dirname $0`/../.. +python ./scripts/others/incorrect_gops_join_jobs.py ./universe_wsgi.ini >> ./scripts/others/incorrect_gops_join_jobs.log
participants (1)
-
Nate Coraor