details: http://www.bx.psu.edu/hg/galaxy/rev/b1514bd5244e
changeset: 1655:b1514bd5244e
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Tue Dec 09 15:51:12 2008 -0500
description:
Bug fix for last commit to Tabular.set_meta()
1 file(s) affected in this change:
lib/galaxy/datatypes/tabular.py
diffs (12 lines):
diff -r f77ec6315c7c -r b1514bd5244e lib/galaxy/datatypes/tabular.py
--- a/lib/galaxy/datatypes/tabular.py Tue Dec 09 14:53:10 2008 -0500
+++ b/lib/galaxy/datatypes/tabular.py Tue Dec 09 15:51:12 2008 -0500
@@ -134,7 +134,7 @@
# Now we fill any unknown (None) column_types with data from the first line
for i in range( len( column_types ) ):
if column_types[i] is None:
- if first_line_column_types[i] is None:
+ if len( first_line_column_types ) <= i or first_line_column_types[i] is None:
column_types[i] = default_column_type
else:
column_types[i] = first_line_column_types[i]
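For readers following the fix: first_line_column_types is derived from whichever columns the first data line actually has, so it can be shorter than column_types when later rows are wider, and indexing past its end raised an IndexError before this guard. A minimal standalone sketch of the fill loop (names mirror the diff; treating default_column_type as 'str' is an assumption, not something the patch shows):

default_column_type = 'str'  # assumed default; the patch does not show it

def fill_column_types( column_types, first_line_column_types ):
    # Fill any unknown (None) column types from the first data line,
    # falling back to the default when that line has too few columns.
    for i in range( len( column_types ) ):
        if column_types[i] is None:
            # Bounds check first: the first line may be shorter than
            # later rows (this is the guard the commit adds).
            if len( first_line_column_types ) <= i or first_line_column_types[i] is None:
                column_types[i] = default_column_type
            else:
                column_types[i] = first_line_column_types[i]
    return column_types

# e.g. a two-column first line in a four-column file:
# fill_column_types( [None, 'int', None, None], ['str', 'int'] )
# -> ['str', 'int', 'str', 'str']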
details: http://www.bx.psu.edu/hg/galaxy/rev/6041065f86a9
changeset: 1650:6041065f86a9
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Thu Dec 04 15:09:57 2008 -0500
description:
Update GMAJ help.
1 file(s) affected in this change:
tools/visualization/GMAJ.xml
diffs (33 lines):
diff -r 8b9cd2a1d5c2 -r 6041065f86a9 tools/visualization/GMAJ.xml
--- a/tools/visualization/GMAJ.xml Thu Dec 04 11:40:42 2008 -0500
+++ b/tools/visualization/GMAJ.xml Thu Dec 04 15:09:57 2008 -0500
@@ -176,13 +176,25 @@
<data name="out_file1" format="gmaj.zip"/>
</outputs>
<help>
-You can use this tool to view a set of MAF alignments. You may also include optional annotation data for one or more of the organisms.
+.. class:: infomark
+
+**Reference Sequence:**
+The default option, "First sequence in each block", is the correct choice for the vast majority of MAF alignments. The alternative, "Any sequence", will allow you to flip the blocks to view them with any of the MAF sequences as the reference, but this is only appropriate if the file was generated by a sequence-symmetric alignment program such as TBA_. Using "Any sequence" with an ordinary MAF will **not** give the same results as if that alignment had been run with a different reference sequence.
+
+.. class:: infomark
+
+**Annotation Style:**
+The default style, "Galaxy", specifies one set of annotations for each species in the MAF file; it assumes that if you have, say, exons for several chromosomes of one species, they are all together in one file. The other style, "Basic", is more flexible but cumbersome: a separate set of files is specified for each sequence (e.g. chromosome), and you must fill in the full sequence name as it appears in the MAF. The Basic style also allows you to provide a display offset that GMAJ will add to all of the position labels for that sequence. With either style, specifying more than one set of annotations for the same sequence will result in an error message from GMAJ.
+
+----
+
+**What it does**
+
+GMAJ is an interactive viewer for MAF alignments, with support for optional annotation data. In addition to browsing the alignments, you can select and export them according to a variety of criteria and send the output back to your Galaxy history.
For detailed information on GMAJ, click here_.
.. _here: /static/gmaj/docs/gmaj_readme.html
-
-Gmaj is a tool for viewing and manipulating Generalized Multiple Alignments (GMAs) produced by programs such as TBA (though it can also be used with maf-format alignments from other sources). It can display interactive graphical and text representations of the alignments, a diagram showing the locations of exons and repeats, and other annotations -- all with the user's choice of reference sequence.
-
+.. _TBA: http://www.bx.psu.edu/miller_lab/
</help>
</tool>
details: http://www.bx.psu.edu/hg/galaxy/rev/1dc78a13070c
changeset: 1645:1dc78a13070c
user: James Taylor <james(a)jamestaylor.org>
date: Tue Dec 02 13:49:29 2008 -0500
description:
imported patch job-queue-sessions
1 file(s) affected in this change:
lib/galaxy/jobs/__init__.py
diffs (229 lines):
diff -r ab46f22b4624 -r 1dc78a13070c lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py Tue Dec 02 11:41:25 2008 -0500
+++ b/lib/galaxy/jobs/__init__.py Tue Dec 02 13:49:29 2008 -0500
@@ -63,9 +63,8 @@
def __init__( self, app, dispatcher ):
"""Start the job manager"""
self.app = app
- # Should we use IPC to communicate (needed if forking)
+ # Should we read jobs from the database, or use an in-memory queue
self.track_jobs_in_database = app.config.get_bool( 'track_jobs_in_database', False )
-
# Check if any special scheduling policy should be used. If not, default is FIFO.
sched_policy = app.config.get('job_scheduler_policy', 'FIFO')
# Parse the scheduler policy string. The policy class implements a special queue.
@@ -104,13 +103,13 @@
self.sleeper = Sleeper()
self.running = True
self.dispatcher = dispatcher
- self.monitor_thread = threading.Thread( target=self.monitor )
+ self.monitor_thread = threading.Thread( target=self.__monitor )
self.monitor_thread.start()
log.info( "job manager started" )
if app.config.get_bool( 'enable_job_recovery', True ):
- self.check_jobs_at_startup()
+ self.__check_jobs_at_startup()
- def check_jobs_at_startup( self ):
+ def __check_jobs_at_startup( self ):
"""
Checks all jobs that are in the 'running' or 'queued' state in the
database and requeues or cleans up as necessary. Only run as the
@@ -125,10 +124,10 @@
for job in model.Job.filter( (model.Job.c.state == model.Job.states.RUNNING) | (model.Job.c.state == model.Job.states.QUEUED) ).all():
if job.job_runner_name is not None:
# why are we passing the queue to the wrapper?
- job_wrapper = JobWrapper( job.id, self.app.toolbox.tools_by_id[ job.tool_id ], self )
+ job_wrapper = JobWrapper( job, self.app.toolbox.tools_by_id[ job.tool_id ], self )
self.dispatcher.recover( job, job_wrapper )
- def monitor( self ):
+ def __monitor( self ):
"""
Continually iterate the waiting jobs, checking if each is ready to
run and dispatching if so.
@@ -137,13 +136,13 @@
time.sleep( 10 )
while self.running:
try:
- self.monitor_step()
+ self.__monitor_step()
except:
log.exception( "Exception in monitor_step" )
# Sleep
self.sleeper.sleep( 1 )
- def monitor_step( self ):
+ def __monitor_step( self ):
"""
Called repeatedly by `monitor` to process waiting jobs. Gets any new
jobs (either from the database or from its own queue), then iterates
@@ -153,12 +152,13 @@
it is marked as having errors and removed from the queue. Otherwise,
the job is dispatched.
"""
+ # Get an orm session
+ session = mapping.Session()
# Pull all new jobs from the queue at once
new_jobs = []
if self.track_jobs_in_database:
- model = self.app.model
- for j in model.Job.filter( model.Job.c.state==model.Job.states.NEW ).all():
- job = JobWrapper( j.id, self.app.toolbox.tools_by_id[ j.tool_id ], self )
+ for j in session.query( model.Job ).filter( model.Job.c.state == model.Job.states.NEW ).all():
+ job = JobWrapper( j, self.app.toolbox.tools_by_id[ j.tool_id ], self )
new_jobs.append( job )
else:
try:
@@ -169,18 +169,26 @@
# Unpack the message
job_id, tool_id = message
# Create a job wrapper from it
- job = JobWrapper( job_id, self.app.toolbox.tools_by_id[ tool_id ], self )
+ job_entity = session.query( model.Job ).get( job_id )
+ job = JobWrapper( job_entity, self.app.toolbox.tools_by_id[ tool_id ], self )
# Append to watch queue
new_jobs.append( job )
except Empty:
- pass
+ pass
# Iterate over new and waiting jobs and look for any that are
# ready to run
new_waiting = []
for job in ( new_jobs + self.waiting ):
try:
+ # Clear the session for each job so we get fresh states for
+ # job and all datasets
+ session.clear()
+ # Get the real job entity corresponding to the wrapper (if we
+ # are tracking in the database this is probably cached in
+ # the session from the original query above)
+ job_entity = session.query( model.Job ).get( job.job_id )
# Check the job's dependencies, requeue if they're not done
- job_state = job.check_if_ready_to_run()
+ job_state = self.__check_if_ready_to_run( job, job_entity )
if job_state == JOB_WAIT:
if not self.track_jobs_in_database:
new_waiting.append( job )
@@ -195,7 +203,7 @@
if self.use_policy :
self.squeue.put( job )
log.debug( "job %d put in policy queue" % job.job_id )
- else : # or dispatch the job directly
+ else: # or dispatch the job directly
self.dispatcher.put( job )
log.debug( "job %d dispatched" % job.job_id)
elif job_state == JOB_DELETED:
@@ -225,6 +233,43 @@
except Exception, e: # if something else breaks while dispatching
job.fail( "failure running job %d: %s" % ( sjob.job_id, str( e ) ) )
log.exception( "failure running job %d" % sjob.job_id )
+ # Done with the session
+ mapping.Session.remove()
+
+ def __check_if_ready_to_run( self, job_wrapper, job ):
+ """
+ Check if a job is ready to run by verifying that each of its input
+ datasets is ready (specifically in the OK state). If any input dataset
+ has an error, fail the job and return JOB_INPUT_ERROR. If any input
+ dataset is deleted, fail the job and return JOB_INPUT_DELETED. If all
+ input datasets are in OK state, return JOB_READY indicating that the
+ job can be dispatched. Otherwise, return JOB_WAIT indicating that input
+ datasets are still being prepared.
+ """
+ if self.app.memory_usage:
+ # Keep track of memory usage
+ m0 = self.app.memory_usage.memory()
+ for dataset_assoc in job.input_datasets:
+ idata = dataset_assoc.dataset
+ if not idata:
+ continue
+ # don't run jobs for which the input dataset was deleted
+ if idata.deleted:
+ job_wrapper.fail( "input data %d (file: %s) was deleted before the job started" % ( idata.hid, idata.file_name ) )
+ return JOB_INPUT_DELETED
+ # an error in the input data causes us to bail immediately
+ elif idata.state == idata.states.ERROR:
+ job_wrapper.fail( "input data %d (file: %s) is in an error state" % ( idata.hid, idata.file_name ) )
+ return JOB_INPUT_ERROR
+ elif idata.state != idata.states.OK:
+ # need to requeue
+ return JOB_WAIT
+ if self.app.memory_usage:
+ m1 = self.app.memory_usage.memory( m0, pretty=True )
+ log.info("End of check_if_ready_to_run for job id %d, memory used increased by %s" % ( job.id, m1 ) )
+ if job.state == model.Job.states.DELETED:
+ return JOB_DELETED
+ return JOB_READY
def put( self, job_id, tool ):
"""Add a job to the queue (by job identifier)"""
@@ -251,8 +296,10 @@
Wraps a 'model.Job' with convenience methods for running processes and
state management.
"""
- def __init__(self, job_id, tool, queue ):
- self.job_id = job_id
+ def __init__(self, job, tool, queue ):
+ self.job_id = job.id
+ # This is immutable, we cache it for the scheduling policy to use if needed
+ self.session_id = job.session_id
self.tool = tool
self.queue = queue
self.app = queue.app
@@ -401,45 +448,6 @@
job.job_runner_name = runner_url
job.job_runner_external_id = external_id
job.flush()
-
- def check_if_ready_to_run( self ):
- """
- Check if a job is ready to run by verifying that each of its input
- datasets is ready (specifically in the OK state). If any input dataset
- has an error, fail the job and return JOB_INPUT_ERROR. If any input
- dataset is deleted, fail the job and return JOB_INPUT_DELETED. If all
- input datasets are in OK state, return JOB_READY indicating that the
- job can be dispatched. Otherwise, return JOB_WAIT indicating that input
- datasets are still being prepared.
- """
- if self.app.memory_usage:
- # Keep track of memory usage
- m0 = self.app.memory_usage.memory()
- job = model.Job.get( self.job_id )
- job.refresh()
- for dataset_assoc in job.input_datasets:
- idata = dataset_assoc.dataset
- if not idata:
- continue
- idata.refresh()
- idata.dataset.refresh() #we need to refresh the base Dataset, since that is where 'state' is stored
- # don't run jobs for which the input dataset was deleted
- if idata.deleted:
- self.fail( "input data %d (file: %s) was deleted before the job started" % ( idata.hid, idata.file_name ) )
- return JOB_INPUT_DELETED
- # an error in the input data causes us to bail immediately
- elif idata.state == idata.states.ERROR:
- self.fail( "input data %d (file: %s) is in an error state" % ( idata.hid, idata.file_name ) )
- return JOB_INPUT_ERROR
- elif idata.state != idata.states.OK:
- # need to requeue
- return JOB_WAIT
- if self.app.memory_usage:
- m1 = self.app.memory_usage.memory( m0, pretty=True )
- log.info("End of check_if_ready_to_run for job id %d, memory used increased by %s" % ( job.id, m1 ) )
- if job.state == model.Job.states.DELETED:
- return JOB_DELETED
- return JOB_READY
def finish( self, stdout, stderr ):
"""
@@ -538,8 +546,7 @@
return self.command_line
def get_session_id( self ):
- job = model.Job.get( self.job_id )
- return job.session_id
+ return self.session_id
def get_input_fnames( self ):
job = model.Job.get( self.job_id )
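Taken together, the patch moves job-readiness checking out of JobWrapper and into the queue, and wraps each monitor pass in a single ORM session: open one contextual session per pass, call session.clear() before examining each job so its state and its input datasets' states are re-read fresh, and call mapping.Session.remove() when the pass ends. A hedged sketch of that lifecycle with era-appropriate SQLAlchemy (the Job model, state name, and dispatch callable here are illustrative assumptions, not part of the patch):

# Sketch of the per-pass session lifecycle from __monitor_step().
# Assumes Session is a SQLAlchemy scoped (contextual) session, as
# galaxy.model.mapping provides; Job and dispatch are stand-ins.

def monitor_step( Session, Job, waiting_job_ids, dispatch ):
    session = Session()  # thread-local session for this pass
    try:
        for job_id in waiting_job_ids:
            # Drop everything cached so far so the job and its input
            # datasets are re-read with fresh state from the database
            # (session.clear() in 2008-era SQLAlchemy; expunge_all later).
            session.clear()
            job = session.query( Job ).get( job_id )
            if job is None or job.state == 'deleted':
                continue
            dispatch( job )
    finally:
        # Hand the session back to the registry so the next pass
        # starts clean, mirroring mapping.Session.remove() above.
        Session.remove()

This replaces the old pattern of calling refresh() on the job and each dataset individually, and it is also why JobWrapper now takes the job entity itself and caches the immutable session_id instead of re-fetching the job just to answer get_session_id().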