commit/galaxy-central: jmchilton: Improvements to test/api/test_workflow_extraction.py.
1 new commit in galaxy-central:

https://bitbucket.org/galaxy/galaxy-central/commits/87ef26ad1876/
Changeset:   87ef26ad1876
User:        jmchilton
Date:        2014-12-03 03:17:25+00:00
Summary:     Improvements to test/api/test_workflow_extraction.py.

Fill out the dataset collection parameter test and add a new test for a
workflow that includes subcollection mapping. Move toward orchestrating
jobs in this file via a high-level YAML description of steps and test
data, as well as some higher-level methods for asserting various
properties of extracted workflows (step counts, input types, tools used,
connectedness, etc.). Allow ordering the jobs index API by create_time
instead of update_time.
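For context, the high-level style these tests move toward describes steps
and test data in YAML and drives everything through a single call. A
condensed sketch of the pattern, adapted from the first test in the diff
below (all labels and tool ids come from that test):

    jobs_summary = self._run_jobs("""
    steps:
      - label: text_input1
        type: input_collection
      - tool_id: collection_paired_test
        state:
          f1:
            $link: text_input1
    test_data:
      text_input1:
        type: paired
    """)
    # Look up the job the tool step produced.
    job_id = self._job_id_for_tool( jobs_summary.jobs, "collection_paired_test" )

_run_jobs uploads the YAML as a workflow, creates the described inputs,
invokes the workflow, waits for it to finish, and returns a
RunJobsSummary namedtuple of ( history_id, workflow_id, inputs, jobs ).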
Affected #:  4 files

diff -r a734cbf4bd368198e2c813b5ea61e807f9a1af33 -r 87ef26ad1876d0d6162eb4b6e7c93c8d69fbc4d6 lib/galaxy/webapps/galaxy/api/jobs.py
--- a/lib/galaxy/webapps/galaxy/api/jobs.py
+++ b/lib/galaxy/webapps/galaxy/api/jobs.py
@@ -73,9 +73,11 @@
             raise exceptions.ObjectAttributeInvalidException()
 
         out = []
-        for job in query.order_by(
-            trans.app.model.Job.update_time.desc()
-        ).all():
+        if kwd.get( 'order_by' ) == 'create_time':
+            order_by = trans.app.model.Job.create_time.desc()
+        else:
+            order_by = trans.app.model.Job.update_time.desc()
+        for job in query.order_by( order_by ).all():
             out.append( self.encode_all_ids( trans, job.to_dict( 'collection' ), True ) )
         return out
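The jobs.py change adds an opt-in order_by=create_time query parameter to
the jobs index API; any other value keeps the previous update_time
ordering, and both sort descending. A minimal sketch of a client call,
assuming a local Galaxy instance and a placeholder API key:

    import requests

    # Jobs come back newest-first because the ordering is descending.
    response = requests.get(
        "http://localhost:8080/api/jobs",
        params={ "key": "<api-key>", "order_by": "create_time" },
    )
    jobs = response.json()

The _history_jobs helper added to test_workflow_extraction.py below uses
exactly this parameter so that job ordering tracks creation time rather
than last update.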
diff -r a734cbf4bd368198e2c813b5ea61e807f9a1af33 -r 87ef26ad1876d0d6162eb4b6e7c93c8d69fbc4d6 test/api/helpers.py
--- a/test/api/helpers.py
+++ b/test/api/helpers.py
@@ -76,7 +76,10 @@
         return run_response.json()["outputs"][0]
 
     def wait_for_history( self, history_id, assert_ok=False, timeout=DEFAULT_HISTORY_TIMEOUT ):
-        wait_on_state( lambda: self.galaxy_interactor.get( "histories/%s" % history_id ), assert_ok=assert_ok, timeout=timeout )
+        return wait_on_state( lambda: self.galaxy_interactor.get( "histories/%s" % history_id ), assert_ok=assert_ok, timeout=timeout )
+
+    def wait_for_job( self, job_id, assert_ok=False, timeout=DEFAULT_HISTORY_TIMEOUT ):
+        return wait_on_state( lambda: self.galaxy_interactor.get( "jobs/%s" % job_id ), assert_ok=assert_ok, timeout=timeout )
 
     def new_history( self, **kwds ):
         name = kwds.get( "name", "API Test History" )
@@ -296,6 +299,12 @@
         )
         return self.__create( payload )
 
+    def create_list_of_pairs_in_history( self, history_id, **kwds ):
+        pair1 = self.create_pair_in_history( history_id, **kwds ).json()["id"]
+        #pair2 = self.create_pair_in_history( history_id, **kwds ).json()["id"]
+        #pair3 = self.create_pair_in_history( history_id, **kwds ).json()["id"]
+        return self.create_list_from_pairs( history_id, [ pair1 ] )
+
     def create_pair_in_history( self, history_id, **kwds ):
         payload = self.create_pair_payload(
             history_id,
@@ -375,13 +384,13 @@
         response = state_func()
         assert response.status_code == 200, "Failed to fetch state update while waiting."
         state = response.json()[ "state" ]
-        if state not in [ "running", "queued", "new" ]:
+        if state not in [ "running", "queued", "new", "ready" ]:
             if assert_ok:
                 assert state == "ok", "Final state - %s - not okay." % state
             return state
         else:
             return None
-    wait_on( get_state, desc="state", timeout=timeout)
+    return wait_on( get_state, desc="state", timeout=timeout)
 
 
 def wait_on( function, desc, timeout=5 ):
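With the helpers.py changes, wait_for_history (and the new wait_for_job)
now return the terminal state reported by wait_on_state instead of
discarding it, so callers can branch or assert on it. An illustrative
sketch, with job_id assumed to come from an earlier tool run:

    # Blocks until the job leaves a non-terminal state ("running",
    # "queued", "new", "ready"), then returns the state it landed in.
    final_state = self.dataset_populator.wait_for_job( job_id )
    assert final_state == "ok", "Job ended in state %s" % final_state

Passing assert_ok=True instead performs the same terminal-state check
inside the helper.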
diff -r a734cbf4bd368198e2c813b5ea61e807f9a1af33 -r 87ef26ad1876d0d6162eb4b6e7c93c8d69fbc4d6 test/api/test_workflow_extraction.py
--- a/test/api/test_workflow_extraction.py
+++ b/test/api/test_workflow_extraction.py
@@ -1,8 +1,12 @@
+from collections import namedtuple
+import functools
 from json import dumps, loads
 import operator
 
 from .helpers import skip_without_tool
+from .helpers import wait_on_state
 from .test_workflows import BaseWorkflowsApiTestCase
+import yaml
 
 
 class WorkflowExtractionApiTestCase( BaseWorkflowsApiTestCase ):
@@ -110,27 +114,147 @@
 
     @skip_without_tool( "collection_paired_test" )
     def test_extract_workflows_with_dataset_collections( self ):
-        hdca = self.dataset_collection_populator.create_pair_in_history( self.history_id ).json()
-        hdca_id = hdca[ "id" ]
-        inputs = {
-            "f1": dict( src="hdca", id=hdca_id )
-        }
-        run_output = self.dataset_populator.run_tool(
-            tool_id="collection_paired_test",
-            inputs=inputs,
-            history_id=self.history_id,
-        )
-        job_id = run_output[ "jobs" ][ 0 ][ "id" ]
-        self.dataset_populator.wait_for_history( self.history_id, assert_ok=True )
+        jobs_summary = self._run_jobs("""
+steps:
+  - label: text_input1
+    type: input_collection
+  - tool_id: collection_paired_test
+    state:
+      f1:
+        $link: text_input1
+test_data:
+  text_input1:
+    type: paired
+""")
+        job_id = self._job_id_for_tool( jobs_summary.jobs, "collection_paired_test" )
         downloaded_workflow = self._extract_and_download_workflow(
-            dataset_collection_ids=[ hdca[ "hid" ] ],
+            dataset_collection_ids=[ jobs_summary.inputs["text_input1"]["hid"] ],
             job_ids=[ job_id ],
         )
-        collection_steps = self._get_steps_of_type( downloaded_workflow, "data_collection_input", expected_len=1 )
-        collection_step = collection_steps[ 0 ]
+        self.__check_workflow(
+            downloaded_workflow,
+            step_count=2,
+            verify_connected=True,
+            data_input_count=0,
+            data_collection_input_count=1,
+            tool_ids=["collection_paired_test"]
+        )
+
+        collection_step = self._get_steps_of_type( downloaded_workflow, "data_collection_input", expected_len=1 )[ 0 ]
         collection_step_state = loads( collection_step[ "tool_state" ] )
         self.assertEquals( collection_step_state[ "collection_type" ], u"paired" )
 
+    def test_subcollection_mapping( self ):
+        jobs_summary = self._run_jobs("""
+steps:
+  - label: text_input1
+    type: input_collection
+  - label: noop
+    tool_id: cat1
+    state:
+      input1:
+        $link: text_input1
+  - tool_id: cat_collection
+    state:
+      input1:
+        $link: noop#out_file1
+test_data:
+  text_input1:
+    type: "list:paired"
+""")
+        job1_id = self._job_id_for_tool( jobs_summary.jobs, "cat1" )
+        job2_id = self._job_id_for_tool( jobs_summary.jobs, "cat_collection" )
+        downloaded_workflow = self._extract_and_download_workflow(
+            dataset_collection_ids=[ jobs_summary.inputs["text_input1"]["hid"] ],
+            job_ids=[ job1_id, job2_id ],
+        )
+        print jobs_summary.inputs["text_input1"]
+        self.__check_workflow(
+            downloaded_workflow,
+            step_count=3,
+            verify_connected=True,
+            data_input_count=0,
+            data_collection_input_count=1,
+            tool_ids=["cat_collection", "cat1"],
+        )
+
+        collection_step = self._get_steps_of_type( downloaded_workflow, "data_collection_input", expected_len=1 )[ 0 ]
+        collection_step_state = loads( collection_step[ "tool_state" ] )
+        self.assertEquals( collection_step_state[ "collection_type" ], u"list:paired" )
+
+    def _run_jobs( self, jobs_yaml ):
+        history_id = self.history_id
+        workflow_id = self._upload_yaml_workflow(
+            jobs_yaml
+        )
+        jobs_descriptions = yaml.load( jobs_yaml )
+        test_data = jobs_descriptions["test_data"]
+
+        label_map = {}
+        inputs = {}
+        for key, value in test_data.items():
+            if isinstance( value, dict ):
+                elements_data = value.get( "elements", [] )
+                elements = []
+                for element_data in elements_data:
+                    identifier = element_data[ "identifier" ]
+                    content = element_data["content"]
+                    elements.append( ( identifier, content ) )
+                collection_type = value["type"]
+                if collection_type == "list:paired":
+                    hdca = self.dataset_collection_populator.create_list_of_pairs_in_history( history_id ).json()
+                elif collection_type == "list":
+                    hdca = self.dataset_collection_populator.create_list_in_history( history_id, contents=elements ).json()
+                else:
+                    hdca = self.dataset_collection_populator.create_pair_in_history( history_id, contents=elements ).json()
+                label_map[key] = self._ds_entry( hdca )
+                inputs[key] = hdca
+            else:
+                hda = self.dataset_populator.new_dataset( history_id, content=value )
+                label_map[key] = self._ds_entry( hda )
+                inputs[key] = hda
+        workflow_request = dict(
+            history="hist_id=%s" % history_id,
+            workflow_id=workflow_id,
+        )
+        workflow_request[ "inputs" ] = dumps( label_map )
+        workflow_request[ "inputs_by" ] = 'name'
+        self.dataset_populator.wait_for_history( history_id, assert_ok=True )
+        url = "workflows/%s/usage" % ( workflow_id )
+        invocation_response = self._post( url, data=workflow_request )
+        self._assert_status_code_is( invocation_response, 200 )
+        invocation = invocation_response.json()
+        invocation_id = invocation[ "id" ]
+        # Wait for the workflow to become fully scheduled and then for all
+        # jobs to complete.
+        self.wait_for_invocation( workflow_id, invocation_id )
+        self.dataset_populator.wait_for_history( history_id, assert_ok=True )
+        jobs = self._history_jobs( history_id )
+        return RunJobsSummary(
+            history_id=history_id,
+            workflow_id=workflow_id,
+            inputs=inputs,
+            jobs=jobs,
+        )
+
+    def wait_for_invocation( self, workflow_id, invocation_id ):
+        url = "workflows/%s/usage/%s" % ( workflow_id, invocation_id )
+        return wait_on_state( lambda: self._get( url ) )
+
+    def _history_jobs( self, history_id ):
+        return self._get( "jobs", { "history_id": history_id, "order_by": "create_time" } ).json()
+
+    def _job_id_for_tool( self, jobs, tool_id ):
+        return self._job_for_tool( jobs, tool_id )[ "id" ]
+
+    def _job_for_tool( self, jobs, tool_id ):
+        tool_jobs = filter( lambda j: j["tool_id"] == tool_id, jobs )
+        if not tool_jobs:
+            assert False, "Failed to find job for tool %s" % tool_id
+        # if len( tool_jobs ) > 1:
+        #     assert False, "Found multiple jobs for tool %s" % tool_id
+        return tool_jobs[ -1 ]
+
     def __run_random_lines_mapped_over_pair( self, history_id ):
         hdca = self.dataset_collection_populator.create_pair_in_history( history_id, contents=["1 2 3\n4 5 6", "7 8 9\n10 11 10"] ).json()
         hdca_id = hdca[ "id" ]
@@ -266,3 +390,45 @@
         job_id = run_output1[ "jobs" ][ 0 ][ "id" ]
         self.dataset_populator.wait_for_history( history_id, assert_ok=True, timeout=20 )
         return implicit_hdca, job_id
+
+    def __check_workflow(
+        self,
+        workflow,
+        step_count=None,
+        verify_connected=False,
+        data_input_count=None,
+        data_collection_input_count=None,
+        tool_ids=None,
+    ):
+        steps = workflow[ 'steps' ]
+
+        if step_count is not None:
+            assert len( steps ) == step_count
+        if verify_connected:
+            self.__assert_connected( workflow, steps )
+        if tool_ids is not None:
+            tool_steps = self._get_steps_of_type( workflow, "tool" )
+            found_steps = set(map(operator.itemgetter("tool_id"), tool_steps))
+            expected_steps = set(tool_ids)
+            assert found_steps == expected_steps
+        if data_input_count is not None:
+            self._get_steps_of_type( workflow, "data_input", expected_len=data_input_count )
+        if data_collection_input_count is not None:
+            self._get_steps_of_type( workflow, "data_collection_input", expected_len=data_collection_input_count )
+
+    def __assert_connected( self, workflow, steps ):
+        disconnected_inputs = []
+
+        for key, value in steps.items():
+            if value[ 'type' ] == "tool":
+                input_connections = value[ "input_connections" ]
+                if not input_connections:
+                    disconnected_inputs.append( value )
+
+        if disconnected_inputs:
+            template = "%d step(s) disconnected in extracted workflow - disconnected steps are %s - workflow is %s"
+            message = template % ( len( disconnected_inputs ), disconnected_inputs, workflow )
+            raise AssertionError( message )
+
+
+RunJobsSummary = namedtuple('RunJobsSummary', ['history_id', 'workflow_id', 'inputs', 'jobs'])
diff -r a734cbf4bd368198e2c813b5ea61e807f9a1af33 -r 87ef26ad1876d0d6162eb4b6e7c93c8d69fbc4d6 test/api/test_workflows.py
--- a/test/api/test_workflows.py
+++ b/test/api/test_workflows.py
@@ -95,6 +95,12 @@
         workflow_inputs = workflow_show_resposne.json()[ "inputs" ]
         return workflow_inputs
 
+    def _invocation_details( self, workflow_id, invocation_id ):
+        invocation_details_response = self._get( "workflows/%s/usage/%s" % ( workflow_id, invocation_id ) )
+        self._assert_status_code_is( invocation_details_response, 200 )
+        invocation_details = invocation_details_response.json()
+        return invocation_details
+
     # Workflow API TODO:
     # - Allow history_id as param to workflow run action. (hist_id)
@@ -530,12 +536,6 @@
         self._assert_status_code_is( step_response, 200 )
         self._assert_has_keys( step_response.json(), "id", "order_index" )
 
-    def _invocation_details( self, workflow_id, invocation_id ):
-        invocation_details_response = self._get( "workflows/%s/usage/%s" % ( workflow_id, invocation_id ) )
-        self._assert_status_code_is( invocation_details_response, 200 )
-        invocation_details = invocation_details_response.json()
-        return invocation_details
-
     def _invocation_step_details( self, workflow_id, invocation_id, step_id ):
         invocation_step_response = self._get( "workflows/%s/usage/%s/steps/%s" % ( workflow_id, invocation_id, step_id ) )
         self._assert_status_code_is( invocation_step_response, 200 )

Repository URL: https://bitbucket.org/galaxy/galaxy-central/

--

This is a commit notification from bitbucket.org. You are receiving this
because you have the service enabled, addressing the recipient of this
email.