Unfortunately, this is one of the most glaring problems with Galaxy's UI-based workflow engine (that, and the lack of modular workflows).
You cannot easily pair up more than one set of paired-end reads from the UI; you need to use the API to run pairs of FASTQ files like that.
I have written a script that does this through the API. An excerpt is below.
You won't be able to run it as-is, but it gives you some idea of the hoops you have to jump through to do this.
The INPUT FILE DEFINITION section is the meat of the script: I simply cycle through all the files in a Library and match up the files myself. Ugly, but it works.
#!/usr/bin/env python
###############################################################################
#
# MUT190-1.py
#
# Version 2.0.0
# Created: 5-Apr-2012
# By Thon de Boer, GHI
# Last Update: 23-Apr-2012
# By Thon de Boer, GHI
#
# This program will run step MUT190-1; the alignment of FASTQ file with BWA
# It is based on the generic GALAXY Workflow execution engine,
# but has hardcoded defaults, seen below
#
# Version 1.0.1: Added get_folder_id
# Version 2.0.0: Made the config file an option (and the only option)
#
"""
Execute a specific workflow on a specific set of library datasets.
It is created for running the GATK pipeline on a selected set of paired-end reads.
Input to the tool is a configuration file (-c/--config); the paired-end FASTQ files are extracted from the Galaxy data library named in it, and each pair is run in its own output history.
"""
import os, sys, optparse, shutil, subprocess, tempfile, fileinput, psycopg2, ConfigParser
sys.path.insert( 0, os.path.dirname( __file__ ) )
sys.path.insert( 0, "/home/tdeboer/g/scripts/api" )
from common import *
from ThonsModules import *
def main():
    """Submit the configured Galaxy workflow once per paired-end FASTQ pair.

    Command line:
        -c/--config  path to an INI-style configuration file (required).
        -t/--test    print each request payload instead of submitting it.

    The configuration file must have a [GALAXY] section providing
    GALAXY_LIBRARY_FASTQ, GALAXY_WORKFLOW_DEFAULT, GALAXY_OUTHISTORY_DEFAULT,
    GALAXY_URL and MYKEY.  Surrounding double quotes on the values are
    stripped.

    Every file in the library whose name contains '_R1.fastq' is paired with
    the file of the same name with '_R1.fastq' replaced by '_R2.fastq'.  A
    read-1 file without a matching read-2 file is skipped with a warning
    rather than aborting the remaining submissions.

    The helpers get_library, get_files_from_library, get_workflow,
    get_workflow_step, get_library_file and submit are not defined in this
    file; they presumably come from the star imports of `common` and
    `ThonsModules` -- TODO confirm.

    Exits with status 1 (after printing an error) when the configuration is
    missing or incomplete, when the library yields no files, or when a
    library/workflow lookup raises.
    """
    # Parse command line
    parser = optparse.OptionParser()
    parser.add_option('-c', '--config', dest='config', type='string',
                      help='the configuration file containing all the settings for this workflow')
    parser.add_option('-t', '--test', action='store_true', dest='test',
                      help='Do not submit, but test the settings', default=False)
    options, _args = parser.parse_args()

    if not options.config:
        print("Error: No config file proivided. Please provide a configuration file with the -c,--config flag.")
        sys.exit(1)

    config = ConfigParser.RawConfigParser()

    def setting(name):
        # Values may be wrapped in double quotes in the config file.
        return config.get('GALAXY', name).strip('"')

    try:
        # RawConfigParser.read() silently ignores unreadable files; a missing
        # file therefore surfaces as a missing-section error from get() below.
        config.read(options.config)
        library_name = setting('GALAXY_LIBRARY_FASTQ')
        workflow_name = setting('GALAXY_WORKFLOW_DEFAULT')
        out_history = setting('GALAXY_OUTHISTORY_DEFAULT')
        galaxy_url = setting('GALAXY_URL')
        api_key = setting('MYKEY')
    except Exception as e:
        print('Error: %s' % e)
        sys.exit(1)

    data = {}

    # Find the files that go with the library, and the workflow to run
    try:
        history_prefix = out_history + '-'
        lib_id = get_library(api_key, galaxy_url, library_name)
        files = get_files_from_library(api_key, galaxy_url, library_name, lib_id)
        workflow_id = None
        if files:
            workflow_id = get_workflow(api_key, galaxy_url, workflow_name)
            data['workflow_id'] = workflow_id
    except Exception as e:
        print('Error: %s' % e)
        sys.exit(1)

    # Truthiness check also rejects None, which the loop below cannot iterate.
    if not files:
        print('Error: Did not get any files from the library')
        sys.exit(1)

    data['ds_map'] = {}

    ###########################################################################
    #
    # WORKFLOW STEP DEFINITION
    #
    ###########################################################################
    # Standard library datasets: (workflow input step label, library path).
    # Only change the label and path text when adapting this script.
    fixed_inputs = (
        ('BAITS',
         'MUT190/Baits and Targets/SS_Mut_v2_baitsMUT190.bed'),
        ('TARGETS',
         'MUT190/Baits and Targets/SS_Mut_v2_targetsMUT190_PLUS60.interval'),
        ('TARGETS (GATK)',
         'MUT190/Baits and Targets/SS_Mut_v2_targetsMUT190_PLUS400.gatk-interval'),
        ('dbSNP VCF File',
         'Annotations/Annotations used in GATK-1.5/dbsnp_135.b37.vcf'),
        ('Mills_and_1000G_gold_standard',
         'Annotations/Annotations used in GATK-1.5/Mills_and_1000G_gold_standard.indels.b37.sites.vcf'),
    )
    try:
        for step_label, library_path in fixed_inputs:
            data['ds_map'][get_workflow_step(api_key, galaxy_url, workflow_id, step_label)] = \
                get_library_file(api_key, galaxy_url, library_path)

        # The two FASTQ input steps are the same for every pair, so look
        # them up once instead of on every loop iteration.
        fastq_step_1 = get_workflow_step(api_key, galaxy_url, workflow_id, 'FASTQ Input (#1)')
        fastq_step_2 = get_workflow_step(api_key, galaxy_url, workflow_id, 'FASTQ Input (#2)')
    except Exception as e:
        print("Error: Problem with finding all the input steps for this workflow: %s" % e)
        sys.exit(1)

    ###########################################################################
    #
    # INPUT FILE DEFINITION
    # These are the files that are cycled over
    #
    ###########################################################################
    url = "%s/workflows" % galaxy_url
    # Sorted so the submission order is reproducible between runs
    # (assumes the keys of `files` are path strings -- TODO confirm).
    for f in sorted(files):
        if '_R1.fastq' not in f:
            continue
        f2 = f.replace('_R1.fastq', '_R2.fastq')
        if f2 not in files:
            # Previously an uncaught KeyError that aborted the whole run.
            print("Warning: No matching _R2 file found for %s; skipping" % f)
            continue

        data['ds_map'][fastq_step_1] = {'src': 'ldda', 'id': files[f]}
        data['ds_map'][fastq_step_2] = {'src': 'ldda', 'id': files[f2]}
        # History will be using PREFIX-FILENAME format
        data['history'] = "%s%s" % (history_prefix, f.split('/')[-1])

        print('-------------------------------------------------------------------')
        print("Preparing workflow for :\n%s\n" % f)
        if options.test:
            print(data)
        else:
            submit(api_key, url, data)
# Run the workflow submission only when executed as a script, not on import.
if __name__ == '__main__':
    main()