details: http://www.bx.psu.edu/hg/galaxy/rev/eb941905fd70 changeset: 1509:eb941905fd70 user: guru date: Tue Sep 16 14:09:16 2008 -0400 description: Rewrote "Compare two queries" tool in Python. 2 file(s) affected in this change: tools/filters/compare.xml tools/filters/joinWrapper.py diffs (68 lines): diff -r ec547440ec97 -r eb941905fd70 tools/filters/compare.xml --- a/tools/filters/compare.xml Tue Sep 16 13:25:42 2008 -0400 +++ b/tools/filters/compare.xml Tue Sep 16 14:09:16 2008 -0400 @@ -1,6 +1,6 @@ <tool id="comp1" name="Compare two Queries"> <description>to find common or distinct rows</description> - <command interpreter="perl">joinWrapper.pl $input1 $input2 $field1 $field2 $mode "Y" $out_file1</command> + <command interpreter="python">joinWrapper.py $input1 $input2 $field1 $field2 $mode $out_file1</command> <inputs> <param format="tabular" name="input1" type="data" label="Compare"/> <param name="field1" label="Using column" type="data_column" data_ref="input1" /> diff -r ec547440ec97 -r eb941905fd70 tools/filters/joinWrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/filters/joinWrapper.py Tue Sep 16 14:09:16 2008 -0400 @@ -0,0 +1,53 @@ +#!/usr/bin/env python +#Guruprasad Ananda +""" +This tool provides the UNIX "join" functionality. 
import os
import subprocess
import sys
import tempfile


def stop_err(msg):
    """Write *msg* to stderr and terminate the script.

    Exits with status 1 so that callers inspecting the exit code (and not
    only stderr) can tell that the run failed.
    """
    sys.stderr.write(msg)
    sys.exit(1)


def _run(argv, failure_msg, stdout=None):
    """Run an external command without a shell and abort if it fails.

    argv        -- program name followed by its arguments, as a list.
    failure_msg -- prefix of the message handed to stop_err() on failure.
    stdout      -- optional open file object that receives the command's
                   standard output.
    """
    try:
        status = subprocess.call(argv, stdout=stdout)
    except OSError as exc:
        # Raised when the program itself cannot be started (e.g. not found).
        stop_err('%s -> %s' % (failure_msg, exc))
    if status != 0:
        stop_err('%s -> "%s" exited with status %d'
                 % (failure_msg, argv[0], status))


def _join_output_format(path):
    """Build the field list for join's -o option from a tab-delimited file.

    Returns "1.1,1.2,...,1.N" where N is the number of tab-separated columns
    on the first non-blank line of *path*, or "" if the file has no such line.
    """
    handle = open(path)
    try:
        for line in handle:
            if line.strip():
                # Only the line terminator is removed, so empty leading or
                # trailing columns are still counted.
                ncols = len(line.rstrip('\r\n').split('\t'))
                return ','.join('1.%d' % col for col in range(1, ncols + 1))
    finally:
        handle.close()
    return ''


def main():
    """Compare two tab-delimited files on one column each, like UNIX join.

    Command-line arguments (sys.argv[1:]):
        infile1 infile2 field1 field2 mode outfile

    field1 and field2 are 1-based column numbers. With mode "V" the rows of
    infile1 that have no match in infile2 are written to outfile (join -v 1);
    with any other mode the matching rows are written. Only the columns of
    infile1 are emitted. Any failure is reported through stop_err().
    """
    if len(sys.argv) != 7:
        stop_err('Usage: joinWrapper.py infile1 infile2 field1 field2 '
                 'mode outfile\n')

    infile1 = sys.argv[1]
    infile2 = sys.argv[2]
    try:
        field1 = int(sys.argv[3])
        field2 = int(sys.argv[4])
    except ValueError as exc:
        stop_err('Initialization error -> %s' % exc)
    mode = sys.argv[5]
    outfile = sys.argv[6]

    tmpfile1 = tempfile.NamedTemporaryFile()
    tmpfile2 = tempfile.NamedTemporaryFile()
    try:
        # join needs both inputs sorted on the join field. An explicit TAB
        # delimiter and a key restricted to that one field (-k N,N) keep
        # sort and join in agreement when values contain spaces.
        for field, tmp, src in ((field1, tmpfile1, infile1),
                                (field2, tmpfile2, infile2)):
            _run(['sort', '-t', '\t', '-k', '%d,%d' % (field, field),
                  '-o', tmp.name, src],
                 'Initialization error')

        option = _join_output_format(tmpfile1.name)

        out = open(outfile, 'w')
        try:
            # With no data rows in the first input there is nothing to join;
            # the output file is simply left empty.
            if option:
                cmd = ['join', '-t', '\t']
                if mode == 'V':
                    cmd.extend(['-v', '1'])
                cmd.extend(['-o', option,
                            '-1', str(field1), '-2', str(field2),
                            tmpfile1.name, tmpfile2.name])
                _run(cmd, 'Error joining the two datasets', stdout=out)
        finally:
            out.close()
    finally:
        # Closing a NamedTemporaryFile also deletes it.
        tmpfile1.close()
        tmpfile2.close()


if __name__ == "__main__":
    main()