Hi,
I've enabled the 'library_import_dir' option in universe_wsgi.ini as:
library_import_dir = /homes/www-galaxy/data_import
Its contents are:
www-galaxy@ge-004: data_import> ls -l
total 96
lrwxrwxrwx 1 www-galaxy barton 40 Mar 4 14:40 Dd_inuc_1.txt ->
/homes/pschofield/data/TOH/Dd_inuc_1.txt
-rw-r--r-- 1 www-galaxy barton 142 Mar 5 13:50 README
However, both files fail to import with this error:
Traceback (most recent call last): File
"/homes/www-galaxy/galaxy_devel/tools/data_source/upload.py", line 326,
in __main__() File
"/homes/www-galaxy/galaxy_devel/tools/data_source/upload.py", line 318,
in __main__ add_file( dataset,
error
The strangeness comes in if I copy the Dd_inuc_1.txt file to a directory
owned by the www-galaxy user and link to it there. Then, the import
works for both files.
www-galaxy@ge-004: data_import> ls -l
total 96
lrwxrwxrwx 1 www-galaxy barton 20 Mar 5 13:56 Dd_inuc_1.txt ->
../tmp/Dd_inuc_1.txt
-rw-r--r-- 1 www-galaxy barton 142 Mar 5 13:50 README
The permissions are correct for the file:
www-galaxy@ge-004: data_import> ls -l
/homes/pschofield/data/TOH/Dd_inuc_1.txt
-rw-r--r-- 1 pschofield barton 966480362 Jan 18 16:39
/homes/pschofield/data/TOH/Dd_inuc_1.txt
I've no idea why Galaxy cares about who owns the files as it should be
able to see them, but something's screwy. Anyone have ideas as to what's
going on here?
Cheers,
Chris
--
Dr Chris Cole
Senior Bioinformatics Research Officer
School of Life Sciences Research
University of Dundee
Dow Street
Dundee
DD1 5EH
Scotland, UK
url: http://network.nature.com/profile/drchriscole
e-mail: chris(a)compbio.dundee.ac.uk
Tel: +44 (0)1382 388 721
The University of Dundee is a registered Scottish charity, No: SC015096
Hi,
I just did an hg pull for the latest version and I've got errors trying
to run the 'Fastq Groomer' and 'Fastq Summary Statistics' tools. It
looks like they're related. The old Fastq Groomer as found in the
'Deprecated' tools section works fine.
Groomer:
Traceback (most recent call last):
File "/homes/www-galaxy/galaxy_devel/tools/fastq/fastq_groomer.py",
line 3, in ?
from galaxy_utils.sequence.fastq import fastqReader,
fastqAggregator, fastqWriter
File
"/homes/www-galaxy/galaxy_devel/lib/galaxy_utils/sequence/fastq.py",
line 267
class fastqAggregator():
^
SyntaxError: invalid syntax
Summary Stats:
Traceback (most recent call last):
File "/homes/www-galaxy/galaxy_devel/tools/fastq/fastq_stats.py",
line 3, in ?
from galaxy_utils.sequence.fastq import fastqReader, fastqAggregator
File
"/homes/www-galaxy/galaxy_devel/lib/galaxy_utils/sequence/fastq.py",
line 267
class fastqAggregator():
^
SyntaxError: invalid syntax
Is this a problem at my end or in Galaxy itself? This is python 2.6, BTW.
Cheers,
Chris
--
Dr Chris Cole
Senior Bioinformatics Research Officer
School of Life Sciences Research
University of Dundee
Dow Street
Dundee
DD1 5EH
Scotland, UK
url: http://network.nature.com/profile/drchriscole
e-mail: chris(a)compbio.dundee.ac.uk
Tel: +44 (0)1382 388 721
The University of Dundee is a registered Scottish charity, No: SC015096
Hello,
Is there a way to pass SGE options to the runner? Specifically, I'd like to
tell the scheduler the number of cores required by a tool (-pe).
Thanks,
Ed Kirton
US DOE JGI
details: http://www.bx.psu.edu/hg/galaxy/rev/3563fade2c86
changeset: 3489:3563fade2c86
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Fri Mar 05 17:21:16 2010 -0500
description:
Add the ability for tests to have a lines_diff attribute set for standard output datasets that will allow the specified number of lines to vary between test-data and tool run results; a 'change' to a line equates to a difference of 2 lines between files (i.e. 1 line added, 1 line removed). Add the ability for test comparisons to use single or multiline regular expressions, this is accessed by adding the attribute compare= "match_re" or "match_re_multiline" (respectively); the standard diff method can be explicitly declared as "diff", or is used at the default when not specified.
diffstat:
lib/galaxy/tools/__init__.py | 7 +-
lib/galaxy/tools/test.py | 4 +-
test/base/twilltestcase.py | 134 ++++++++++++++++++++++++++++-----------
test/functional/test_toolbox.py | 4 +-
4 files changed, 106 insertions(+), 43 deletions(-)
diffs (225 lines):
diff -r 45cfcab6aed7 -r 3563fade2c86 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Fri Mar 05 12:55:51 2010 -0500
+++ b/lib/galaxy/tools/__init__.py Fri Mar 05 17:21:16 2010 -0500
@@ -532,8 +532,11 @@
file = attrib.pop( 'file', None )
if file is None:
raise Exception( "Test output does not have a 'file'")
- sort = util.string_as_bool( attrib.pop( 'sort', False ) )
- test.add_output( name, file, sort )
+ attributes = Bunch()
+ attributes.compare = attrib.pop( 'compare', 'diff' ).lower() #method of comparison
+ attributes.lines_diff = int( attrib.pop( 'lines_diff', '0' ) ) # allow a few lines (dates etc) to vary in logs
+ attributes.sort = util.string_as_bool( attrib.pop( 'sort', False ) )
+ test.add_output( name, file, attributes )
except Exception, e:
test.error = True
test.exception = e
diff -r 45cfcab6aed7 -r 3563fade2c86 lib/galaxy/tools/test.py
--- a/lib/galaxy/tools/test.py Fri Mar 05 12:55:51 2010 -0500
+++ b/lib/galaxy/tools/test.py Fri Mar 05 17:21:16 2010 -0500
@@ -37,8 +37,8 @@
except Exception, e:
log.debug( "Error in add_param for %s: %s" % ( name, e ) )
self.inputs.append( ( name, value, extra ) )
- def add_output( self, name, file, sort ):
- self.outputs.append( ( name, file, sort ) )
+ def add_output( self, name, file, extra ):
+ self.outputs.append( ( name, file, extra ) )
def __expand_grouping_for_data_input( self, name, value, extra, grouping_name, grouping_value ):
# Currently handles grouping.Conditional and grouping.Repeat
if isinstance( grouping_value, grouping.Conditional ):
diff -r 45cfcab6aed7 -r 3563fade2c86 test/base/twilltestcase.py
--- a/test/base/twilltestcase.py Fri Mar 05 12:55:51 2010 -0500
+++ b/test/base/twilltestcase.py Fri Mar 05 17:21:16 2010 -0500
@@ -36,14 +36,21 @@
#self.set_history()
# Functions associated with files
- def files_diff( self, file1, file2, sort=False ):
+ def files_diff( self, file1, file2, attributes=None ):
"""Checks the contents of 2 files for differences"""
+ def get_lines_diff( diff ):
+ count = 0
+ for line in diff:
+ if ( line.startswith( '+' ) and not line.startswith( '+++' ) ) or ( line.startswith( '-' ) and not line.startswith( '---' ) ):
+ count += 1
+ return count
if not filecmp.cmp( file1, file2 ):
files_differ = False
local_file = open( file1, 'U' ).readlines()
history_data = open( file2, 'U' ).readlines()
- if sort:
+ if attributes and attributes.sort:
history_data.sort()
+ ##Why even bother with the check loop below, why not just use the diff output? This seems wasteful.
if len( local_file ) == len( history_data ):
for i in range( len( history_data ) ):
if local_file[i].rstrip( '\r\n' ) != history_data[i].rstrip( '\r\n' ):
@@ -52,37 +59,79 @@
else:
files_differ = True
if files_differ:
- diff = difflib.unified_diff( local_file, history_data, "local_file", "history_data" )
- diff_slice = list( islice( diff, 40 ) )
- if file1.endswith( '.pdf' ) or file2.endswith( '.pdf' ):
- # PDF files contain creation dates, modification dates, ids and descriptions that change with each
- # new file, so we need to handle these differences. As long as the rest of the PDF file does
- # not differ we're ok.
- valid_diff_strs = [ 'description', 'createdate', 'creationdate', 'moddate', 'id' ]
- valid_diff = False
- for line in diff_slice:
- # Make sure to lower case strings before checking.
- line = line.lower()
- # Diff lines will always start with a + or - character, but handle special cases: '--- local_file \n', '+++ history_data \n'
- if ( line.startswith( '+' ) or line.startswith( '-' ) ) and line.find( 'local_file' ) < 0 and line.find( 'history_data' ) < 0:
- for vdf in valid_diff_strs:
- if line.find( vdf ) < 0:
- valid_diff = False
- else:
- valid_diff = True
- # Stop checking as soon as we know we have a valid difference
- break
- if not valid_diff:
- # Print out diff_slice so we can see what failed
- print "###### diff_slice ######"
- raise AssertionError( "".join( diff_slice ) )
- break
+ if attributes and attributes.lines_diff:
+ allowed_diff_count = attributes.lines_diff
else:
- for line in diff_slice:
- for char in line:
- if ord( char ) > 128:
- raise AssertionError( "Binary data detected, not displaying diff" )
- raise AssertionError( "".join( diff_slice ) )
+ allowed_diff_count = 0
+ diff = list( difflib.unified_diff( local_file, history_data, "local_file", "history_data" ) )
+ diff_lines = get_lines_diff( diff )
+ if diff_lines > allowed_diff_count:
+ diff_slice = diff[0:40]
+ #FIXME: This pdf stuff is rather special cased and has not been updated to consider lines_diff
+ #due to unknown desired behavior when used in conjunction with a non-zero lines_diff
+ #PDF forgiveness can probably be handled better by not special casing by __extension__ here
+ #and instead using lines_diff or a regular expression matching
+ #or by creating and using a specialized pdf comparison function
+ if file1.endswith( '.pdf' ) or file2.endswith( '.pdf' ):
+ # PDF files contain creation dates, modification dates, ids and descriptions that change with each
+ # new file, so we need to handle these differences. As long as the rest of the PDF file does
+ # not differ we're ok.
+ valid_diff_strs = [ 'description', 'createdate', 'creationdate', 'moddate', 'id' ]
+ valid_diff = False
+ for line in diff_slice:
+ # Make sure to lower case strings before checking.
+ line = line.lower()
+ # Diff lines will always start with a + or - character, but handle special cases: '--- local_file \n', '+++ history_data \n'
+ if ( line.startswith( '+' ) or line.startswith( '-' ) ) and line.find( 'local_file' ) < 0 and line.find( 'history_data' ) < 0:
+ for vdf in valid_diff_strs:
+ if line.find( vdf ) < 0:
+ valid_diff = False
+ else:
+ valid_diff = True
+ # Stop checking as soon as we know we have a valid difference
+ break
+ if not valid_diff:
+ # Print out diff_slice so we can see what failed
+ print "###### diff_slice ######"
+ raise AssertionError( "".join( diff_slice ) )
+ else:
+ for line in diff_slice:
+ for char in line:
+ if ord( char ) > 128:
+ raise AssertionError( "Binary data detected, not displaying diff" )
+ raise AssertionError( "".join( diff_slice ) )
+
+ def files_re_match( self, file1, file2, attributes=None ):
+ """Checks the contents of 2 files for differences using re.match"""
+ local_file = open( file1, 'U' ).readlines() #regex file
+ history_data = open( file2, 'U' ).readlines()
+ assert len( local_file ) == len( history_data ), 'Data File and Regular Expression File contain a different number of lines (%s != %s)' % ( len( local_file ), len( history_data ) )
+ if attributes and attributes.sort:
+ history_data.sort()
+ if attributes and attributes.lines_diff:
+ lines_diff = attributes.lines_diff
+ else:
+ lines_diff = 0
+ line_diff_count = 0
+ diffs = []
+ for i in range( len( history_data ) ):
+ if not re.match( local_file[i].rstrip( '\r\n' ), history_data[i].rstrip( '\r\n' ) ):
+ line_diff_count += 1
+ diffs.append( 'Regular Expression: %s\nData file : %s' % ( local_file[i].rstrip( '\r\n' ), history_data[i].rstrip( '\r\n' ) ) )
+ if line_diff_count > lines_diff:
+ raise AssertionError, "Regular expression did not match data file (allowed variants=%i):\n%s" % ( lines_diff, "".join( diffs ) )
+
+ def files_re_match_multiline( self, file1, file2, attributes=None ):
+ """Checks the contents of 2 files for differences using re.match in multiline mode"""
+ local_file = open( file1, 'U' ).read() #regex file
+ if attributes and attributes.sort:
+ history_data = open( file2, 'U' ).readlines()
+ history_data.sort()
+ history_data = ''.join( history_data )
+ else:
+ history_data = open( file2, 'U' ).read()
+ #lines_diff not applicable to multiline matching
+ assert re.match( local_file, history_data, re.MULTILINE ), "Multiline Regular expression did not match data file"
def get_filename( self, filename ):
full = os.path.join( self.file_dir, filename)
@@ -541,7 +590,7 @@
hid = elem.get('hid')
hids.append(hid)
return hids
- def verify_dataset_correctness( self, filename, hid=None, wait=True, maxseconds=120, sort=False ):
+ def verify_dataset_correctness( self, filename, hid=None, wait=True, maxseconds=120, attributes=None ):
"""Verifies that the attributes and contents of a history item meet expectations"""
if wait:
self.wait( maxseconds=maxseconds ) #wait for job to finish
@@ -576,13 +625,24 @@
data = self.last_page()
file( temp_name, 'wb' ).write(data)
try:
- self.files_diff( local_name, temp_name, sort=sort )
+ if attributes and 'compare' in attributes:
+ compare = attributes.compare
+ else:
+ compare = 'diff'
+ if compare == 'diff':
+ self.files_diff( local_name, temp_name, attributes=attributes )
+ elif compare == 're_match':
+ self.files_re_match( local_name, temp_name, attributes=attributes )
+ elif compare == 're_match_multiline':
+ self.files_re_match_multiline( local_name, temp_name, attributes=attributes )
+ else:
+ raise Exception, 'Unimplemented Compare type: %s' % compare
except AssertionError, err:
- os.remove(temp_name)
- errmsg = 'History item %s different than expected, difference:\n' % hid
+ errmsg = 'History item %s different than expected, difference (using %s):\n' % ( hid, compare )
errmsg += str( err )
raise AssertionError( errmsg )
- os.remove(temp_name)
+ finally:
+ os.remove( temp_name )
def verify_composite_datatype_file_content( self, file_name, hda_id ):
local_name = self.get_filename( file_name )
temp_name = self.get_filename( 'temp_%s' % file_name )
diff -r 45cfcab6aed7 -r 3563fade2c86 test/functional/test_toolbox.py
--- a/test/functional/test_toolbox.py Fri Mar 05 12:55:51 2010 -0500
+++ b/test/functional/test_toolbox.py Fri Mar 05 17:21:16 2010 -0500
@@ -66,13 +66,13 @@
self.assertTrue( data_list )
elem_index = 0 - len( testdef.outputs )
for output_tuple in testdef.outputs:
- name, file, sort = output_tuple
+ name, outfile, attributes = output_tuple
# Get the correct hid
elem = data_list[ elem_index ]
self.assertTrue( elem is not None )
elem_hid = elem.get( 'hid' )
elem_index += 1
- self.verify_dataset_correctness( file, hid=elem_hid, maxseconds=testdef.maxseconds, sort=sort )
+ self.verify_dataset_correctness( outfile, hid=elem_hid, maxseconds=testdef.maxseconds, attributes=attributes )
self.delete_history( id=self.security.encode_id( latest_history.id ) )
def __expand_grouping( self, tool_inputs, declared_inputs, prefix='' ):