details:   http://www.bx.psu.edu/hg/galaxy/rev/0c9e154e9176
changeset: 3517:0c9e154e9176
user:      Nate Coraor <nate@bx.psu.edu>
date:      Thu Mar 11 11:56:20 2010 -0500
description:
Modify the newline conversion methods in sniff so converting in place is optional. This is necessary to fix a bug that occurs if using the 'server_dir' or 'path_paste' library upload methods: previously, they would modify the file to be imported in-place if permissions allowed (probably not what the admin wanted) or fail if permissions did not allow. New functionality is to return the converted tempfile if 'server_dir' or 'path_paste' methods are used. Also, no newline conversion will be done if the administrator uses the symlink checkbox.

diffstat:

 lib/galaxy/datatypes/sniff.py |  39 ++++++++++++++++++++++++---------------
 tools/data_source/upload.py   |  24 +++++++++++++++++++-----
 2 files changed, 43 insertions(+), 20 deletions(-)

diffs (140 lines):

diff -r 2e97ae04856d -r 0c9e154e9176 lib/galaxy/datatypes/sniff.py
--- a/lib/galaxy/datatypes/sniff.py	Thu Mar 11 11:17:11 2010 -0500
+++ b/lib/galaxy/datatypes/sniff.py	Thu Mar 11 11:56:20 2010 -0500
@@ -70,7 +70,7 @@
         f.close()
     return False
 
-def convert_newlines( fname ):
+def convert_newlines( fname, in_place=True ):
     """
     Converts in place a file from universal line endings
     to Posix line endings.
@@ -78,7 +78,7 @@
     >>> fname = get_test_fname('temp.txt')
     >>> file(fname, 'wt').write("1 2\\r3 4")
     >>> convert_newlines(fname)
-    2
+    (2, None)
     >>> file(fname).read()
     '1 2\\n3 4\\n'
     """
@@ -87,18 +87,21 @@
     for i, line in enumerate( file( fname, "U" ) ):
         fp.write( "%s\n" % line.rstrip( "\r\n" ) )
     fp.close()
-    shutil.move( temp_name, fname )
-    # Return number of lines in file.
-    return i + 1
+    if in_place:
+        shutil.move( temp_name, fname )
+        # Return number of lines in file.
+        return ( i + 1, None )
+    else:
+        return ( i + 1, temp_name )
 
-def sep2tabs(fname, patt="\\s+"):
+def sep2tabs( fname, in_place=True, patt="\\s+" ):
     """
     Transforms in place a 'sep' separated file to a tab separated one
 
     >>> fname = get_test_fname('temp.txt')
     >>> file(fname, 'wt').write("1 2\\n3 4\\n")
     >>> sep2tabs(fname)
-    2
+    (2, None)
     >>> file(fname).read()
     '1\\t2\\n3\\t4\\n'
     """
@@ -110,11 +113,14 @@
         elems = regexp.split( line )
         fp.write( "%s\n" % '\t'.join( elems ) )
     fp.close()
-    shutil.move( temp_name, fname )
-    # Return number of lines in file.
-    return i + 1
+    if in_place:
+        shutil.move( temp_name, fname )
+        # Return number of lines in file.
+        return ( i + 1, None )
+    else:
+        return ( i + 1, temp_name )
 
-def convert_newlines_sep2tabs( fname, patt="\\s+" ):
+def convert_newlines_sep2tabs( fname, in_place=True, patt="\\s+" ):
     """
     Combines above methods: convert_newlines() and sep2tabs()
     so that files do not need to be read twice
@@ -122,7 +128,7 @@
 
     >>> fname = get_test_fname('temp.txt')
     >>> file(fname, 'wt').write("1 2\\r3 4")
     >>> convert_newlines_sep2tabs(fname)
-    2
+    (2, None)
     >>> file(fname).read()
     '1\\t2\\n3\\t4\\n'
     """
@@ -134,9 +140,12 @@
         elems = regexp.split( line )
         fp.write( "%s\n" % '\t'.join( elems ) )
     fp.close()
-    shutil.move( temp_name, fname )
-    # Return number of lines in file.
-    return i + 1
+    if in_place:
+        shutil.move( temp_name, fname )
+        # Return number of lines in file.
+        return ( i + 1, None )
+    else:
+        return ( i + 1, temp_name )
 
 def get_headers( fname, sep, count=60, is_multi_byte=False ):
     """
diff -r 2e97ae04856d -r 0c9e154e9176 tools/data_source/upload.py
--- a/tools/data_source/upload.py	Thu Mar 11 11:17:11 2010 -0500
+++ b/tools/data_source/upload.py	Thu Mar 11 11:56:20 2010 -0500
@@ -138,6 +138,7 @@
 def add_file( dataset, json_file, output_path ):
     data_type = None
     line_count = None
+    converted_path = None
 
     if dataset.type == 'url':
         try:
@@ -239,10 +240,15 @@
             file_err( 'The uploaded file contains inappropriate content', dataset, json_file )
             return
     if data_type != 'binary' and data_type != 'zip':
-        if dataset.space_to_tab:
-            line_count = sniff.convert_newlines_sep2tabs( dataset.path )
-        else:
-            line_count = sniff.convert_newlines( dataset.path )
+        # don't convert newlines on data we're only going to symlink
+        if not dataset.get( 'link_data_only', False ):
+            in_place = True
+            if dataset.type in ( 'server_dir', 'path_paste' ):
+                in_place = False
+            if dataset.space_to_tab:
+                line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place )
+            else:
+                line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place )
         if dataset.file_type == 'auto':
             ext = sniff.guess_ext( dataset.path )
         else:
@@ -257,7 +263,15 @@
     if dataset.get( 'link_data_only', False ):
         pass # data will remain in place
     elif dataset.type in ( 'server_dir', 'path_paste' ):
-        shutil.copy( dataset.path, output_path )
+        if converted_path is not None:
+            shutil.copy( converted_path, output_path )
+            try:
+                os.remove( converted_path )
+            except:
+                pass
+        else:
+            # this should not happen, but it's here just in case
+            shutil.copy( dataset.path, output_path )
     else:
         shutil.move( dataset.path, output_path )
     # Write the job info
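
For illustration only, the following is a minimal, self-contained Python 3 sketch of the pattern this changeset introduces; it is not the actual Galaxy code, and the stage_file helper with its library_import/link_data_only parameters is hypothetical. Conversion always writes to a tempfile; with in_place=True the tempfile replaces the original, and with in_place=False the caller receives the tempfile path, copies it to the output location, and removes it, leaving the library source file untouched.

import os
import shutil
import tempfile

def convert_newlines(fname, in_place=True):
    """Normalize line endings to '\n'.  Returns (line_count, temp_path or None)."""
    fd, temp_name = tempfile.mkstemp()
    line_count = 0
    with os.fdopen(fd, "w") as out, open(fname, "r", newline=None) as src:
        for line_count, line in enumerate(src, start=1):
            out.write("%s\n" % line.rstrip("\r\n"))
    if in_place:
        # overwrite the original file with the converted copy
        shutil.move(temp_name, fname)
        return (line_count, None)
    # leave the original untouched; the caller owns the tempfile
    return (line_count, temp_name)

def stage_file(path, output_path, link_data_only=False, library_import=False):
    """Hypothetical caller mirroring the upload tool's new logic."""
    if link_data_only:
        return None  # data will be symlinked in place; never rewrite it
    line_count, converted = convert_newlines(path, in_place=not library_import)
    if converted is not None:
        # library import ('server_dir'/'path_paste'): copy the converted
        # tempfile and keep the admin's source file pristine
        shutil.copy(converted, output_path)
        os.remove(converted)
    else:
        # regular upload: the file itself was converted in place
        shutil.copy(path, output_path)
    return line_count

Returning (line_count, None) in the in-place case keeps the return shape uniform for both modes, which is why the doctests in the patch change from 2 to (2, None).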