details:   http://www.bx.psu.edu/hg/galaxy/rev/0c9e154e9176
changeset: 3517:0c9e154e9176
user:      Nate Coraor <nate@bx.psu.edu>
date:      Thu Mar 11 11:56:20 2010 -0500
description:
Modify the newline conversion methods in sniff so converting in place is optional. This is necessary to fix a bug that occurs if using the 'server_dir' or 'path_paste' library upload methods: previously, they would modify the file to be imported in-place if permissions allowed (probably not what the admin wanted) or fail if permissions did not allow. New functionality is to return the converted tempfile if 'server_dir' or 'path_paste' methods are used. Also, no newline conversion will be done if the administrator uses the symlink checkbox.

diffstat:

 lib/galaxy/datatypes/sniff.py |  39 ++++++++++++++++++++++++---------------
 tools/data_source/upload.py   |  24 +++++++++++++++++++-----
 2 files changed, 43 insertions(+), 20 deletions(-)

diffs (140 lines):

diff -r 2e97ae04856d -r 0c9e154e9176 lib/galaxy/datatypes/sniff.py
--- a/lib/galaxy/datatypes/sniff.py	Thu Mar 11 11:17:11 2010 -0500
+++ b/lib/galaxy/datatypes/sniff.py	Thu Mar 11 11:56:20 2010 -0500
@@ -70,7 +70,7 @@
         f.close()
     return False
 
-def convert_newlines( fname ):
+def convert_newlines( fname, in_place=True ):
     """
     Converts in place a file from universal line endings
     to Posix line endings.
@@ -78,7 +78,7 @@
     >>> fname = get_test_fname('temp.txt')
     >>> file(fname, 'wt').write("1 2\\r3 4")
     >>> convert_newlines(fname)
-    2
+    (2, None)
     >>> file(fname).read()
     '1 2\\n3 4\\n'
     """
@@ -87,18 +87,21 @@
     for i, line in enumerate( file( fname, "U" ) ):
         fp.write( "%s\n" % line.rstrip( "\r\n" ) )
     fp.close()
-    shutil.move( temp_name, fname )
-    # Return number of lines in file.
-    return i + 1
+    if in_place:
+        shutil.move( temp_name, fname )
+        # Return number of lines in file.
+        return ( i + 1, None )
+    else:
+        return ( i + 1, temp_name )
 
-def sep2tabs(fname, patt="\\s+"):
+def sep2tabs( fname, in_place=True, patt="\\s+" ):
     """
     Transforms in place a 'sep' separated file to a tab separated one
 
     >>> fname = get_test_fname('temp.txt')
     >>> file(fname, 'wt').write("1 2\\n3 4\\n")
     >>> sep2tabs(fname)
-    2
+    (2, None)
     >>> file(fname).read()
     '1\\t2\\n3\\t4\\n'
     """
@@ -110,11 +113,14 @@
         elems = regexp.split( line )
         fp.write( "%s\n" % '\t'.join( elems ) )
     fp.close()
-    shutil.move( temp_name, fname )
-    # Return number of lines in file.
-    return i + 1
+    if in_place:
+        shutil.move( temp_name, fname )
+        # Return number of lines in file.
+        return ( i + 1, None )
+    else:
+        return ( i + 1, temp_name )
 
-def convert_newlines_sep2tabs( fname, patt="\\s+" ):
+def convert_newlines_sep2tabs( fname, in_place=True, patt="\\s+" ):
     """
     Combines above methods: convert_newlines() and sep2tabs()
     so that files do not need to be read twice
@@ -122,7 +128,7 @@
 
     >>> fname = get_test_fname('temp.txt')
     >>> file(fname, 'wt').write("1 2\\r3 4")
     >>> convert_newlines_sep2tabs(fname)
-    2
+    (2, None)
     >>> file(fname).read()
     '1\\t2\\n3\\t4\\n'
     """
@@ -134,9 +140,12 @@
         elems = regexp.split( line )
         fp.write( "%s\n" % '\t'.join( elems ) )
     fp.close()
-    shutil.move( temp_name, fname )
-    # Return number of lines in file.
-    return i + 1
+    if in_place:
+        shutil.move( temp_name, fname )
+        # Return number of lines in file.
+        return ( i + 1, None )
+    else:
+        return ( i + 1, temp_name )
 
 def get_headers( fname, sep, count=60, is_multi_byte=False ):
     """
diff -r 2e97ae04856d -r 0c9e154e9176 tools/data_source/upload.py
--- a/tools/data_source/upload.py	Thu Mar 11 11:17:11 2010 -0500
+++ b/tools/data_source/upload.py	Thu Mar 11 11:56:20 2010 -0500
@@ -138,6 +138,7 @@
 def add_file( dataset, json_file, output_path ):
     data_type = None
     line_count = None
+    converted_path = None
 
     if dataset.type == 'url':
         try:
@@ -239,10 +240,15 @@
             file_err( 'The uploaded file contains inappropriate content', dataset, json_file )
             return
     if data_type != 'binary' and data_type != 'zip':
-        if dataset.space_to_tab:
-            line_count = sniff.convert_newlines_sep2tabs( dataset.path )
-        else:
-            line_count = sniff.convert_newlines( dataset.path )
+        # don't convert newlines on data we're only going to symlink
+        if not dataset.get( 'link_data_only', False ):
+            in_place = True
+            if dataset.type in ( 'server_dir', 'path_paste' ):
+                in_place = False
+            if dataset.space_to_tab:
+                line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place )
+            else:
+                line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place )
         if dataset.file_type == 'auto':
             ext = sniff.guess_ext( dataset.path )
         else:
@@ -257,7 +263,15 @@
     if dataset.get( 'link_data_only', False ):
         pass # data will remain in place
     elif dataset.type in ( 'server_dir', 'path_paste' ):
-        shutil.copy( dataset.path, output_path )
+        if converted_path is not None:
+            shutil.copy( converted_path, output_path )
+            try:
+                os.remove( converted_path )
+            except:
+                pass
+        else:
+            # this should not happen, but it's here just in case
+            shutil.copy( dataset.path, output_path )
     else:
         shutil.move( dataset.path, output_path )
     # Write the job info
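
For illustration only, the following is a minimal, self-contained Python 3 sketch of the pattern this changeset introduces; it is not the actual Galaxy code, and the stage_file helper with its library_import/link_data_only parameters is hypothetical. Conversion always writes to a tempfile; with in_place=True the tempfile replaces the original, and with in_place=False the caller receives the tempfile path, copies it to the output location, and removes it, leaving the library source file untouched.

import os
import shutil
import tempfile

def convert_newlines(fname, in_place=True):
    """Normalize line endings to '\n'.  Returns (line_count, temp_path or None)."""
    fd, temp_name = tempfile.mkstemp()
    line_count = 0
    with os.fdopen(fd, "w") as out, open(fname, "r", newline=None) as src:
        for line_count, line in enumerate(src, start=1):
            out.write("%s\n" % line.rstrip("\r\n"))
    if in_place:
        # overwrite the original file with the converted copy
        shutil.move(temp_name, fname)
        return (line_count, None)
    # leave the original untouched; the caller owns the tempfile
    return (line_count, temp_name)

def stage_file(path, output_path, link_data_only=False, library_import=False):
    """Hypothetical caller mirroring the upload tool's new logic."""
    if link_data_only:
        return None  # data will be symlinked in place; never rewrite it
    line_count, converted = convert_newlines(path, in_place=not library_import)
    if converted is not None:
        # library import ('server_dir'/'path_paste'): copy the converted
        # tempfile and keep the admin's source file pristine
        shutil.copy(converted, output_path)
        os.remove(converted)
    else:
        # regular upload: the file itself was converted in place
        shutil.copy(path, output_path)
    return line_count

Returning (line_count, None) in the in-place case keeps the return shape uniform for both modes, which is why the doctests in the patch change from 2 to (2, None).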