commit/galaxy-central: greg: Don't alter the contents of a file while uploading to a data library if using the filesystem_paths option. This partially resolves the issue where a supposedly sorted BAM file was being resorted upon upload to a data library when using this option. A better implementation of determining whether a BAM file has been sorted (so that it does not get resorted) remains to be done.
1 new changeset in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/b5ecb8f4839d/ changeset: r5221:b5ecb8f4839d user: greg date: 2011-03-15 14:38:29 summary: Don't alter the contents of a file while uploading to a data library if using the filesystem_paths option. This partially resolves the issue where a supposedly sorted BAM file was being resorted upon upload to a data library when using this option. A better imlementation of determining whether a BAM file has been sorted (so that it does not get resorted) remains to be done. affected #: 4 files (832 bytes) --- a/lib/galaxy/datatypes/binary.py Mon Mar 14 23:01:11 2011 -0400 +++ b/lib/galaxy/datatypes/binary.py Tue Mar 15 09:38:29 2011 -0400 @@ -59,7 +59,8 @@ output = subprocess.Popen(params, stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()[0] # find returns -1 if string is not found return output.find("SO:coordinate") != -1 or output.find("SO:sorted") != -1 - + def dataset_content_needs_grooming( self, file_name ): + return not self._is_coordinate_sorted( file_name ) def groom_dataset_content( self, file_name ): """ Ensures that the Bam file contents are sorted. This function is called @@ -72,11 +73,9 @@ ## This command may also create temporary files <out.prefix>.%d.bam when the ## whole alignment cannot be fitted into memory ( controlled by option -m ). 
#do this in a unique temp directory, because of possible <out.prefix>.%d.bam temp files - - if self._is_coordinate_sorted(file_name): + if not self.dataset_content_needs_grooming( file_name ): # Don't re-sort if already sorted return - tmp_dir = tempfile.mkdtemp() tmp_sorted_dataset_file_name_prefix = os.path.join( tmp_dir, 'sorted' ) stderr_name = tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "bam_sort_stderr" ).name @@ -84,7 +83,6 @@ command = "samtools sort %s %s" % ( file_name, tmp_sorted_dataset_file_name_prefix ) proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) ) exit_code = proc.wait() - #Did sort succeed? stderr = open( stderr_name ).read().strip() if stderr: @@ -93,10 +91,8 @@ raise Exception, "Error Grooming BAM file contents: %s" % stderr else: print stderr - # Move samtools_created_sorted_file_name to our output dataset location shutil.move( samtools_created_sorted_file_name, file_name ) - # Remove temp file and empty temporary directory os.unlink( stderr_name ) os.rmdir( tmp_dir ) @@ -124,9 +120,7 @@ raise Exception, "Error Setting BAM Metadata: %s" % stderr else: print stderr - dataset.metadata.bam_index = index_file - # Remove temp file os.unlink( stderr_name ) def sniff( self, filename ): --- a/lib/galaxy/datatypes/data.py Mon Mar 14 23:01:11 2011 -0400 +++ b/lib/galaxy/datatypes/data.py Tue Mar 15 09:38:29 2011 -0400 @@ -88,8 +88,11 @@ except OSError, e: log.exception('%s reading a file that does not exist %s' % (self.__class__.__name__, dataset.file_name)) return '' + def dataset_content_needs_grooming( self, file_name ): + """This function is called on an output dataset file after the content is initially generated.""" + return False def groom_dataset_content( self, file_name ): - """This function is called on an output dataset file after the content is initially generated.""" + """This function is called on an output dataset file if dataset_content_needs_grooming returns True.""" pass def 
init_meta( self, dataset, copy_from=None ): # Metadata should be left mostly uninitialized. Dataset will --- a/test/base/twilltestcase.py Mon Mar 14 23:01:11 2011 -0400 +++ b/test/base/twilltestcase.py Tue Mar 15 09:38:29 2011 -0400 @@ -637,10 +637,10 @@ try: if attributes is None: attributes = {} + compare = attributes.get( 'compare', 'diff' ) if attributes.get( 'ftype', None ) == 'bam': local_fh, temp_name = self._bam_to_sam( local_name, temp_name ) local_name = local_fh.name - compare = attributes.get( 'compare', 'diff' ) extra_files = attributes.get( 'extra_files', None ) if compare == 'diff': self.files_diff( local_name, temp_name, attributes=attributes ) --- a/tools/data_source/upload.py Mon Mar 14 23:01:11 2011 -0400 +++ b/tools/data_source/upload.py Tue Mar 15 09:38:29 2011 -0400 @@ -334,8 +334,7 @@ file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file ) return if data_type != 'binary': - # don't convert newlines on data we're only going to symlink - if link_data_only == 'link_to_files': + if link_data_only == 'copy_files': in_place = True if dataset.type in ( 'server_dir', 'path_paste' ): in_place = False @@ -353,8 +352,16 @@ ext = dataset.ext if ext == 'auto': ext = 'data' - # Move the dataset to its "real" path + datatype = registry.get_datatype_by_extension( ext ) + if dataset.type in ( 'server_dir', 'path_paste' ) and link_data_only == 'link_to_files': + # Never alter a file that will not be copied to Galaxy's local file store. + if datatype.dataset_content_needs_grooming( output_path ): + err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \ + '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.' 
+ file_err( err_msg, dataset, json_file ) + return if link_data_only == 'copy_files' and dataset.type in ( 'server_dir', 'path_paste' ): + # Move the dataset to its "real" path if converted_path is not None: shutil.copy( converted_path, output_path ) try: @@ -362,7 +369,7 @@ except: pass else: - # this should not happen, but it's here just in case + # This should not happen, but it's here just in case shutil.copy( dataset.path, output_path ) elif link_data_only == 'copy_files': shutil.move( dataset.path, output_path ) @@ -375,9 +382,9 @@ name = dataset.name, line_count = line_count ) json_file.write( to_json_string( info ) + "\n" ) - # Groom the dataset content if necessary - datatype = registry.get_datatype_by_extension( ext ) - datatype.groom_dataset_content( output_path ) + if datatype.dataset_content_needs_grooming( output_path ): + # Groom the dataset content if necessary + datatype.groom_dataset_content( output_path ) def add_composite_file( dataset, registry, json_file, output_path, files_path ): if dataset.composite_files: Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.
participants (1)
-
Bitbucket