details:
http://www.bx.psu.edu/hg/galaxy/rev/dd5f1fe8f5e9
changeset: 2485:dd5f1fe8f5e9
user: Dan Blankenberg <dan(a)bx.psu.edu>
date: Wed Jul 15 14:11:35 2009 -0400
description:
Initial pass at allowing certain metadata parameters to be set at upload time (controlled
via a flag). This lets the user specify, e.g., the 'base_name' to be used for
Rgenetics datatypes. A bunch of cleanup is still needed in upload.
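
For illustration, a datatype opts in per metadata element, as in the genetics.py change
below. A minimal sketch (the MetadataElement import path is Galaxy's real one; the class
name is hypothetical):

    from galaxy.datatypes.metadata import MetadataElement

    class MyGenetics( Html ):
        # set_in_upload=True exposes this element as a text field in the
        # upload tool's new "Specify metadata" conditional
        MetadataElement( name="base_name", default="galaxy", readonly=True,
                         desc="base name for all transformed versions of this genetic dataset",
                         set_in_upload=True )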
9 file(s) affected in this change:
lib/galaxy/datatypes/data.py
lib/galaxy/datatypes/genetics.py
lib/galaxy/datatypes/metadata.py
lib/galaxy/datatypes/registry.py
lib/galaxy/tools/__init__.py
lib/galaxy/tools/actions/upload.py
lib/galaxy/tools/parameters/grouping.py
templates/tool_form.mako
tools/data_source/upload.xml
diffs (418 lines):
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/data.py
--- a/lib/galaxy/datatypes/data.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/datatypes/data.py Wed Jul 15 14:11:35 2009 -0400
@@ -49,6 +49,8 @@
"""If False, the peek is regenerated whenever a dataset of this type
is copied"""
copy_safe_peek = True
+
+ is_binary = True #The dataset contains binary data --> do not space_to_tab or convert newlines, etc. Allow binary file uploads of this type when True.
#Composite datatypes
composite_type = None
@@ -250,7 +252,8 @@
def after_edit( self, dataset ):
"""This function is called on the dataset after metadata is
edited."""
dataset.clear_associated_files( metadata_safe = True )
- def __new_composite_file( self, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, **kwds ):
+ def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, **kwds ):
+ kwds[ 'name' ] = name
kwds[ 'optional' ] = optional
kwds[ 'mimetype' ] = mimetype
kwds[ 'description' ] = description
@@ -258,7 +261,7 @@
return Bunch( **kwds )
def add_composite_file( self, name, **kwds ):
#self.composite_files = self.composite_files.copy()
- self.composite_files[ name ] = self.__new_composite_file( **kwds )
+ self.composite_files[ name ] = self.__new_composite_file( name, **kwds )
def __substitute_composite_key( self, key, composite_file, dataset = None ):
@@ -273,7 +276,7 @@
def writable_files( self, dataset = None ):
files = odict()
if self.composite_type != 'auto_primary_file':
- files[ self.primary_file_name ] = self.__new_composite_file()
+ files[ self.primary_file_name ] = self.__new_composite_file( self.primary_file_name )
for key, value in self.get_composite_files( dataset = dataset ).iteritems():
files[ key ] = value
return files
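
The net effect of the data.py change: each composite-file Bunch now records its own
name, so callers can use value.name rather than carrying the dict key around (upload.py
below relies on this). A minimal sketch with a hypothetical datatype:

    from galaxy.datatypes.data import Data

    class Example( Data ):
        def __init__( self, **kwd ):
            Data.__init__( self, **kwd )
            self.add_composite_file( 'readme.txt', optional=True )

    # Example().composite_files[ 'readme.txt' ].name == 'readme.txt'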
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/genetics.py
--- a/lib/galaxy/datatypes/genetics.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/datatypes/genetics.py Wed Jul 15 14:11:35 2009 -0400
@@ -117,7 +117,7 @@
class Rgenetics(Html):
"""class to use for rgenetics"""
"""Add metadata elements"""
- MetadataElement( name="base_name", desc="base name for all transformed
versions of this genetic dataset", default="galaxy", readonly=True)
+ MetadataElement( name="base_name", desc="base name for all transformed
versions of this genetic dataset", default="galaxy", readonly=True,
set_in_upload=True)
file_ext="html"
composite_type = 'auto_primary_file'
@@ -151,10 +151,7 @@
else:
dataset.peek = 'file does not exist'
dataset.blurb = 'file purged from disk'
- #def sniff( self, filename ):
- # """
- # """
- # return True
+
class Lped(Rgenetics):
"""fake class to distinguish different species of Rgenetics data
collections
@@ -245,7 +242,10 @@
MetadataElement( name="columns", default=0, desc="Number of
columns", readonly=True, visible=False )
MetadataElement( name="column_names", default=[], desc="Column
names", readonly=True,visible=True )
MetadataElement( name="base_name",
- desc="base name for all transformed versions of this genetic dataset",
readonly=True)
+ desc="base name for all transformed versions of this genetic dataset",
readonly=True, default='galaxy', set_in_upload=True)
+ ### Do we really need these below? can we rely on dataset.extra_files_path:
os.path.join( dataset.extra_files_path, '%s.phenodata' %
dataset.metadata.base_name ) ?
+ ### Do these have a different purpose? Ross will need to clarify
+ ### Uploading these datatypes will not work until this is sorted out (set_peek
fails)...
MetadataElement( name="pheno_path",
desc="Path to phenotype data for this experiment", readonly=True)
MetadataElement( name="pheno",
@@ -253,11 +253,19 @@
file_ext = None
+ is_binary = True
+
+ composite_type = 'basic'
+
+ def __init__( self, **kwd ):
+ Html.__init__( self, **kwd )
+ self.add_composite_file( '%s.phenodata', substitute_name_with_metadata = 'base_name' )
+
def set_peek( self, dataset ):
"""expects a .pheno file in the extra_files_dir - ugh
note that R is wierd and does not include the row.name in
the header. why?"""
- p = file(dataset.metadata.pheno_path,'r').readlines()
+ p = file(dataset.metadata.pheno_path,'r').readlines() #this fails
head = p[0].strip().split('\t')
head.insert(0,'ChipFileName') # fix R write.table b0rken-ness
p[0] = '\t'.join(head)
@@ -295,6 +303,7 @@
if not dataset.peek:
dataset.set_peek()
pk = dataset.peek # use the peek which is the pheno data insead of dataset (!)
+ ###this is probably not the best source, can we just access the raw data directly?
if pk:
p = pk.split('\n')
h = p[0].strip().split('\t') # hope is header
@@ -339,10 +348,6 @@
"""Returns the mime type of the datatype"""
return 'application/gzip'
- def sniff(self):
- """ can we be bothered looking for the signature or loading via
rpy?
- """
- return true
class AffyBatch( RexpBase ):
"""derived class for BioC data structures in Galaxy
"""
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/metadata.py
--- a/lib/galaxy/datatypes/metadata.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/datatypes/metadata.py Wed Jul 15 14:11:35 2009 -0400
@@ -212,12 +212,13 @@
is a MetadataSpecCollection) of datatype.
"""
- def __init__( self, datatype, name=None, desc=None, param=MetadataParameter, default=None, no_value = None, visible=True, **kwargs ):
+ def __init__( self, datatype, name=None, desc=None, param=MetadataParameter, default=None, no_value = None, visible=True, set_in_upload = False, **kwargs ):
self.name = name
self.desc = desc or name
self.default = default
self.no_value = no_value
self.visible = visible
+ self.set_in_upload = set_in_upload
# Catch-all, allows for extra attributes to be set
self.__dict__.update(kwargs)
#set up param last, as it uses values set above
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/datatypes/registry.py
--- a/lib/galaxy/datatypes/registry.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/datatypes/registry.py Wed Jul 15 14:11:35 2009 -0400
@@ -308,3 +308,19 @@
def get_composite_extensions( self ):
return [ ext for ( ext, d_type ) in self.datatypes_by_extension.iteritems() if d_type.composite_type is not None ]
+ def get_upload_metadata_params( self, context, group, tool ):
+ """Returns dict of case value:inputs for metadata conditional for upload tool"""
+ rval = {}
+ for ext, d_type in self.datatypes_by_extension.iteritems():
+ inputs = []
+ for meta_name, meta_spec in d_type.metadata_spec.iteritems():
+ if meta_spec.set_in_upload:
+ help_txt = meta_spec.desc
+ if not help_txt or help_txt == meta_name:
+ help_txt = ""
+ inputs.append( '<param type="text" name="%s" label="Set metadata value for &quot;%s&quot;" value="%s" help="%s"/>' % ( meta_name, meta_name, meta_spec.default, help_txt ) )
+ rval[ ext ] = "\n".join( inputs )
+ if 'auto' not in rval and 'txt' in rval: #need to manually add 'auto' datatype
+ rval[ 'auto' ] = rval[ 'txt' ]
+ return rval
+
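
For an Rgenetics extension such as 'lped', get_upload_metadata_params would yield
roughly this inputs fragment for its case (values taken from the base_name element
above):

    <param type="text" name="base_name" label="Set metadata value for &quot;base_name&quot;" value="galaxy" help="base name for all transformed versions of this genetic dataset"/>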
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/tools/__init__.py Wed Jul 15 14:11:35 2009 -0400
@@ -596,18 +596,40 @@
elif elem.tag == "conditional":
group = Conditional()
group.name = elem.get( "name" )
- # Should have one child "input" which determines the case
- input_elem = elem.find( "param" )
- assert input_elem is not None, "<conditional> must have a child <param>"
- group.test_param = self.parse_param_elem( input_elem, enctypes, context )
- # Must refresh when test_param changes
- group.test_param.refresh_on_change = True
- # And a set of possible cases
- for case_elem in elem.findall( "when" ):
- case = ConditionalWhen()
- case.value = case_elem.get( "value" )
- case.inputs = self.parse_input_elem( case_elem, enctypes, context )
- group.cases.append( case )
+
+ group.name = elem.get( "name" )
+
+ group.value_ref = elem.get( 'value_ref', None )
+ group.value_ref_in_group = util.string_as_bool( elem.get( 'value_ref_in_group', 'True' ) )
+ value_from = elem.get( "value_from" )
+ if value_from:
+ value_from = value_from.split( ':' )
+ group.value_from = locals().get( value_from[0] )
+ group.test_param = rval[ group.value_ref ]
+ group.test_param.refresh_on_change = True
+ for attr in value_from[1].split( '.' ):
+ group.value_from = getattr( group.value_from, attr )
+ for case_value, case_inputs in group.value_from( context, group, self ).iteritems():
+ case = ConditionalWhen()
+ case.value = case_value
+ if case_inputs:
+ case.inputs = self.parse_input_elem( ElementTree.XML( "<when>%s</when>" % case_inputs ), enctypes, context )
+ else:
+ case.inputs = {}
+ group.cases.append( case )
+ else:
+ # Should have one child "input" which determines the case
+ input_elem = elem.find( "param" )
+ assert input_elem is not None, "<conditional> must have a child <param>"
+ group.test_param = self.parse_param_elem( input_elem, enctypes, context )
+ # Must refresh when test_param changes
+ group.test_param.refresh_on_change = True
+ # And a set of possible cases
+ for case_elem in elem.findall( "when" ):
+ case = ConditionalWhen()
+ case.value = case_elem.get( "value" )
+ case.inputs = self.parse_input_elem( case_elem, enctypes, context )
+ group.cases.append( case )
rval[group.name] = group
elif elem.tag == "upload_dataset":
group = UploadDataset()
@@ -615,6 +637,7 @@
group.title = elem.get( "title" )
group.file_type_name = elem.get( 'file_type_name', group.file_type_name )
group.default_file_type = elem.get( 'default_file_type', group.default_file_type )
+ group.metadata_ref = elem.get( 'metadata_ref', group.metadata_ref )
rval[ group.file_type_name ].refresh_on_change = True
rval[ group.file_type_name ].refresh_on_change_values = self.app.datatypes_registry.get_composite_extensions()
group.inputs = self.parse_input_elem( elem, enctypes, context )
@@ -917,7 +940,10 @@
old_current_case = group_state['__current_case__']
group_prefix = "%s|" % ( key )
# Deal with the 'test' element and see if it's value changed
- test_param_key = group_prefix + input.test_param.name
+ if input.value_ref and not input.value_ref_in_group: #we are referencing an existant parameter, which is not part of this group
+ test_param_key = prefix + input.test_param.name
+ else:
+ test_param_key = group_prefix + input.test_param.name
test_param_error = None
test_incoming = get_incoming_value( incoming, test_param_key, None )
if test_param_key not in incoming \
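
The value_from syntax above splits on ':': the first token is resolved against locals()
(here 'self', the Tool instance) and the remainder is walked with getattr, yielding a
callable invoked as f( context, group, tool ) that returns a {case_value: inputs_xml}
dict. A standalone sketch of the same resolution logic, with hypothetical names:

    def resolve_value_from( spec, namespace ):
        # e.g. spec = "self:app.datatypes_registry.get_upload_metadata_params"
        root, path = spec.split( ':' )
        obj = namespace[ root ]          # mirrors locals().get( value_from[0] ) above
        for attr in path.split( '.' ):   # walk the dotted attribute path
            obj = getattr( obj, attr )
        return obj                       # callable: ( context, group, tool ) -> dict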
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/tools/actions/upload.py
--- a/lib/galaxy/tools/actions/upload.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/tools/actions/upload.py Wed Jul 15 14:11:35 2009 -0400
@@ -46,21 +46,26 @@
uploaded_datasets = dataset_upload_input.get_uploaded_datasets( trans, incoming )
for uploaded_dataset in uploaded_datasets:
precreated_dataset = self.get_precreated_dataset( uploaded_dataset.precreated_name )
- dataset = self.add_file( trans, uploaded_dataset.primary_file, uploaded_dataset.name, uploaded_dataset.file_type, uploaded_dataset.is_multi_byte, uploaded_dataset.dbkey, space_to_tab = uploaded_dataset.space_to_tab, info = uploaded_dataset.info, precreated_dataset = precreated_dataset )
- if uploaded_dataset.composite_files:
+ dataset = self.add_file( trans, uploaded_dataset.primary_file, uploaded_dataset.name, uploaded_dataset.file_type, uploaded_dataset.is_multi_byte, uploaded_dataset.dbkey, space_to_tab = uploaded_dataset.space_to_tab, info = uploaded_dataset.info, precreated_dataset = precreated_dataset, metadata = uploaded_dataset.metadata )
+ composite_files = dataset.datatype.get_composite_files( dataset )
+ if composite_files:
os.mkdir( dataset.extra_files_path ) #make extra files path
- for name, value in uploaded_dataset.composite_files.iteritems():
+ for name, value in composite_files.iteritems():
#what about binary files here, need to skip converting newlines
- if value is None and not dataset.datatype.writable_files[ name ].optional:
+ if uploaded_dataset.composite_files[ value.name ] is None and not value.optional:
dataset.info = "A required composite data file was not provided (%s)" % name
dataset.state = dataset.states.ERROR
break
- elif value is not None:
- if value.space_to_tab:
- sniff.convert_newlines_sep2tabs( value.filename )
+ elif uploaded_dataset.composite_files[ value.name] is not None:
+ if uploaded_dataset.composite_files[ value.name ].space_to_tab:
+ sniff.convert_newlines_sep2tabs( uploaded_dataset.composite_files[ value.name ].filename )
else:
- sniff.convert_newlines( value.filename )
- shutil.move( value.filename, os.path.join( dataset.extra_files_path, name ) )
+ sniff.convert_newlines( uploaded_dataset.composite_files[ value.name ].filename )
+ shutil.move( uploaded_dataset.composite_files[ value.name ].filename, os.path.join( dataset.extra_files_path, name ) )
+ if dataset.datatype.composite_type == 'auto_primary_file':
+ #now that metadata is set, we should create the primary file as required
+ open( dataset.file_name, 'wb+' ).write( dataset.datatype.generate_primary_file( dataset = dataset ) )
+
data_list.append( dataset )
#clean up extra temp names
uploaded_dataset.clean_up_temp_files()
@@ -125,7 +130,7 @@
trans.log_event( 'job id %d ended with errors, err_msg: %s' % ( job.id, err_msg ), tool_id=job.tool_id )
return dict( output=data )
- def add_file( self, trans, temp_name, file_name, file_type, is_multi_byte, dbkey, info=None, space_to_tab=False, precreated_dataset=None ):
+ def add_file( self, trans, temp_name, file_name, file_type, is_multi_byte, dbkey, info=None, space_to_tab=False, precreated_dataset=None, metadata = {} ):
def dataset_no_data_error( data, message = 'there was an error uploading your file' ):
data.info = "No data: %s." % message
data.state = data.states.ERROR
@@ -217,6 +222,7 @@
if trans.app.datatypes_registry.get_datatype_by_extension( file_type ).composite_type != 'auto_primary_file' and self.check_html( temp_name ):
return dataset_no_data_error( data, message = "you attempted to upload an inappropriate file" )
#raise BadFileException( "you attempted to upload an inappropriate file." )
+ #if data_type != 'binary' and data_type != 'zip' and not trans.app.datatypes_registry.get_datatype_by_extension( ext ).is_binary:
if data_type != 'binary' and data_type != 'zip':
if space_to_tab:
self.line_count = sniff.convert_newlines_sep2tabs( temp_name )
@@ -235,9 +241,14 @@
data.info = info
data.flush()
shutil.move( temp_name, data.file_name )
- data.state = data.states.OK
+ ## FIXME
+ data.state = data.states.OK ##THIS SHOULD BE THE LAST THING DONE
+ #### its bad to set other things after this point, i.e. metadata and composite files...this creates a race condition where a dataset could be pushed into a job before its metadata, etc is set
data.set_size()
data.init_meta()
+ #need to set metadata, has to be done after extention is set
+ for meta_name, meta_value in metadata.iteritems():
+ setattr( data.metadata, meta_name, meta_value )
if self.line_count is not None:
try:
if is_multi_byte:
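
The FIXME above concerns ordering; a sketch of the intended sequence (not what the code
does yet), where the OK state is flipped only after the dataset is fully initialized:

    data.set_size()
    data.init_meta()
    for meta_name, meta_value in metadata.iteritems():
        setattr( data.metadata, meta_name, meta_value )
    data.state = data.states.OK  # flip last, so a job can't pick up a half-built dataset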
diff -r dacc94994979 -r dd5f1fe8f5e9 lib/galaxy/tools/parameters/grouping.py
--- a/lib/galaxy/tools/parameters/grouping.py Wed Jul 15 12:18:43 2009 -0400
+++ b/lib/galaxy/tools/parameters/grouping.py Wed Jul 15 14:11:35 2009 -0400
@@ -92,6 +92,7 @@
self.file_type_name = 'file_type'
self.default_file_type = 'txt'
self.file_type_to_ext = { 'auto':self.default_file_type }
+ self.metadata_ref = 'files_metadata'
def get_file_type( self, context ):
return context.get( self.file_type_name, self.default_file_type )
def get_datatype_ext( self, trans, context ):
@@ -297,6 +298,7 @@
self.composite_files = odict()
self.dbkey = None
self.warnings = []
+ self.metadata = {}
self._temp_filenames = [] #store all created filenames here, delete on cleanup
def register_temp_file( self, filename ):
@@ -333,6 +335,13 @@
dataset.datatype = d_type
dataset.dbkey = dbkey
+ #load metadata
+ files_metadata = context.get( self.metadata_ref, {} )
+ for meta_name, meta_spec in d_type.metadata_spec.iteritems():
+ if meta_spec.set_in_upload:
+ if meta_name in files_metadata:
+ dataset.metadata[ meta_name ] = files_metadata[ meta_name ]
+
temp_name = None
precreated_name = None
is_multi_byte = False
@@ -359,10 +368,10 @@
dataset.warnings.extend( warnings )
dataset.register_temp_file( temp_name )
- keys = writable_files.keys()
+ keys = [ value.name for value in writable_files.values() ]
for i, group_incoming in enumerate( groups_incoming[ writable_files_offset : ] ):
key = keys[ i + writable_files_offset ]
- if group_incoming is None and not writable_files[ key ].optional:
+ if group_incoming is None and not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
dataset.composite_files[ key ] = None
else:
@@ -372,7 +381,7 @@
dataset.register_temp_file( temp_name )
else:
dataset.composite_files[ key ] = None
- if not writable_files[ key ].optional:
+ if not writable_files[ writable_files.keys()[ keys.index( key ) ] ].optional:
dataset.warnings.append( "A required composite file (%s) was not specified." % ( key ) )
return [ dataset ]
else:
@@ -404,6 +413,8 @@
Group.__init__( self )
self.test_param = None
self.cases = []
+ self.value_ref = None
+ self.value_ref_in_group = True #When our test_param is not part of the conditional Group, this is False
def get_current_case( self, value, trans ):
# Convert value to user representation
str_value = self.test_param.filter_value( value, trans )
@@ -460,4 +471,4 @@
class ConditionalWhen( object ):
def __init__( self ):
self.value = None
- self.inputs = None
\ No newline at end of file
+ self.inputs = None
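
End to end: with the default metadata_ref of 'files_metadata' and an Lped upload, the
incoming tool state would carry something like the following (hypothetical value), and
get_uploaded_datasets copies it onto the dataset because base_name is flagged
set_in_upload:

    context = { 'files_metadata': { 'base_name': 'my_study' } }
    # -> dataset.metadata[ 'base_name' ] = 'my_study'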
diff -r dacc94994979 -r dd5f1fe8f5e9 templates/tool_form.mako
--- a/templates/tool_form.mako Wed Jul 15 12:18:43 2009 -0400
+++ b/templates/tool_form.mako Wed Jul 15 14:11:35 2009 -0400
@@ -102,7 +102,9 @@
current_case = group_state['__current_case__']
group_prefix = prefix + input.name + "|"
%>
- ${row_for_param( group_prefix, input.test_param, group_state, group_errors, other_values )}
+ %if input.value_ref_in_group:
+ ${row_for_param( group_prefix, input.test_param, group_state, group_errors, other_values )}
+ %endif
${do_inputs( input.cases[current_case].inputs, group_state, group_errors, group_prefix, other_values )}
%elif input.type == "upload_dataset":
%if input.get_datatype( trans, other_values ).composite_type is None: #have non-composite upload appear as before
diff -r dacc94994979 -r dd5f1fe8f5e9 tools/data_source/upload.xml
--- a/tools/data_source/upload.xml Wed Jul 15 12:18:43 2009 -0400
+++ b/tools/data_source/upload.xml Wed Jul 15 14:11:35 2009 -0400
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
-<tool name="Upload File" id="upload1"
version="1.0.1">
+<tool name="Upload File" id="upload1"
version="1.0.2">
<description>
from your computer
</description>
@@ -15,7 +15,7 @@
</options>
</param>
<param name="async_datasets" type="hidden"
value="None"/>
- <upload_dataset name="files" title="Specify Files for Dataset"
file_type_name="file_type">
+ <upload_dataset name="files" title="Specify Files for Dataset"
file_type_name="file_type" metadata_ref="files_metadata">
<param name="file_data" type="file" size="30"
label="File" ajax-upload="true">
<validator type="expression" message="You will need to reselect
the file you specified (%s)." substitute_value_in_message="True">not ( (
isinstance( value, unicode ) or isinstance( value, str ) ) and value != ""
)</validator> <!-- use validator to post message to user about needing to
reselect the file, since most browsers won't accept the value attribute for file
inputs -->
</param>
@@ -25,6 +25,7 @@
</param>
</upload_dataset>
<param name="dbkey" type="genomebuild"
label="Genome" />
+ <conditional name="files_metadata" title="Specify metadata"
value_from="self:app.datatypes_registry.get_upload_metadata_params"
value_ref="file_type" value_ref_in_group="False" />
<!-- <param name="other_dbkey" type="text" label="Or
user-defined Genome" /> -->
</inputs>
<help>