1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/97f46da40bb3/
Changeset: 97f46da40bb3
User: jmchilton
Date: 2014-12-28 04:46:22+00:00
Summary: Fix remaining tool shed test cases for new escaped values.
I think in this case the values on the page are correct now - but the test framework needs to be updated to handle the escaped value.
Affected #: 1 file
diff -r 461a6a9379001959b5b3259f90a2b9471ce391ed -r 97f46da40bb3767c6cf0d440b1d2d58e06edf5e6 test/tool_shed/base/twilltestcase.py
--- a/test/tool_shed/base/twilltestcase.py
+++ b/test/tool_shed/base/twilltestcase.py
@@ -430,6 +430,8 @@
url = '/admin_toolshed/manage_repository?id=%s' % self.security.encode_id( installed_repository.id )
self.visit_galaxy_url( url )
strings_displayed.append( str( installed_repository.installed_changeset_revision ) )
+ # Every place Galaxy's XXXX tool appears in attribute - need to quote.
+ strings_displayed = map( lambda x: x.replace("'", "&#39;"), strings_displayed )
self.check_for_strings( strings_displayed, strings_not_displayed )
def display_installed_workflow_image( self, repository, workflow_name, strings_displayed=[], strings_not_displayed=[] ):
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/1ed5a63aae0a/
Changeset: 1ed5a63aae0a
User: jmchilton
Date: 2014-12-27 22:35:51+00:00
Summary: Merge next-stable.
Affected #: 9 files
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/web/base/controllers/admin.py
--- a/lib/galaxy/web/base/controllers/admin.py
+++ b/lib/galaxy/web/base/controllers/admin.py
@@ -7,7 +7,7 @@
from galaxy.web.form_builder import CheckboxField
from string import punctuation as PUNCTUATION
import galaxy.queue_worker
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from tool_shed.util import shed_util_common as suc
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/webapps/galaxy/controllers/admin.py
--- a/lib/galaxy/webapps/galaxy/controllers/admin.py
+++ b/lib/galaxy/webapps/galaxy/controllers/admin.py
@@ -17,7 +17,7 @@
from galaxy.web.params import QuotaParamParser
from tool_shed.util import common_util
from tool_shed.util import encoding_util
-from markupsafe import escape
+from tool_shed.util.web_util import escape
log = logging.getLogger( __name__ )
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py
--- a/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py
+++ b/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py
@@ -8,9 +8,9 @@
from galaxy.web.form_builder import CheckboxField
from galaxy.util import json
from galaxy.model.orm import or_
-from markupsafe import escape
import tool_shed.repository_types.util as rt_util
+from tool_shed.util.web_util import escape
from tool_shed.util import common_util
from tool_shed.util import encoding_util
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/webapps/tool_shed/controllers/admin.py
--- a/lib/galaxy/webapps/tool_shed/controllers/admin.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/admin.py
@@ -3,7 +3,7 @@
from galaxy import util
from galaxy.util import inflector
from galaxy import web
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from galaxy.web.base.controller import BaseUIController
from galaxy.web.base.controllers.admin import Admin
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/webapps/tool_shed/controllers/repository.py
--- a/lib/galaxy/webapps/tool_shed/controllers/repository.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/repository.py
@@ -6,7 +6,6 @@
from time import strftime
from datetime import date
from datetime import datetime
-from markupsafe import escape
from galaxy import util
from galaxy import web
@@ -19,6 +18,7 @@
from tool_shed.capsule import capsule_manager
from tool_shed.dependencies.repository import relation_builder
+from tool_shed.util.web_util import escape
from tool_shed.galaxy_install import dependency_display
from tool_shed.metadata import repository_metadata_manager
from tool_shed.utility_containers import ToolShedUtilityContainerManager
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/webapps/tool_shed/controllers/repository_review.py
--- a/lib/galaxy/webapps/tool_shed/controllers/repository_review.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/repository_review.py
@@ -2,7 +2,7 @@
import os
from sqlalchemy.sql.expression import func
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from galaxy import util
from galaxy import web
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/galaxy/webapps/tool_shed/controllers/upload.py
--- a/lib/galaxy/webapps/tool_shed/controllers/upload.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/upload.py
@@ -9,7 +9,7 @@
from galaxy import web
from galaxy.datatypes import checkers
from galaxy.web.base.controller import BaseUIController
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from tool_shed.dependencies import attribute_handlers
from tool_shed.galaxy_install import dependency_display
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/tool_shed/util/repository_util.py
--- a/lib/tool_shed/util/repository_util.py
+++ b/lib/tool_shed/util/repository_util.py
@@ -7,7 +7,7 @@
from galaxy import web
from galaxy.web.form_builder import build_select_field
from galaxy.webapps.tool_shed.model import directory_hash_id
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from tool_shed.dependencies.repository import relation_builder
diff -r 63ec80be64bb00414be0f7122a51907114ffb714 -r 1ed5a63aae0a5c8091483658c579d9d762a4c3fb lib/tool_shed/util/web_util.py
--- /dev/null
+++ b/lib/tool_shed/util/web_util.py
@@ -0,0 +1,21 @@
+from markupsafe import escape as raw_escape
+
+ALLOWED_ELEMENTS = ["<b>", "</b>", "<br/>"]
+ALLOWED_MAP = dict(map(lambda x: (x, raw_escape(x)), ALLOWED_ELEMENTS))
+
+
+def escape( value ):
+ """ A tool shed variant of markupsafe.escape that allows a select few
+ HTML elements that are repeatedly used in messages created deep
+ in the toolshed components. Ideally abstract things would be produced
+ in these components and messages in the views or client side - this is
+ what should be worked toward - but for now - we have this hack.
+
+ >>> escape("A <b>repo</b>")
+ u'A <b>repo</b>'
+ """
+ escaped = str( raw_escape( value ) )
+ # Unescape few selected tags.
+ for key, value in ALLOWED_MAP.iteritems():
+ escaped = escaped.replace(value, key)
+ return escaped
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/93a868eb314d/
Changeset: 93a868eb314d
Branch: next-stable
User: jmchilton
Date: 2014-12-27 22:30:59+00:00
Summary: Fixes for over escaping in c2bed0a.
Fixes dozens of tool functional tests.
Affected #: 9 files
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/web/base/controllers/admin.py
--- a/lib/galaxy/web/base/controllers/admin.py
+++ b/lib/galaxy/web/base/controllers/admin.py
@@ -7,7 +7,7 @@
from galaxy.web.form_builder import CheckboxField
from string import punctuation as PUNCTUATION
import galaxy.queue_worker
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from tool_shed.util import shed_util_common as suc
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/webapps/galaxy/controllers/admin.py
--- a/lib/galaxy/webapps/galaxy/controllers/admin.py
+++ b/lib/galaxy/webapps/galaxy/controllers/admin.py
@@ -17,7 +17,7 @@
from galaxy.web.params import QuotaParamParser
from tool_shed.util import common_util
from tool_shed.util import encoding_util
-from markupsafe import escape
+from tool_shed.util.web_util import escape
log = logging.getLogger( __name__ )
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py
--- a/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py
+++ b/lib/galaxy/webapps/galaxy/controllers/admin_toolshed.py
@@ -8,9 +8,9 @@
from galaxy.web.form_builder import CheckboxField
from galaxy.util import json
from galaxy.model.orm import or_
-from markupsafe import escape
import tool_shed.repository_types.util as rt_util
+from tool_shed.util.web_util import escape
from tool_shed.util import common_util
from tool_shed.util import encoding_util
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/webapps/tool_shed/controllers/admin.py
--- a/lib/galaxy/webapps/tool_shed/controllers/admin.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/admin.py
@@ -3,7 +3,7 @@
from galaxy import util
from galaxy.util import inflector
from galaxy import web
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from galaxy.web.base.controller import BaseUIController
from galaxy.web.base.controllers.admin import Admin
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/webapps/tool_shed/controllers/repository.py
--- a/lib/galaxy/webapps/tool_shed/controllers/repository.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/repository.py
@@ -6,7 +6,6 @@
from time import strftime
from datetime import date
from datetime import datetime
-from markupsafe import escape
from galaxy import util
from galaxy import web
@@ -19,6 +18,7 @@
from tool_shed.capsule import capsule_manager
from tool_shed.dependencies.repository import relation_builder
+from tool_shed.util.web_util import escape
from tool_shed.galaxy_install import dependency_display
from tool_shed.metadata import repository_metadata_manager
from tool_shed.utility_containers import ToolShedUtilityContainerManager
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/webapps/tool_shed/controllers/repository_review.py
--- a/lib/galaxy/webapps/tool_shed/controllers/repository_review.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/repository_review.py
@@ -2,7 +2,7 @@
import os
from sqlalchemy.sql.expression import func
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from galaxy import util
from galaxy import web
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/galaxy/webapps/tool_shed/controllers/upload.py
--- a/lib/galaxy/webapps/tool_shed/controllers/upload.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/upload.py
@@ -9,7 +9,7 @@
from galaxy import web
from galaxy.datatypes import checkers
from galaxy.web.base.controller import BaseUIController
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from tool_shed.dependencies import attribute_handlers
from tool_shed.galaxy_install import dependency_display
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/tool_shed/util/repository_util.py
--- a/lib/tool_shed/util/repository_util.py
+++ b/lib/tool_shed/util/repository_util.py
@@ -7,7 +7,7 @@
from galaxy import web
from galaxy.web.form_builder import build_select_field
from galaxy.webapps.tool_shed.model import directory_hash_id
-from markupsafe import escape
+from tool_shed.util.web_util import escape
from tool_shed.dependencies.repository import relation_builder
diff -r 501f510ff101849c45d9b0925bb55fe59319088b -r 93a868eb314d257c21bb19380b360bcf0d8dd82c lib/tool_shed/util/web_util.py
--- /dev/null
+++ b/lib/tool_shed/util/web_util.py
@@ -0,0 +1,21 @@
+from markupsafe import escape as raw_escape
+
+ALLOWED_ELEMENTS = ["<b>", "</b>", "<br/>"]
+ALLOWED_MAP = dict(map(lambda x: (x, raw_escape(x)), ALLOWED_ELEMENTS))
+
+
+def escape( value ):
+ """ A tool shed variant of markupsafe.escape that allows a select few
+ HTML elements that are repeatedly used in messages created deep
+ in the toolshed components. Ideally abstract things would be produced
+ in these components and messages in the views or client side - this is
+ what should be worked toward - but for now - we have this hack.
+
+ >>> escape("A <b>repo</b>")
+ u'A <b>repo</b>'
+ """
+ escaped = str( raw_escape( value ) )
+ # Unescape few selected tags.
+ for key, value in ALLOWED_MAP.iteritems():
+ escaped = escaped.replace(value, key)
+ return escaped
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
6 new commits in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/919dd242fd62/
Changeset: 919dd242fd62
Branch: arff_datatype
User: BjoernGruening
Date: 2014-12-16 11:40:29+00:00
Summary: Created new branch arff_datatype
Affected #: 0 files
https://bitbucket.org/galaxy/galaxy-central/commits/807b1e8a7815/
Changeset: 807b1e8a7815
Branch: arff_datatype
User: BjoernGruening
Date: 2014-12-16 12:08:50+00:00
Summary: Add Arff datatype class with sniffer and metadata.
Affected #: 1 file
diff -r 919dd242fd625a71d26b3935cabfbdb9b48c97b4 -r 807b1e8a7815f1514712146e909a508acc31a93d lib/galaxy/datatypes/text.py
--- a/lib/galaxy/datatypes/text.py
+++ b/lib/galaxy/datatypes/text.py
@@ -6,6 +6,7 @@
from galaxy.datatypes.data import Text
from galaxy.datatypes.data import get_file_peek
from galaxy.datatypes.data import nice_size
+from galaxy.datatypes.metadata import MetadataElement
from galaxy import util
import tempfile
@@ -155,3 +156,106 @@
return True
return False
+
+class Arff( Text ):
+ """
+ An ARFF (Attribute-Relation File Format) file is an ASCII text file that describes a list of instances sharing a set of attributes.
+ http://weka.wikispaces.com/ARFF
+ """
+ file_ext = "arff"
+
+
+ """Add metadata elements"""
+ MetadataElement( name="comment_lines", default=0, desc="Number of comment lines", readonly=True, optional=True, no_value=0 )
+ MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=True, no_value=0 )
+
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+ dataset.blurb = "Attribute-Relation File Format (ARFF)"
+ dataset.blurb += ", %s comments, %s attributes" % ( dataset.metadata.comment_lines, dataset.metadata.columns )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disc'
+
+ def sniff( self, filename ):
+ """
+ Try to guess the Arff filetype.
+ It usually starts with a "format-version:" string and has several stanzas which starts with "id:".
+ """
+ with open( filename ) as handle:
+ relation_found = False
+ attribute_found = False
+ prefix = ""
+ for line_count, line in enumerate( handle ):
+ if line_count > 1000:
+ # only investigate the first 1000 lines
+ return False
+ line = line.strip()
+ if not line:
+ continue
+
+ start_string = line[:20].upper()
+ if start_string.startswith("@RELATION"):
+ relation_found = True
+ elif start_string.startswith("@ATTRIBUTE"):
+ attribute_found = True
+ elif start_string.startswith("@DATA"):
+ # @DATA should be the last data block
+ if relation_found and attribute_found:
+ return True
+ return False
+
+ def set_meta( self, dataset, **kwd ):
+ """
+ Trying to count the comment lines and the number of columns included.
+ A typical ARFF data block looks like this:
+ @DATA
+ 5.1,3.5,1.4,0.2,Iris-setosa
+ 4.9,3.0,1.4,0.2,Iris-setosa
+ """
+ if dataset.has_data():
+ comment_lines = 0
+ first_real_line = False
+ data_block = False
+ with open( dataset.file_name ) as handle:
+ for line in handle:
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('%') and not first_real_line:
+ comment_lines += 1
+ else:
+ first_real_line = True
+ if data_block:
+ if line.startswith('{'):
+ # Sparse representation
+ """
+ @data
+ 0, X, 0, Y, "class A", {5}
+ or
+ @data
+ {1 X, 3 Y, 4 "class A"}, {5}
+ """
+ token = line.split('}',1)
+ first_part = token[0]
+ last_column = first_part.split(',')[-1].strip()
+ numeric_value = last_column.split()[0]
+ column_count = int(numeric_value)
+ if len(token) > 1:
+ # we have an additional weight
+ column_count -= 1
+ else:
+ columns = line.strip().split(',')
+ column_count = len(columns)
+ if columns[-1].strip().startswith('{'):
+ # we have an additional weight at the end
+ column_count -= 1
+
+ # We have now the column_count and we know the initial comment lines. So we can terminate here.
+ break
+ if line[:5].upper() == "@DATA":
+ data_block = True
+ dataset.metadata.comment_lines = comment_lines
+ dataset.metadata.columns = column_count
+
https://bitbucket.org/galaxy/galaxy-central/commits/a876fb01dba3/
Changeset: a876fb01dba3
Branch: arff_datatype
User: BjoernGruening
Date: 2014-12-16 12:16:00+00:00
Summary: Add Arff datatype and sniffer to datatypes_conf.xml.
Affected #: 1 file
diff -r 807b1e8a7815f1514712146e909a508acc31a93d -r a876fb01dba3595d68ae99fea8e2070fa3ffad55 config/datatypes_conf.xml.sample
--- a/config/datatypes_conf.xml.sample
+++ b/config/datatypes_conf.xml.sample
@@ -3,6 +3,7 @@
<registration converters_path="lib/galaxy/datatypes/converters" display_path="display_applications"><datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true" description="A binary sequence file in 'ab1' format with a '.ab1' file extension. You must manually select this 'File Format' when uploading the file." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Ab1"/><datatype extension="afg" type="galaxy.datatypes.assembly:Amos" display_in_upload="false" />
+ <!--datatype extension="arff" type="galaxy.datatypes.text:Arff" mimetype="text/plain" display_in_upload="True"/--><datatype extension="asn1" type="galaxy.datatypes.data:GenericAsn1" mimetype="text/plain" display_in_upload="true" /><datatype extension="asn1-binary" type="galaxy.datatypes.binary:GenericAsn1Binary" mimetype="application/octet-stream" display_in_upload="true" /><datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true" description="blastz pairwise alignment format. Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines. The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Axt"/>
@@ -298,6 +299,7 @@
<sniffer type="galaxy.datatypes.data:Newick"/><sniffer type="galaxy.datatypes.data:Nexus"/><sniffer type="galaxy.datatypes.text:Obo"/>
+ <!--sniffer type="galaxy.datatypes.text.Arff"/--><sniffer type="galaxy.datatypes.text:Ipynb"/><sniffer type="galaxy.datatypes.text:Json"/><sniffer type="galaxy.datatypes.images:Jpg"/>
https://bitbucket.org/galaxy/galaxy-central/commits/d91d8e8beada/
Changeset: d91d8e8beada
Branch: arff_datatype
User: BjoernGruening
Date: 2014-12-23 00:27:36+00:00
Summary: Activate arff datatype by default
Affected #: 1 file
diff -r a876fb01dba3595d68ae99fea8e2070fa3ffad55 -r d91d8e8beadac2a187645f86c199119064a5ccb0 config/datatypes_conf.xml.sample
--- a/config/datatypes_conf.xml.sample
+++ b/config/datatypes_conf.xml.sample
@@ -3,7 +3,7 @@
<registration converters_path="lib/galaxy/datatypes/converters" display_path="display_applications"><datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true" description="A binary sequence file in 'ab1' format with a '.ab1' file extension. You must manually select this 'File Format' when uploading the file." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Ab1"/><datatype extension="afg" type="galaxy.datatypes.assembly:Amos" display_in_upload="false" />
- <!--datatype extension="arff" type="galaxy.datatypes.text:Arff" mimetype="text/plain" display_in_upload="True"/-->
+ <datatype extension="arff" type="galaxy.datatypes.text:Arff" mimetype="text/plain" display_in_upload="True" /><datatype extension="asn1" type="galaxy.datatypes.data:GenericAsn1" mimetype="text/plain" display_in_upload="true" /><datatype extension="asn1-binary" type="galaxy.datatypes.binary:GenericAsn1Binary" mimetype="application/octet-stream" display_in_upload="true" /><datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true" description="blastz pairwise alignment format. Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines. The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Axt"/>
@@ -299,7 +299,7 @@
<sniffer type="galaxy.datatypes.data:Newick"/><sniffer type="galaxy.datatypes.data:Nexus"/><sniffer type="galaxy.datatypes.text:Obo"/>
- <!--sniffer type="galaxy.datatypes.text.Arff"/-->
+ <sniffer type="galaxy.datatypes.text.Arff"/><sniffer type="galaxy.datatypes.text:Ipynb"/><sniffer type="galaxy.datatypes.text:Json"/><sniffer type="galaxy.datatypes.images:Jpg"/>
https://bitbucket.org/galaxy/galaxy-central/commits/63ec80be64bb/
Changeset: 63ec80be64bb
User: jmchilton
Date: 2014-12-26 22:33:02+00:00
Summary: Merge pull request #614.
Affected #: 2 files
diff -r 338a32cc6067d92055324e4b827697051ec99254 -r 63ec80be64bb00414be0f7122a51907114ffb714 config/datatypes_conf.xml.sample
--- a/config/datatypes_conf.xml.sample
+++ b/config/datatypes_conf.xml.sample
@@ -3,6 +3,7 @@
<registration converters_path="lib/galaxy/datatypes/converters" display_path="display_applications"><datatype extension="ab1" type="galaxy.datatypes.binary:Ab1" mimetype="application/octet-stream" display_in_upload="true" description="A binary sequence file in 'ab1' format with a '.ab1' file extension. You must manually select this 'File Format' when uploading the file." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Ab1"/><datatype extension="afg" type="galaxy.datatypes.assembly:Amos" display_in_upload="false" />
+ <datatype extension="arff" type="galaxy.datatypes.text:Arff" mimetype="text/plain" display_in_upload="True" /><datatype extension="asn1" type="galaxy.datatypes.data:GenericAsn1" mimetype="text/plain" display_in_upload="true" /><datatype extension="asn1-binary" type="galaxy.datatypes.binary:GenericAsn1Binary" mimetype="application/octet-stream" display_in_upload="true" /><datatype extension="axt" type="galaxy.datatypes.sequence:Axt" display_in_upload="true" description="blastz pairwise alignment format. Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines. Blocks are separated from one another by blank lines. The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Axt"/>
@@ -298,6 +299,7 @@
<sniffer type="galaxy.datatypes.data:Newick"/><sniffer type="galaxy.datatypes.data:Nexus"/><sniffer type="galaxy.datatypes.text:Obo"/>
+ <sniffer type="galaxy.datatypes.text.Arff"/><sniffer type="galaxy.datatypes.text:Ipynb"/><sniffer type="galaxy.datatypes.text:Json"/><sniffer type="galaxy.datatypes.images:Jpg"/>
diff -r 338a32cc6067d92055324e4b827697051ec99254 -r 63ec80be64bb00414be0f7122a51907114ffb714 lib/galaxy/datatypes/text.py
--- a/lib/galaxy/datatypes/text.py
+++ b/lib/galaxy/datatypes/text.py
@@ -6,6 +6,7 @@
from galaxy.datatypes.data import Text
from galaxy.datatypes.data import get_file_peek
from galaxy.datatypes.data import nice_size
+from galaxy.datatypes.metadata import MetadataElement
from galaxy import util
import tempfile
@@ -155,3 +156,106 @@
return True
return False
+
+class Arff( Text ):
+ """
+ An ARFF (Attribute-Relation File Format) file is an ASCII text file that describes a list of instances sharing a set of attributes.
+ http://weka.wikispaces.com/ARFF
+ """
+ file_ext = "arff"
+
+
+ """Add metadata elements"""
+ MetadataElement( name="comment_lines", default=0, desc="Number of comment lines", readonly=True, optional=True, no_value=0 )
+ MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=True, no_value=0 )
+
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+ dataset.blurb = "Attribute-Relation File Format (ARFF)"
+ dataset.blurb += ", %s comments, %s attributes" % ( dataset.metadata.comment_lines, dataset.metadata.columns )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disc'
+
+ def sniff( self, filename ):
+ """
+ Try to guess the Arff filetype.
+ It usually starts with a "format-version:" string and has several stanzas which starts with "id:".
+ """
+ with open( filename ) as handle:
+ relation_found = False
+ attribute_found = False
+ prefix = ""
+ for line_count, line in enumerate( handle ):
+ if line_count > 1000:
+ # only investigate the first 1000 lines
+ return False
+ line = line.strip()
+ if not line:
+ continue
+
+ start_string = line[:20].upper()
+ if start_string.startswith("@RELATION"):
+ relation_found = True
+ elif start_string.startswith("@ATTRIBUTE"):
+ attribute_found = True
+ elif start_string.startswith("@DATA"):
+ # @DATA should be the last data block
+ if relation_found and attribute_found:
+ return True
+ return False
+
+ def set_meta( self, dataset, **kwd ):
+ """
+ Trying to count the comment lines and the number of columns included.
+ A typical ARFF data block looks like this:
+ @DATA
+ 5.1,3.5,1.4,0.2,Iris-setosa
+ 4.9,3.0,1.4,0.2,Iris-setosa
+ """
+ if dataset.has_data():
+ comment_lines = 0
+ first_real_line = False
+ data_block = False
+ with open( dataset.file_name ) as handle:
+ for line in handle:
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('%') and not first_real_line:
+ comment_lines += 1
+ else:
+ first_real_line = True
+ if data_block:
+ if line.startswith('{'):
+ # Sparse representation
+ """
+ @data
+ 0, X, 0, Y, "class A", {5}
+ or
+ @data
+ {1 X, 3 Y, 4 "class A"}, {5}
+ """
+ token = line.split('}',1)
+ first_part = token[0]
+ last_column = first_part.split(',')[-1].strip()
+ numeric_value = last_column.split()[0]
+ column_count = int(numeric_value)
+ if len(token) > 1:
+ # we have an additional weight
+ column_count -= 1
+ else:
+ columns = line.strip().split(',')
+ column_count = len(columns)
+ if columns[-1].strip().startswith('{'):
+ # we have an additional weight at the end
+ column_count -= 1
+
+ # We have now the column_count and we know the initial comment lines. So we can terminate here.
+ break
+ if line[:5].upper() == "@DATA":
+ data_block = True
+ dataset.metadata.comment_lines = comment_lines
+ dataset.metadata.columns = column_count
+
https://bitbucket.org/galaxy/galaxy-central/commits/3ce3ebfdd734/
Changeset: 3ce3ebfdd734
Branch: arff_datatype
User: jmchilton
Date: 2014-12-26 22:34:57+00:00
Summary: Close arff_datatype.
Affected #: 0 files
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.