3 new changesets in galaxy-central: http://bitbucket.org/galaxy/galaxy-central/changeset/83c4366e0641/ changeset: 83c4366e0641 branch: xml_filetype user: peterjc date: 2011-06-06 12:48:17 summary: Implement GenericXml filetype ('xml'). Closes issue #568 affected #: 2 files (1.3 KB) --- a/lib/galaxy/datatypes/registry.py Fri Jun 03 17:00:20 2011 -0400 +++ b/lib/galaxy/datatypes/registry.py Mon Jun 06 11:48:17 2011 +0100 @@ -180,7 +180,8 @@ 'tabular' : tabular.Tabular(), 'taxonomy' : tabular.Taxonomy(), 'txt' : data.Text(), - 'wig' : interval.Wiggle() + 'wig' : interval.Wiggle(), + 'xml' : xml.GenericXml(), } self.mimetypes_by_extension = { 'ab1' : 'application/octet-stream', @@ -211,7 +212,8 @@ 'tabular' : 'text/plain', 'taxonomy' : 'text/plain', 'txt' : 'text/plain', - 'wig' : 'text/plain' + 'wig' : 'text/plain', + 'xml' : 'application/xml', } # super supertype fix for input steps in workflows. if 'data' not in self.datatypes_by_extension: @@ -224,6 +226,7 @@ binary.Bam(), binary.Sff(), xml.BlastXml(), + xml.GenericXml(), sequence.Maf(), sequence.Lav(), sequence.csFasta(), --- a/lib/galaxy/datatypes/xml.py Fri Jun 03 17:00:20 2011 -0400 +++ b/lib/galaxy/datatypes/xml.py Mon Jun 06 11:48:17 2011 +0100 @@ -7,7 +7,43 @@ log = logging.getLogger(__name__) -class BlastXml( data.Text ): +class GenericXml( data.Text ): + """Base format class for any XML file.""" + file_ext = "xml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'XML data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + """ + Determines whether the file is XML or not + + >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) + >>> GenericXml().sniff( fname ) + True + >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' ) + >>> BlastXml().sniff( fname ) + True + >>> fname = get_test_fname( 'interval.interval' ) + >>> GenericXml().sniff( fname ) + False + """ + #TODO - Use a context manager on Python 2.5+ to close handle + handle = open(filename) + line = handle.readline() + handle.close() + + #TODO - Is there a more robust way to do this? + return line.startswith('<?xml ') + + +class BlastXml( GenericXml ): """NCBI Blast XML Output data""" file_ext = "blastxml" @@ -52,7 +88,7 @@ return True -class MEMEXml( data.Text ): +class MEMEXml( GenericXml ): """MEME XML Output data""" file_ext = "memexml" http://bitbucket.org/galaxy/galaxy-central/changeset/b005f35a1104/ changeset: b005f35a1104 branch: xml_filetype user: natefoo date: 2011-08-15 19:27:43 summary: Close the xml_filetype branch. affected #: 0 files (0 bytes) http://bitbucket.org/galaxy/galaxy-central/changeset/6165799c4e49/ changeset: 6165799c4e49 user: natefoo date: 2011-08-15 19:28:40 summary: Merged the xml_filetype branch from Peter Cock, which adds a generic XML datatype. affected #: 2 files (1.3 KB) --- a/lib/galaxy/datatypes/registry.py Mon Aug 15 09:18:52 2011 -0400 +++ b/lib/galaxy/datatypes/registry.py Mon Aug 15 13:28:40 2011 -0400 @@ -170,7 +170,8 @@ 'tabular' : tabular.Tabular(), 'taxonomy' : tabular.Taxonomy(), 'txt' : data.Text(), - 'wig' : interval.Wiggle() + 'wig' : interval.Wiggle(), + 'xml' : xml.GenericXml(), } self.mimetypes_by_extension = { 'ab1' : 'application/octet-stream', @@ -201,7 +202,8 @@ 'tabular' : 'text/plain', 'taxonomy' : 'text/plain', 'txt' : 'text/plain', - 'wig' : 'text/plain' + 'wig' : 'text/plain', + 'xml' : 'application/xml', } # super supertype fix for input steps in workflows. if 'data' not in self.datatypes_by_extension: @@ -214,6 +216,7 @@ binary.Bam(), binary.Sff(), xml.BlastXml(), + xml.GenericXml(), sequence.Maf(), sequence.Lav(), sequence.csFasta(), --- a/lib/galaxy/datatypes/xml.py Mon Aug 15 09:18:52 2011 -0400 +++ b/lib/galaxy/datatypes/xml.py Mon Aug 15 13:28:40 2011 -0400 @@ -7,7 +7,43 @@ log = logging.getLogger(__name__) -class BlastXml( data.Text ): +class GenericXml( data.Text ): + """Base format class for any XML file.""" + file_ext = "xml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'XML data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + """ + Determines whether the file is XML or not + + >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) + >>> GenericXml().sniff( fname ) + True + >>> fname = get_test_fname( 'tblastn_four_human_vs_rhodopsin.xml' ) + >>> BlastXml().sniff( fname ) + True + >>> fname = get_test_fname( 'interval.interval' ) + >>> GenericXml().sniff( fname ) + False + """ + #TODO - Use a context manager on Python 2.5+ to close handle + handle = open(filename) + line = handle.readline() + handle.close() + + #TODO - Is there a more robust way to do this? + return line.startswith('<?xml ') + + +class BlastXml( GenericXml ): """NCBI Blast XML Output data""" file_ext = "blastxml" @@ -52,7 +88,7 @@ return True -class MEMEXml( data.Text ): +class MEMEXml( GenericXml ): """MEME XML Output data""" file_ext = "memexml" Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.