# HG changeset patch -- Bitbucket.org # Project galaxy-dist # URL http://bitbucket.org/galaxy/galaxy-dist/overview # User jeremy goecks <jeremy.goecks@emory.edu> # Date 1289243703 18000 # Node ID c61ec9e333d5a0ade7a23b3f4b55ae3deb3aa0bb # Parent 629e14c5f561446c963b05560f245e2ed7041ff0 Enable gff2bed tool to convert GFF3 files to BED and add functional test for this conversion. --- a/tools/filters/gff2bed.xml +++ b/tools/filters/gff2bed.xml @@ -16,6 +16,11 @@ <param name="input" value="gff2bed_in2.gff" ftype="gff"/><output name="out_file1" file="gff2bed_out2.bed"/></test> + <test> + <!-- Test conversion of gff3 file. --> + <param name="input" value="5.gff3" ftype="gff"/> + <output name="out_file1" file="gff2bed_out3.bed"/> + </test></tests><help> --- /dev/null +++ b/test-data/gff2bed_out3.bed @@ -0,0 +1,145 @@ +ctgA 22131 24633 my_feature 0 + +ctgA 46989 48410 my_feature 0 - +ctgA 44704 47713 my_feature 0 - +ctgA 36648 40440 my_feature 0 - +ctgA 23071 23185 my_feature 0 + +ctgA 37241 38653 my_feature 0 + +ctgA 37496 40559 motif 0 - +ctgA 36033 38167 my_feature 0 + +ctgA 28331 30033 motif 0 - +ctgA 4714 5968 my_feature 0 - +ctgA 48252 48366 motif 0 + +ctgA 999 20000 BAC 0 + +ctgA 19499 20000 right_end_read 0 - +ctgA 999 1500 left_end_read 0 + +ctgA 13800 14007 motif 0 - +ctgA 1049 9000 coding 0 + +ctgA 1200 1500 CDS 0 + +ctgA 2999 3902 CDS 0 + +ctgA 4999 5500 CDS 0 + +ctgA 6999 7608 CDS 0 + +ctgA 1049 9000 processed_transcript 0 + +ctgA 1049 1200 5'-UTR 0 + +ctgA 7608 9000 3'-UTR 0 + +ctgA 5409 7503 match 0 - +ctgA 6999 7503 HSP 0 - +ctgA 5409 5500 HSP 0 - +ctgA 46011 48851 motif 0 + +ctgA 6884 8999 match 0 - +ctgA 8305 8999 HSP 0 - +ctgA 8054 8080 HSP 0 - +ctgA 7409 7737 HSP 0 - +ctgA 6884 7241 HSP 0 - +ctgA 13279 16394 my_feature 0 + +ctgA 29770 32937 match 0 + +ctgA 29770 29942 HSP 0 + +ctgA 30041 30340 HSP 0 + +ctgA 30809 31307 HSP 0 + +ctgA 31760 31984 HSP 0 + +ctgA 32373 32937 HSP 0 + +ctgA 36615 37227 match 0 - +ctgA 37207 37227 HSP 0 - +ctgA 36615 37057 HSP 0 - +ctgA 11910 15561 motif 0 + +ctgA 1049 3202 match 0 + +ctgA 1049 1500 HSP 0 + +ctgA 2999 3202 HSP 0 + +ctgA 15395 16159 motif 0 + +ctgA 1149 7200 match 0 + +ctgA 1149 1500 HSP 0 + +ctgA 4999 5500 HSP 0 + +ctgA 6999 7200 HSP 0 + +ctgA 1049 7300 match 0 + +ctgA 1049 1500 HSP 0 + +ctgA 4999 5500 HSP 0 + +ctgA 6999 7300 HSP 0 + +ctgA 19156 22915 my_feature 0 - +ctgA 7999 9000 match 0 - +ctgA 7999 9000 HSP 0 - +ctgA 28341 28447 motif 0 - +ctgA 17666 17690 motif 0 + +ctgA 44400 45925 trace 0 + +ctgA 26121 34466 match 0 + +ctgA 26121 26126 HSP 0 + +ctgA 26496 26869 HSP 0 + +ctgA 27200 27325 HSP 0 + +ctgA 27371 27433 HSP 0 + +ctgA 27564 27565 HSP 0 + +ctgA 27812 28091 HSP 0 + +ctgA 28092 28201 HSP 0 + +ctgA 28328 28377 HSP 0 + +ctgA 28828 29194 HSP 0 + +ctgA 29516 29702 HSP 0 + +ctgA 29712 30061 HSP 0 + +ctgA 30328 30774 HSP 0 + +ctgA 30807 31306 HSP 0 + +ctgA 31515 31729 HSP 0 + +ctgA 31752 32154 HSP 0 + +ctgA 32594 32696 HSP 0 + +ctgA 32891 32901 HSP 0 + +ctgA 33126 33388 HSP 0 + +ctgA 33438 33443 HSP 0 + +ctgA 33758 34209 HSP 0 + +ctgA 34400 34466 HSP 0 + +ctgA 0 50000 contig 0 + +ctgA 41136 47829 match 0 - +ctgA 47448 47829 HSP 0 - +ctgA 46815 46992 HSP 0 - +ctgA 46091 46318 HSP 0 - +ctgA 45789 46022 HSP 0 - +ctgA 45230 45488 HSP 0 - +ctgA 44762 45030 HSP 0 - +ctgA 44064 44556 HSP 0 - +ctgA 43394 43811 HSP 0 - +ctgA 42889 43270 HSP 0 - +ctgA 42056 42474 HSP 0 - +ctgA 41753 41948 HSP 0 - +ctgA 41136 41318 HSP 0 - +ctgA 12530 15870 match 0 + +ctgA 12530 12895 HSP 0 + +ctgA 13121 13449 HSP 0 + +ctgA 13451 13745 HSP 0 + +ctgA 13907 13965 HSP 0 + +ctgA 13997 14488 HSP 0 + +ctgA 14563 14899 HSP 0 + +ctgA 15184 15276 HSP 0 + +ctgA 15638 15736 HSP 0 + +ctgA 15744 15870 HSP 0 + +ctgA 7499 8000 match 0 - +ctgA 7499 8000 HSP 0 - +ctgA 1299 9000 coding 0 + +ctgA 3300 3902 CDS 0 + +ctgA 4999 5500 CDS 0 + +ctgA 6999 7600 CDS 0 + +ctgA 1299 9000 processed_transcript 0 + +ctgA 1299 1500 5'-UTR 0 + +ctgA 2999 3300 5'-UTR 0 + +ctgA 7600 9000 3'-UTR 0 + +ctgA 26502 35904 match 0 - +ctgA 35641 35904 HSP 0 - +ctgA 35332 35507 HSP 0 - +ctgA 34604 34983 HSP 0 - +ctgA 34243 34313 HSP 0 - +ctgA 33437 33868 HSP 0 - +ctgA 33052 33325 HSP 0 - +ctgA 32207 32680 HSP 0 - +ctgA 32009 32057 HSP 0 - +ctgA 31420 31817 HSP 0 - +ctgA 31231 31236 HSP 0 - +ctgA 30464 30798 HSP 0 - +ctgA 30107 30216 HSP 0 - +ctgA 29512 29647 HSP 0 - +ctgA 28776 29058 HSP 0 - +ctgA 28224 28316 HSP 0 - +ctgA 27886 28076 HSP 0 - +ctgA 27447 27860 HSP 0 - +ctgA 27171 27185 HSP 0 - +ctgA 26502 26799 HSP 0 - +ctgA 49405 50000 match 0 + +ctgA 49405 49476 HSP 0 + +ctgA 49761 50000 HSP 0 + +ctgA 1049 9000 gene 0 + +ctgA 33324 35791 motif 0 + +ctgA 31784 32359 match 0 + +ctgA 31784 31939 HSP 0 + +ctgA 32328 32359 HSP 0 + --- a/lib/galaxy/tools/util/gff_util.py +++ b/lib/galaxy/tools/util/gff_util.py @@ -42,13 +42,18 @@ def convert_gff_coords_to_bed( interval def parse_gff_attributes( attr_str ): """ - Parses a GFF attribute string and returns a dictionary of name-value pairs. - The general format for a GFF attribute string is name1 "value1" ; name2 "value2" + Parses a GFF/GTF attribute string and returns a dictionary of name-value pairs. + The general format for a GFF3 attributes string is name1=value1;name2=value2 + The general format for a GTF attribute string is name1 "value1" ; name2 "value2" """ attributes_list = attr_str.split(";") attributes = {} for name_value_pair in attributes_list: + # Try splitting by space and, if necessary, by '=' sign. pair = name_value_pair.strip().split(" ") + if len( pair ) == 1: + pair = name_value_pair.strip().split("=") + print pair if pair == '': continue name = pair[0].strip()