Hi,
I have the following GFF file produced by SNAP:
X1 SNAP Einit 2579 2712 -3.221 + . X1-snap.1
X1 SNAP Exon 2813 2945 4.836 + . X1-snap.1
X1 SNAP Eterm 3013 3033 10.467 + . X1-snap.1
X1 SNAP Esngl 3457 3702 -17.856 + . X1-snap.2
X1 SNAP Einit 4901 4974 -4.954 + . X1-snap.3
X1 SNAP Eterm 5021 5150 14.231 + . X1-snap.3
X1 SNAP Einit 6245 7325 -1.525 - . X1-snap.4
X1 SNAP Eterm 5974 6008 5.398 - . X1-snap.4
With the code below I have tried to parse the above GFF file:
from BCBio import GFF
from pprint import pprint
from BCBio.GFF import GFFExaminer
def retrieve_pred_genes_data():
    """Parse the sample SNAP GFF file and pretty-print each record's fields.

    Opens ``test/X1_small.snap.gff``, iterates over whatever ``GFF.parse``
    yields for it, and pretty-prints the ``id``, ``description``, ``name``
    and ``features`` attributes of every record, in that order.
    """
    with open("test/X1_small.snap.gff") as gff_handle:
        # Optional inspection of the file with GFFExaminer (left disabled):
        #examiner = GFFExaminer()
        #pprint(examiner.available_limits(gff_handle))
        for rec in GFF.parse(gff_handle):
            for field in ("id", "description", "name", "features"):
                pprint(getattr(rec, field))
            # The accesses below are left disabled: each one failed with the
            # error quoted next to it.
            #pprint(rec.type) #'SeqRecord' object has no attribute
            #pprint(rec.ref) #'SeqRecord' object has no attribute
            #pprint(rec.ref_db) #'SeqRecord' object has no attribute
            #pprint(rec.location) #'SeqRecord' object has no attribute
            #pprint(rec.location_operator) #'SeqRecord' object has no attribute
            #pprint(rec.strand) #'SeqRecord' object has no attribute
            #pprint(rec.sub_features) #'SeqRecord' object has no attribute
# Run the parser; it pretty-prints the fields of each parsed record.
retrieve_pred_genes_data()
and got the following output:
'X1'
'<unknown description>'
'<unknown name>'
[SeqFeature(FeatureLocation(ExactPosition(2578), ExactPosition(2712), strand=1), type='Einit'),
SeqFeature(FeatureLocation(ExactPosition(2812), ExactPosition(2945), strand=1), type='Exon'),
SeqFeature(FeatureLocation(ExactPosition(3012), ExactPosition(3033), strand=1), type='Eterm'),
SeqFeature(FeatureLocation(ExactPosition(3456), ExactPosition(3702), strand=1), type='Esngl'),
SeqFeature(FeatureLocation(ExactPosition(4900), ExactPosition(4974), strand=1), type='Einit'),
SeqFeature(FeatureLocation(ExactPosition(5020), ExactPosition(5150), strand=1), type='Eterm'),
SeqFeature(FeatureLocation(ExactPosition(6160), ExactPosition(7325), strand=-1), type='Einit'),
SeqFeature(FeatureLocation(ExactPosition(5973), ExactPosition(6008), strand=-1), type='Eterm')]
and with GFFExaminer I got these:
{'gff_id': {('X1',): 8},
'gff_source': {('SNAP',): 8},
'gff_source_type': {('SNAP', 'Einit'): 3,
('SNAP', 'Esngl'): 1,
('SNAP', 'Eterm'): 3,
('SNAP', 'Exon'): 1},
'gff_type': {('Einit',): 3, ('Esngl',): 1, ('Eterm',): 3, ('Exon',): 1}}
I found these examples ( https://github.com/patena/jonikaslab-mutant-pools/blob/master/notes_on_GFF_parsing.txt ), but I got these kinds of errors:
#pprint(rec.type) #'SeqRecord' object has no attribute
#pprint(rec.ref) #'SeqRecord' object has no attribute
#pprint(rec.ref_db) #'SeqRecord' object has no attribute
#pprint(rec.location) #'SeqRecord' object has no attribute
#pprint(rec.location_operator) #'SeqRecord' object has no attribute
#pprint(rec.strand) #'SeqRecord' object has no attribute
#pprint(rec.sub_features) #'SeqRecord' object has no attribute
What did I do wrong and how is it possible to access all fields in the above GFF file?
Thank you in advance.
Mic