details: http://www.bx.psu.edu/hg/galaxy/rev/b5391bd3e9c7 changeset: 3784:b5391bd3e9c7 user: jeremy goecks <jeremy.goecks@emory.edu> date: Fri May 14 10:15:48 2010 -0400 description: Add GFF filtering tool. Tool filters GFF file based on an attribute value. diffstat: test-data/gff_filtering_in1.gff | 200 +++++++++++++++++++++++++++++++++++++++ test-data/gff_filtering_out1.gff | 46 ++++++++ tool_conf.xml.sample | 13 +- tools/ngs_rna/gff_filtering.py | 137 ++++++++++++++++++++++++++ tools/ngs_rna/gff_filtering.xml | 58 +++++++++++ 5 files changed, 449 insertions(+), 5 deletions(-) diffs (481 lines): diff -r e972d3f8413e -r b5391bd3e9c7 test-data/gff_filtering_in1.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gff_filtering_in1.gff Fri May 14 10:15:48 2010 -0400 @@ -0,0 +1,200 @@ +chr1 Cufflinks exon 21199802 21199861 1000 + . gene_id "CUFF.1383"; transcript_id "CUFF.1383.1"; exon_number "1"; FPKM "5.0037092300"; frac "1.000000"; conf_lo "0.000000"; conf_hi "15.011128"; cov "0.284211"; +chr1 Cufflinks transcript 55716264 55716295 1000 . . gene_id "CUFF.5021"; transcript_id "CUFF.5021.1"; FPKM "29.7095235531"; frac "1.000000"; conf_lo "0.000000"; conf_hi "71.725135"; cov "1.687500"; +chr1 Cufflinks transcript 74904128 74904154 1000 . . gene_id "CUFF.7563"; transcript_id "CUFF.7563.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr1 Cufflinks exon 134487086 134487123 1000 . . gene_id "CUFF.11837"; transcript_id "CUFF.11837.1"; exon_number "1"; FPKM "25.0185461500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "60.400113"; cov "1.421053"; +chr1 Cufflinks exon 136200923 136200959 1000 . . gene_id "CUFF.12099"; transcript_id "CUFF.12099.1"; exon_number "1"; FPKM "38.5420846095"; frac "1.000000"; conf_lo "0.000000"; conf_hi "83.046650"; cov "2.189189"; +chr1 Cufflinks transcript 138116837 138116866 1000 . . gene_id "CUFF.12557"; transcript_id "CUFF.12557.1"; FPKM "31.6901584567"; frac "1.000000"; conf_lo "0.000000"; conf_hi "76.506810"; cov "1.800000"; +chr1 Cufflinks exon 152240694 152240779 1000 . . gene_id "CUFF.13107"; transcript_id "CUFF.13107.1"; exon_number "1"; FPKM "16.5820596576"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.729373"; cov "0.941860"; +chr1 Cufflinks exon 157633998 157634024 1000 . . gene_id "CUFF.13967"; transcript_id "CUFF.13967.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr1 Cufflinks exon 167704819 167704854 1000 . . gene_id "CUFF.15347"; transcript_id "CUFF.15347.1"; exon_number "1"; FPKM "26.4084653806"; frac "1.000000"; conf_lo "0.000000"; conf_hi "63.755675"; cov "1.500000"; +chr1 Cufflinks transcript 184245403 184245429 1000 . . gene_id "CUFF.17679"; transcript_id "CUFF.17679.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr10 Cufflinks exon 12731734 12731826 1000 . . gene_id "CUFF.19287"; transcript_id "CUFF.19287.1"; exon_number "1"; FPKM "10.2226317602"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.679616"; cov "0.580645"; +chr10 Cufflinks transcript 42305464 42305515 1000 . . gene_id "CUFF.21709"; transcript_id "CUFF.21709.1"; FPKM "18.2827837250"; frac "1.000000"; conf_lo "0.000000"; conf_hi "44.138544"; cov "1.038462"; +chr10 Cufflinks exon 52431636 52431662 1000 . . gene_id "CUFF.22495"; transcript_id "CUFF.22495.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr10 Cufflinks transcript 62044837 62045189 1000 . . gene_id "CUFF.23531"; transcript_id "CUFF.23531.1"; FPKM "19.5178121606"; frac "1.000000"; conf_lo "9.264456"; conf_hi "29.771168"; cov "1.108611"; +chr10 Cufflinks transcript 75372919 75373002 1000 . . gene_id "CUFF.24985"; transcript_id "CUFF.24985.1"; FPKM "124.4970510798"; frac "1.000000"; conf_lo "71.411330"; conf_hi "177.582772"; cov "7.071429"; +chr10 Cufflinks transcript 80362428 80363292 1000 - . gene_id "CUFF.26065"; transcript_id "CUFF.26065.1"; FPKM "43.6170921216"; frac "1.000000"; conf_lo "32.260169"; conf_hi "54.974016"; cov "2.477449"; +chr10 Cufflinks exon 87908564 87908597 1000 + . gene_id "CUFF.27209"; transcript_id "CUFF.27209.1"; exon_number "2"; FPKM "11.4913582411"; frac "1.000000"; conf_lo "0.000000"; conf_hi "30.264098"; cov "0.652709"; +chr10 Cufflinks exon 89142681 89142707 1000 . . gene_id "CUFF.27307"; transcript_id "CUFF.27307.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr10 Cufflinks exon 93425781 93425807 1000 . . gene_id "CUFF.28101"; transcript_id "CUFF.28101.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr10 Cufflinks exon 108207916 108207986 1000 . . gene_id "CUFF.29319"; transcript_id "CUFF.29319.1"; exon_number "1"; FPKM "13.3902077986"; frac "1.000000"; conf_lo "0.000000"; conf_hi "32.326821"; cov "0.760563"; +chr10 Cufflinks transcript 117583959 117584039 1000 . . gene_id "CUFF.30522"; transcript_id "CUFF.30522.1"; FPKM "17.6056435870"; frac "1.000000"; conf_lo "0.000000"; conf_hi "37.934890"; cov "1.000000"; +chr11 Cufflinks transcript 3357264 3357347 1000 . . gene_id "CUFF.32520"; transcript_id "CUFF.32520.1"; FPKM "16.9768706018"; frac "1.000000"; conf_lo "0.000000"; conf_hi "36.580072"; cov "0.964286"; +chr11 Cufflinks transcript 5005248 5005351 1000 . . gene_id "CUFF.33024"; transcript_id "CUFF.33024.1"; FPKM "9.1413918625"; frac "1.000000"; conf_lo "0.000000"; conf_hi "22.069272"; cov "0.519231"; +chr11 Cufflinks transcript 7904565 7904642 1000 . . gene_id "CUFF.33508"; transcript_id "CUFF.33508.1"; FPKM "61.6484988869"; frac "1.000000"; conf_lo "22.882428"; conf_hi "100.414569"; cov "3.501633"; +chr11 Cufflinks exon 49932116 49932142 1000 . . gene_id "CUFF.37546"; transcript_id "CUFF.37546.1"; exon_number "1"; FPKM "17.6056435870"; frac "1.000000"; conf_lo "0.000000"; conf_hi "52.816931"; cov "1.000000"; +chr11 Cufflinks transcript 60046420 60046446 1000 . . gene_id "CUFF.39152"; transcript_id "CUFF.39152.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr11 Cufflinks exon 78140156 78140259 1000 . . gene_id "CUFF.43148"; transcript_id "CUFF.43148.1"; exon_number "1"; FPKM "54.8483511750"; frac "1.000000"; conf_lo "23.181641"; conf_hi "86.515061"; cov "3.115385"; +chr11 Cufflinks transcript 79846512 79846547 1000 . . gene_id "CUFF.43570"; transcript_id "CUFF.43570.1"; FPKM "26.4084653806"; frac "1.000000"; conf_lo "0.000000"; conf_hi "63.755675"; cov "1.500000"; +chr11 Cufflinks transcript 85583926 85583952 1000 . . gene_id "CUFF.44478"; transcript_id "CUFF.44478.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr11 Cufflinks transcript 98554214 98554301 1000 . . gene_id "CUFF.46823"; transcript_id "CUFF.46823.1"; FPKM "10.8034631102"; frac "1.000000"; conf_lo "0.000000"; conf_hi "26.081867"; cov "0.613636"; +chr11 Cufflinks transcript 100797512 100797539 1000 . . gene_id "CUFF.47191"; transcript_id "CUFF.47191.1"; FPKM "33.9537412036"; frac "1.000000"; conf_lo "0.000000"; conf_hi "81.971583"; cov "1.928571"; +chr11 Cufflinks exon 105616462 105616737 1000 . . gene_id "CUFF.48385"; transcript_id "CUFF.48385.1"; exon_number "1"; FPKM "18.9452034252"; frac "1.000000"; conf_lo "7.520816"; conf_hi "30.369591"; cov "1.076087"; +chr11 Cufflinks transcript 106985457 106985483 1000 . . gene_id "CUFF.48949"; transcript_id "CUFF.48949.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr11 Cufflinks exon 120257794 120257821 1000 . . gene_id "CUFF.51205"; transcript_id "CUFF.51205.1"; exon_number "1"; FPKM "50.9306118054"; frac "1.000000"; conf_lo "0.000000"; conf_hi "109.740217"; cov "2.892857"; +chr12 Cufflinks transcript 16542765 16542791 1000 . . gene_id "CUFF.52907"; transcript_id "CUFF.52907.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr12 Cufflinks exon 21319385 21319483 1000 . . gene_id "CUFF.53189"; transcript_id "CUFF.53189.1"; exon_number "1"; FPKM "9.6030783202"; frac "1.000000"; conf_lo "0.000000"; conf_hi "23.183882"; cov "0.545455"; +chr12 Cufflinks transcript 30469129 30469156 1000 . . gene_id "CUFF.53733"; transcript_id "CUFF.53733.1"; FPKM "33.9537412036"; frac "1.000000"; conf_lo "0.000000"; conf_hi "81.971583"; cov "1.928571"; +chr12 Cufflinks transcript 30557416 30557442 1000 . . gene_id "CUFF.53863"; transcript_id "CUFF.53863.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr12 Cufflinks exon 30701762 30702509 1000 . . gene_id "CUFF.53897"; transcript_id "CUFF.53897.1"; exon_number "1"; FPKM "48.9333329111"; frac "1.000000"; conf_lo "37.780391"; conf_hi "60.086275"; cov "2.779412"; +chr12 Cufflinks transcript 56895077 56895103 1000 . . gene_id "CUFF.55817"; transcript_id "CUFF.55817.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr12 Cufflinks transcript 81915713 81915797 1000 . . gene_id "CUFF.58203"; transcript_id "CUFF.58203.1"; FPKM "22.3695236165"; frac "1.000000"; conf_lo "0.000000"; conf_hi "44.739047"; cov "1.270588"; +chr12 Cufflinks transcript 118458135 118458232 1000 . . gene_id "CUFF.63640"; transcript_id "CUFF.63640.1"; FPKM "9.7010689153"; frac "1.000000"; conf_lo "0.000000"; conf_hi "23.420452"; cov "0.551020"; +chr13 Cufflinks exon 8262302 8262372 1000 . . gene_id "CUFF.64064"; transcript_id "CUFF.64064.1"; exon_number "1"; FPKM "13.3902077986"; frac "1.000000"; conf_lo "0.000000"; conf_hi "32.326821"; cov "0.760563"; +chr13 Cufflinks transcript 14110852 14110956 1000 . . gene_id "CUFF.64684"; transcript_id "CUFF.64684.1"; FPKM "13.5814964814"; frac "1.000000"; conf_lo "0.000000"; conf_hi "29.264058"; cov "0.771429"; +chr13 Cufflinks transcript 14146567 14146593 1000 . . gene_id "CUFF.64702"; transcript_id "CUFF.64702.1"; FPKM "17.6056435870"; frac "1.000000"; conf_lo "0.000000"; conf_hi "52.816931"; cov "1.000000"; +chr13 Cufflinks transcript 14564150 14564243 1000 . . gene_id "CUFF.64894"; transcript_id "CUFF.64894.1"; FPKM "15.1708205378"; frac "1.000000"; conf_lo "0.000000"; conf_hi "32.688575"; cov "0.861702"; +chr13 Cufflinks transcript 17823162 17823188 1000 . . gene_id "CUFF.65082"; transcript_id "CUFF.65082.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr13 Cufflinks transcript 29757640 29757666 1000 . . gene_id "CUFF.65972"; transcript_id "CUFF.65972.1"; FPKM "70.4225743482"; frac "1.000000"; conf_lo "0.000000"; conf_hi "140.845149"; cov "4.000000"; +chr13 Cufflinks transcript 42402998 42403051 1000 . . gene_id "CUFF.66816"; transcript_id "CUFF.66816.1"; FPKM "26.4084653806"; frac "1.000000"; conf_lo "0.000000"; conf_hi "56.902335"; cov "1.500000"; +chr13 Cufflinks transcript 42853325 42853406 1000 . . gene_id "CUFF.66850"; transcript_id "CUFF.66850.1"; FPKM "17.3909406165"; frac "1.000000"; conf_lo "0.000000"; conf_hi "37.472269"; cov "0.987805"; +chr13 Cufflinks exon 42990504 42990546 1000 . . gene_id "CUFF.66994"; transcript_id "CUFF.66994.1"; exon_number "1"; FPKM "33.1641193151"; frac "1.000000"; conf_lo "0.000000"; conf_hi "71.458746"; cov "1.883721"; +chr13 Cufflinks exon 49159496 49159569 1000 . . gene_id "CUFF.67788"; transcript_id "CUFF.67788.1"; exon_number "1"; FPKM "44.9657653777"; frac "1.000000"; conf_lo "10.974842"; conf_hi "78.956689"; cov "2.554054"; +chr13 Cufflinks exon 49243161 49243187 1000 + . gene_id "CUFF.67842"; transcript_id "CUFF.67842.1"; exon_number "2"; FPKM "17.3246767151"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.848677"; cov "0.984041"; +chr13 Cufflinks transcript 74671517 74671579 1000 . . gene_id "CUFF.70454"; transcript_id "CUFF.70454.1"; FPKM "18.8363155420"; frac "1.000000"; conf_lo "0.000000"; conf_hi "42.679570"; cov "1.069902"; +chr13 Cufflinks exon 100759121 100759147 1000 . . gene_id "CUFF.72728"; transcript_id "CUFF.72728.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr13 Cufflinks transcript 100200304 100200330 1000 . . gene_id "CUFF.73108"; transcript_id "CUFF.73108.1"; FPKM "123.2395051093"; frac "1.000000"; conf_lo "30.079196"; conf_hi "216.399814"; cov "7.000000"; +chr13 Cufflinks exon 107766857 107766915 1000 . . gene_id "CUFF.73428"; transcript_id "CUFF.73428.1"; exon_number "1"; FPKM "16.1136398932"; frac "1.000000"; conf_lo "0.000000"; conf_hi "38.901768"; cov "0.915254"; +chr13 Cufflinks transcript 115623552 115623640 1000 . . gene_id "CUFF.74362"; transcript_id "CUFF.74362.1"; FPKM "21.3641517686"; frac "1.000000"; conf_lo "0.000000"; conf_hi "42.728304"; cov "1.213483"; +chr14 Cufflinks transcript 31651249 31651275 1000 . . gene_id "CUFF.77182"; transcript_id "CUFF.77182.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr14 Cufflinks transcript 31949103 31949152 1000 . . gene_id "CUFF.77316"; transcript_id "CUFF.77316.1"; FPKM "85.5634278330"; frac "1.000000"; conf_lo "28.521143"; conf_hi "142.605713"; cov "4.860000"; +chr14 Cufflinks transcript 32190589 32190685 1000 . . gene_id "CUFF.77438"; transcript_id "CUFF.77438.1"; FPKM "14.7016199026"; frac "1.000000"; conf_lo "0.000000"; conf_hi "31.677588"; cov "0.835052"; +chr14 Cufflinks exon 55013239 55013265 1000 . . gene_id "CUFF.79372"; transcript_id "CUFF.79372.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr14 Cufflinks exon 55250464 55250514 1000 . . gene_id "CUFF.79616"; transcript_id "CUFF.79616.1"; exon_number "1"; FPKM "18.6412696804"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.004006"; cov "1.058824"; +chr14 Cufflinks exon 67604227 67604668 1000 . . gene_id "CUFF.81446"; transcript_id "CUFF.81446.1"; exon_number "1"; FPKM "123.6776546104"; frac "1.000000"; conf_lo "100.611653"; conf_hi "146.743656"; cov "7.024887"; +chr14 Cufflinks exon 75165582 75165744 1000 . . gene_id "CUFF.82088"; transcript_id "CUFF.82088.1"; exon_number "1"; FPKM "20.4139057543"; frac "1.000000"; conf_lo "4.982443"; conf_hi "35.845368"; cov "1.159509"; +chr15 Cufflinks transcript 8254591 8254734 1000 . . gene_id "CUFF.85556"; transcript_id "CUFF.85556.1"; FPKM "13.2042326903"; frac "1.000000"; conf_lo "0.000000"; conf_hi "26.408465"; cov "0.750000"; +chr15 Cufflinks transcript 34371407 34371433 1000 . . gene_id "CUFF.87246"; transcript_id "CUFF.87246.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr15 Cufflinks exon 65980225 65980255 1000 + . gene_id "CUFF.89502"; transcript_id "CUFF.89502.1"; exon_number "2"; FPKM "14.1193775302"; frac "1.000000"; conf_lo "0.000000"; conf_hi "30.423030"; cov "0.801980"; +chr15 Cufflinks transcript 85895971 85896018 1000 . . gene_id "CUFF.92834"; transcript_id "CUFF.92834.1"; FPKM "19.8063490354"; frac "1.000000"; conf_lo "0.000000"; conf_hi "47.816756"; cov "1.125000"; +chr15 Cufflinks exon 90828279 90828371 1000 . . gene_id "CUFF.93426"; transcript_id "CUFF.93426.1"; exon_number "1"; FPKM "15.3339476403"; frac "1.000000"; conf_lo "0.000000"; conf_hi "33.040065"; cov "0.870968"; +chr15 Cufflinks exon 99013431 99013472 1000 . . gene_id "CUFF.94604"; transcript_id "CUFF.94604.1"; exon_number "1"; FPKM "22.6358274691"; frac "1.000000"; conf_lo "0.000000"; conf_hi "54.647722"; cov "1.285714"; +chr15 Cufflinks transcript 102336976 102337075 1000 . . gene_id "CUFF.95298"; transcript_id "CUFF.95298.1"; FPKM "9.5070475370"; frac "1.000000"; conf_lo "0.000000"; conf_hi "22.952043"; cov "0.540000"; +chr16 Cufflinks transcript 10417989 10425202 1000 - . gene_id "CUFF.97232"; transcript_id "CUFF.97232.1"; FPKM "21.1642179587"; frac "1.000000"; conf_lo "0.000000"; conf_hi "44.805591"; cov "1.202127"; +chr16 Cufflinks transcript 40788986 40789084 1000 . . gene_id "CUFF.101774"; transcript_id "CUFF.101774.1"; FPKM "9.6030783202"; frac "1.000000"; conf_lo "0.000000"; conf_hi "23.183882"; cov "0.545455"; +chr16 Cufflinks transcript 57154027 57154067 1000 . . gene_id "CUFF.103364"; transcript_id "CUFF.103364.1"; FPKM "162.3154457537"; frac "1.000000"; conf_lo "75.554191"; conf_hi "249.076701"; cov "9.219512"; +chr16 Cufflinks exon 74862302 74862560 1000 . . gene_id "CUFF.105450"; transcript_id "CUFF.105450.1"; exon_number "1"; FPKM "11.0120241741"; frac "1.000000"; conf_lo "2.020744"; conf_hi "20.003304"; cov "0.625483"; +chr16 Cufflinks exon 91023578 91023669 1000 . . gene_id "CUFF.106912"; transcript_id "CUFF.106912.1"; exon_number "1"; FPKM "10.3337473228"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.947873"; cov "0.586957"; +chr16 Cufflinks transcript 98168779 98168914 1000 . . gene_id "CUFF.107834"; transcript_id "CUFF.107834.1"; FPKM "24.4666664555"; frac "1.000000"; conf_lo "5.971605"; conf_hi "42.961728"; cov "1.389706"; +chr17 Cufflinks exon 8483212 8483268 1000 . . gene_id "CUFF.108498"; transcript_id "CUFF.108498.1"; exon_number "1"; FPKM "50.0370923000"; frac "1.000000"; conf_lo "9.181978"; conf_hi "90.892207"; cov "2.842105"; +chr17 Cufflinks transcript 17478023 17478102 1000 . . gene_id "CUFF.109264"; transcript_id "CUFF.109264.1"; FPKM "23.7676188425"; frac "1.000000"; conf_lo "0.000000"; conf_hi "47.535238"; cov "1.350000"; +chr17 Cufflinks exon 24611086 24611151 1000 . . gene_id "CUFF.110088"; transcript_id "CUFF.110088.1"; exon_number "1"; FPKM "21.6069262205"; frac "1.000000"; conf_lo "0.000000"; conf_hi "46.556456"; cov "1.227273"; +chr17 Cufflinks exon 30355791 30355913 1000 . . gene_id "CUFF.111759"; transcript_id "CUFF.111759.1"; exon_number "1"; FPKM "19.3232673516"; frac "1.000000"; conf_lo "2.040012"; conf_hi "36.606523"; cov "1.097561"; +chr17 Cufflinks exon 31608702 31608787 1000 . . gene_id "CUFF.111943"; transcript_id "CUFF.111943.1"; exon_number "1"; FPKM "16.5820596576"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.729373"; cov "0.941860"; +chr17 Cufflinks transcript 32414547 32414573 1000 . . gene_id "CUFF.112117"; transcript_id "CUFF.112117.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr17 Cufflinks transcript 33245409 33245463 1000 . . gene_id "CUFF.112363"; transcript_id "CUFF.112363.1"; FPKM "17.2855409764"; frac "1.000000"; conf_lo "0.000000"; conf_hi "41.730987"; cov "0.981818"; +chr17 Cufflinks transcript 56750339 56750374 1000 . . gene_id "CUFF.115617"; transcript_id "CUFF.115617.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "105.633862"; cov "3.000000"; +chr17 Cufflinks transcript 57144800 57144939 1000 . . gene_id "CUFF.115679"; transcript_id "CUFF.115679.1"; FPKM "13.5814964814"; frac "1.000000"; conf_lo "0.000000"; conf_hi "27.162993"; cov "0.771429"; +chr17 Cufflinks exon 71694628 71694708 1000 . . gene_id "CUFF.117117"; transcript_id "CUFF.117117.1"; exon_number "1"; FPKM "23.4741914494"; frac "1.000000"; conf_lo "0.000000"; conf_hi "46.948383"; cov "1.333333"; +chr17 Cufflinks transcript 71698227 71698263 1000 . . gene_id "CUFF.117119"; transcript_id "CUFF.117119.1"; FPKM "25.6947230730"; frac "1.000000"; conf_lo "0.000000"; conf_hi "62.032549"; cov "1.459459"; +chr17 Cufflinks exon 87862079 87862105 1000 . . gene_id "CUFF.118851"; transcript_id "CUFF.118851.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr18 Cufflinks transcript 39571718 39571880 1000 . . gene_id "CUFF.123569"; transcript_id "CUFF.123569.1"; FPKM "20.4139057543"; frac "1.000000"; conf_lo "4.982443"; conf_hi "35.845368"; cov "1.159509"; +chr18 Cufflinks exon 46452826 46452864 1000 . . gene_id "CUFF.124197"; transcript_id "CUFF.124197.1"; exon_number "1"; FPKM "48.7540899334"; frac "1.000000"; conf_lo "0.000000"; conf_hi "97.508180"; cov "2.769231"; +chr18 Cufflinks exon 50151390 50151416 1000 . . gene_id "CUFF.124439"; transcript_id "CUFF.124439.1"; exon_number "1"; FPKM "17.5638395629"; frac "1.000000"; conf_lo "0.000000"; conf_hi "52.733298"; cov "0.997626"; +chr18 Cufflinks exon 65106749 65106787 1000 . . gene_id "CUFF.125675"; transcript_id "CUFF.125675.1"; exon_number "1"; FPKM "48.7540899334"; frac "1.000000"; conf_lo "0.000000"; conf_hi "97.508180"; cov "2.769231"; +chr18 Cufflinks transcript 65202282 65202424 1000 . . gene_id "CUFF.125729"; transcript_id "CUFF.125729.1"; FPKM "13.2965699818"; frac "1.000000"; conf_lo "0.000000"; conf_hi "26.593140"; cov "0.755245"; +chr18 Cufflinks exon 69735254 69735280 1000 . . gene_id "CUFF.126371"; transcript_id "CUFF.126371.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr19 Cufflinks transcript 5370466 5370492 1000 . . gene_id "CUFF.129319"; transcript_id "CUFF.129319.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr19 Cufflinks exon 17633088 17633203 1000 . . gene_id "CUFF.131333"; transcript_id "CUFF.131333.1"; exon_number "1"; FPKM "20.4893265884"; frac "1.000000"; conf_lo "2.163116"; conf_hi "38.815537"; cov "1.163793"; +chr19 Cufflinks transcript 41997624 41997859 1000 . . gene_id "CUFF.133569"; transcript_id "CUFF.133569.1"; FPKM "28.1988698132"; frac "1.000000"; conf_lo "13.125940"; conf_hi "43.271800"; cov "1.601695"; +chr19 Cufflinks transcript 53311236 53311262 1000 . . gene_id "CUFF.134885"; transcript_id "CUFF.134885.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr19 Cufflinks exon 56516515 56516684 1000 . . gene_id "CUFF.135203"; transcript_id "CUFF.135203.1"; exon_number "1"; FPKM "33.5542854247"; frac "1.000000"; conf_lo "14.181710"; conf_hi "52.926861"; cov "1.905882"; +chr2 Cufflinks transcript 3246910 3246936 1000 . . gene_id "CUFF.136019"; transcript_id "CUFF.136019.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr2 Cufflinks transcript 4543774 4543977 1000 . . gene_id "CUFF.136435"; transcript_id "CUFF.136435.1"; FPKM "37.2825393608"; frac "1.000000"; conf_lo "18.641270"; conf_hi "55.923809"; cov "2.117647"; +chr2 Cufflinks exon 7271328 7271354 1000 . . gene_id "CUFF.137407"; transcript_id "CUFF.137407.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr2 Cufflinks exon 11509202 11509287 1000 . . gene_id "CUFF.137559"; transcript_id "CUFF.137559.1"; exon_number "1"; FPKM "22.1094128768"; frac "1.000000"; conf_lo "0.000000"; conf_hi "44.218826"; cov "1.255814"; +chr2 Cufflinks transcript 30200331 30200938 1000 . . gene_id "CUFF.140289"; transcript_id "CUFF.140289.1"; FPKM "100.0741846001"; frac "1.000000"; conf_lo "82.383401"; conf_hi "117.764968"; cov "5.684211"; +chr2 Cufflinks transcript 49559781 49559842 1000 . . gene_id "CUFF.143035"; transcript_id "CUFF.143035.1"; FPKM "15.3339476403"; frac "1.000000"; conf_lo "0.000000"; conf_hi "37.019424"; cov "0.870968"; +chr2 Cufflinks transcript 78736720 78736746 1000 . . gene_id "CUFF.146383"; transcript_id "CUFF.146383.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr2 Cufflinks transcript 97470595 97470703 1000 . . gene_id "CUFF.148117"; transcript_id "CUFF.148117.1"; FPKM "17.4441239211"; frac "1.000000"; conf_lo "0.000000"; conf_hi "34.888248"; cov "0.990826"; +chr2 Cufflinks transcript 106644220 106644341 1000 . . gene_id "CUFF.148977"; transcript_id "CUFF.148977.1"; FPKM "27.2743167045"; frac "1.000000"; conf_lo "6.656871"; conf_hi "47.891762"; cov "1.549180"; +chr2 Cufflinks transcript 118960859 118961004 1000 . . gene_id "CUFF.150015"; transcript_id "CUFF.150015.1"; FPKM "9.7675145928"; frac "1.000000"; conf_lo "0.000000"; conf_hi "21.046069"; cov "0.554795"; +chr2 Cufflinks exon 125388931 125389219 1000 . . gene_id "CUFF.151331"; transcript_id "CUFF.151331.1"; exon_number "1"; FPKM "23.0274507817"; frac "1.000000"; conf_lo "10.718761"; conf_hi "35.336141"; cov "1.307958"; +chr2 Cufflinks transcript 145141669 145141755 1000 . . gene_id "CUFF.153993"; transcript_id "CUFF.153993.1"; FPKM "10.9276408471"; frac "1.000000"; conf_lo "0.000000"; conf_hi "26.381659"; cov "0.620690"; +chr2 Cufflinks exon 178035510 178035611 1000 . . gene_id "CUFF.158329"; transcript_id "CUFF.158329.1"; exon_number "1"; FPKM "9.3206348402"; frac "1.000000"; conf_lo "0.000000"; conf_hi "22.502003"; cov "0.529412"; +chr2 Cufflinks transcript 181047445 181047471 1000 . . gene_id "CUFF.158989"; transcript_id "CUFF.158989.1"; FPKM "17.6056435870"; frac "1.000000"; conf_lo "0.000000"; conf_hi "52.816931"; cov "1.000000"; +chr3 Cufflinks transcript 55664118 55664197 1000 . . gene_id "CUFF.163141"; transcript_id "CUFF.163141.1"; FPKM "11.8838094213"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.690054"; cov "0.675000"; +chr3 Cufflinks exon 66896354 66896433 1000 . . gene_id "CUFF.163935"; transcript_id "CUFF.163935.1"; exon_number "1"; FPKM "5.9419047106"; frac "1.000000"; conf_lo "0.000000"; conf_hi "17.825714"; cov "0.337500"; +chr3 Cufflinks transcript 80648745 80648771 1000 . . gene_id "CUFF.164995"; transcript_id "CUFF.164995.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr3 Cufflinks exon 107424385 107424452 1000 . . gene_id "CUFF.169311"; transcript_id "CUFF.169311.1"; exon_number "1"; FPKM "13.9809522603"; frac "1.000000"; conf_lo "0.000000"; conf_hi "33.753005"; cov "0.794118"; +chr3 Cufflinks transcript 130936639 130936898 1000 . . gene_id "CUFF.171349"; transcript_id "CUFF.171349.1"; FPKM "20.1110620975"; frac "1.000000"; conf_lo "7.983635"; conf_hi "32.238489"; cov "1.142308"; +chr3 Cufflinks exon 136592671 136592771 1000 . . gene_id "CUFF.171861"; transcript_id "CUFF.171861.1"; exon_number "1"; FPKM "32.9452142371"; frac "1.000000"; conf_lo "8.040973"; conf_hi "57.849455"; cov "1.871287"; +chr3 Cufflinks transcript 152641383 152641451 1000 . . gene_id "CUFF.172987"; transcript_id "CUFF.172987.1"; FPKM "20.6674946457"; frac "1.000000"; conf_lo "0.000000"; conf_hi "44.532262"; cov "1.173913"; +chr3 Cufflinks transcript 152861374 152861508 1000 . . gene_id "CUFF.173007"; transcript_id "CUFF.173007.1"; FPKM "24.6479010219"; frac "1.000000"; conf_lo "6.015839"; conf_hi "43.279963"; cov "1.400000"; +chr3 Cufflinks transcript 157698536 157698562 1000 . . gene_id "CUFF.173579"; transcript_id "CUFF.173579.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr4 Cufflinks exon 13715310 13715630 1000 . . gene_id "CUFF.174817"; transcript_id "CUFF.174817.1"; exon_number "1"; FPKM "19.2510308382"; frac "1.000000"; conf_lo "8.572480"; conf_hi "29.929581"; cov "1.093458"; +chr4 Cufflinks transcript 62063768 62063821 1000 . . gene_id "CUFF.179577"; transcript_id "CUFF.179577.1"; FPKM "26.3875633685"; frac "1.000000"; conf_lo "0.000000"; conf_hi "56.869362"; cov "1.498813"; +chr4 Cufflinks transcript 77180436 77180462 1000 . . gene_id "CUFF.181019"; transcript_id "CUFF.181019.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr4 Cufflinks transcript 102112070 102112096 1000 . . gene_id "CUFF.183381"; transcript_id "CUFF.183381.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr4 Cufflinks exon 117233771 117233830 1000 + . gene_id "CUFF.185373"; transcript_id "CUFF.185373.1"; exon_number "1"; FPKM "4.8015391601"; frac "1.000000"; conf_lo "0.000000"; conf_hi "14.404617"; cov "0.272727"; +chr4 Cufflinks exon 135563059 135563110 1000 . . gene_id "CUFF.189099"; transcript_id "CUFF.189099.1"; exon_number "1"; FPKM "18.2827837250"; frac "1.000000"; conf_lo "0.000000"; conf_hi "44.138544"; cov "1.038462"; +chr4 Cufflinks exon 147515029 147515097 1000 . . gene_id "CUFF.190627"; transcript_id "CUFF.190627.1"; exon_number "1"; FPKM "34.4458244094"; frac "1.000000"; conf_lo "3.636542"; conf_hi "65.255106"; cov "1.956522"; +chr5 Cufflinks exon 3949522 3949685 1000 . . gene_id "CUFF.192485"; transcript_id "CUFF.192485.1"; exon_number "1"; FPKM "23.1879208220"; frac "1.000000"; conf_lo "6.791585"; conf_hi "39.584257"; cov "1.317073"; +chr5 Cufflinks exon 22635516 22635542 1000 . . gene_id "CUFF.194445"; transcript_id "CUFF.194445.1"; exon_number "1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr5 Cufflinks exon 34172212 34172238 1000 . . gene_id "CUFF.196463"; transcript_id "CUFF.196463.1"; exon_number "1"; FPKM "17.6056435870"; frac "1.000000"; conf_lo "0.000000"; conf_hi "52.816931"; cov "1.000000"; +chr5 Cufflinks transcript 35362816 35362905 1000 . . gene_id "CUFF.196905"; transcript_id "CUFF.196905.1"; FPKM "10.5633861522"; frac "1.000000"; conf_lo "0.000000"; conf_hi "25.502270"; cov "0.600000"; +chr5 Cufflinks transcript 37083127 37083166 1000 . . gene_id "CUFF.197139"; transcript_id "CUFF.197139.1"; FPKM "23.7676188425"; frac "1.000000"; conf_lo "0.000000"; conf_hi "57.380108"; cov "1.350000"; +chr5 Cufflinks transcript 68089694 68089831 1000 . . gene_id "CUFF.199409"; transcript_id "CUFF.199409.1"; FPKM "17.2229122047"; frac "1.000000"; conf_lo "1.818271"; conf_hi "32.627553"; cov "0.978261"; +chr5 Cufflinks exon 98615179 98615254 1000 . . gene_id "CUFF.201693"; transcript_id "CUFF.201693.1"; exon_number "1"; FPKM "12.5092730750"; frac "1.000000"; conf_lo "0.000000"; conf_hi "30.200057"; cov "0.710526"; +chr5 Cufflinks exon 105915104 105915181 1000 . . gene_id "CUFF.202385"; transcript_id "CUFF.202385.1"; exon_number "1"; FPKM "12.1885224833"; frac "1.000000"; conf_lo "0.000000"; conf_hi "29.425696"; cov "0.692308"; +chr5 Cufflinks transcript 116136788 116136814 1000 . . gene_id "CUFF.204349"; transcript_id "CUFF.204349.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr5 Cufflinks exon 122819526 122819619 1000 . . gene_id "CUFF.205487"; transcript_id "CUFF.205487.1"; exon_number "1"; FPKM "25.2486782797"; frac "1.000000"; conf_lo "2.649470"; conf_hi "47.847887"; cov "1.434124"; +chr5 Cufflinks exon 132055557 132055583 1000 . . gene_id "CUFF.207163"; transcript_id "CUFF.207163.1"; exon_number "1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr5 Cufflinks transcript 145619548 145619710 1000 . . gene_id "CUFF.209965"; transcript_id "CUFF.209965.1"; FPKM "40.8278115086"; frac "1.000000"; conf_lo "19.004428"; conf_hi "62.651195"; cov "2.319018"; +chr6 Cufflinks exon 48466822 48466848 1000 . . gene_id "CUFF.215907"; transcript_id "CUFF.215907.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr6 Cufflinks exon 63803818 63803844 1000 . . gene_id "CUFF.217337"; transcript_id "CUFF.217337.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr6 Cufflinks transcript 77393088 77393138 1000 . . gene_id "CUFF.218387"; transcript_id "CUFF.218387.1"; FPKM "18.6412696804"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.004006"; cov "1.058824"; +chr6 Cufflinks transcript 82379452 82379478 1000 . . gene_id "CUFF.218947"; transcript_id "CUFF.218947.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr6 Cufflinks exon 83928984 83929105 1000 . . gene_id "CUFF.219317"; transcript_id "CUFF.219317.1"; exon_number "1"; FPKM "46.7559714935"; frac "1.000000"; conf_lo "19.761399"; conf_hi "73.750544"; cov "2.655738"; +chr6 Cufflinks exon 103658447 103658521 1000 . . gene_id "CUFF.221849"; transcript_id "CUFF.221849.1"; exon_number "1"; FPKM "19.0140950740"; frac "1.000000"; conf_lo "0.000000"; conf_hi "40.969681"; cov "1.080000"; +chr6 Cufflinks exon 113781332 113781358 1000 . . gene_id "CUFF.222775"; transcript_id "CUFF.222775.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr6 Cufflinks exon 118857949 118858148 1000 . . gene_id "CUFF.223543"; transcript_id "CUFF.223543.1"; exon_number "1"; FPKM "19.0140950740"; frac "1.000000"; conf_lo "5.569100"; conf_hi "32.459091"; cov "1.080000"; +chr6 Cufflinks transcript 124996314 124996340 1000 . . gene_id "CUFF.224545"; transcript_id "CUFF.224545.1"; FPKM "17.6056435870"; frac "1.000000"; conf_lo "0.000000"; conf_hi "52.816931"; cov "1.000000"; +chr6 Cufflinks transcript 127077329 127077364 1000 . . gene_id "CUFF.225409"; transcript_id "CUFF.225409.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "105.633862"; cov "3.000000"; +chr6 Cufflinks exon 136038245 136038304 1000 . . gene_id "CUFF.225521"; transcript_id "CUFF.225521.1"; exon_number "1"; FPKM "23.7676188425"; frac "1.000000"; conf_lo "0.000000"; conf_hi "51.212101"; cov "1.350000"; +chr7 Cufflinks exon 13845893 13845919 1000 . . gene_id "CUFF.227869"; transcript_id "CUFF.227869.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr7 Cufflinks exon 56060938 56061023 1000 . . gene_id "CUFF.232920"; transcript_id "CUFF.232920.1"; exon_number "1"; FPKM "11.0547064384"; frac "1.000000"; conf_lo "0.000000"; conf_hi "26.688422"; cov "0.627907"; +chr7 Cufflinks exon 71484947 71484992 1000 . . gene_id "CUFF.234766"; transcript_id "CUFF.234766.1"; exon_number "1"; FPKM "20.6674946457"; frac "1.000000"; conf_lo "0.000000"; conf_hi "49.895746"; cov "1.173913"; +chr7 Cufflinks transcript 71784522 71784548 1000 . . gene_id "CUFF.234810"; transcript_id "CUFF.234810.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr7 Cufflinks exon 72605044 72605070 1000 . . gene_id "CUFF.234948"; transcript_id "CUFF.234948.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr7 Cufflinks transcript 85554210 85554343 1000 . . gene_id "CUFF.235778"; transcript_id "CUFF.235778.1"; FPKM "17.7370289869"; frac "1.000000"; conf_lo "1.872548"; conf_hi "33.601510"; cov "1.007463"; +chr7 Cufflinks transcript 98723325 98723416 1000 . . gene_id "CUFF.237594"; transcript_id "CUFF.237594.1"; FPKM "10.3337473228"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.947873"; cov "0.586957"; +chr7 Cufflinks exon 104055491 104055589 1000 . . gene_id "CUFF.238474"; transcript_id "CUFF.238474.1"; exon_number "1"; FPKM "28.8092349606"; frac "1.000000"; conf_lo "5.286593"; conf_hi "52.331877"; cov "1.636364"; +chr7 Cufflinks exon 107603711 107603748 1000 . . gene_id "CUFF.239180"; transcript_id "CUFF.239180.1"; exon_number "1"; FPKM "25.0185461500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "60.400113"; cov "1.421053"; +chr7 Cufflinks exon 133792511 133792572 1000 . . gene_id "CUFF.242136"; transcript_id "CUFF.242136.1"; exon_number "1"; FPKM "15.3339476403"; frac "1.000000"; conf_lo "0.000000"; conf_hi "37.019424"; cov "0.870968"; +chr8 Cufflinks exon 9970398 9970545 1000 . . gene_id "CUFF.245320"; transcript_id "CUFF.245320.1"; exon_number "1"; FPKM "22.4828826889"; frac "1.000000"; conf_lo "5.487421"; conf_hi "39.478345"; cov "1.277027"; +chr8 Cufflinks transcript 10425062 10425088 1000 . . gene_id "CUFF.245408"; transcript_id "CUFF.245408.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr8 Cufflinks transcript 13018897 13018981 1000 . . gene_id "CUFF.245840"; transcript_id "CUFF.245840.1"; FPKM "16.7771427124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "36.149718"; cov "0.952941"; +chr8 Cufflinks transcript 14745123 14745149 1000 . . gene_id "CUFF.246146"; transcript_id "CUFF.246146.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr8 Cufflinks exon 29820895 29820930 1000 . . gene_id "CUFF.247504"; transcript_id "CUFF.247504.1"; exon_number "1"; FPKM "26.4084653806"; frac "1.000000"; conf_lo "0.000000"; conf_hi "63.755675"; cov "1.500000"; +chr8 Cufflinks transcript 36676568 36676665 1000 . . gene_id "CUFF.248480"; transcript_id "CUFF.248480.1"; FPKM "9.7010689153"; frac "1.000000"; conf_lo "0.000000"; conf_hi "23.420452"; cov "0.551020"; +chr8 Cufflinks transcript 50146533 50146584 1000 . . gene_id "CUFF.249617"; transcript_id "CUFF.249617.1"; FPKM "27.4241755875"; frac "1.000000"; conf_lo "0.000000"; conf_hi "59.090886"; cov "1.557692"; +chr8 Cufflinks exon 51010775 51010850 1000 . . gene_id "CUFF.249695"; transcript_id "CUFF.249695.1"; exon_number "1"; FPKM "18.7639096125"; frac "1.000000"; conf_lo "0.000000"; conf_hi "40.430606"; cov "1.065789"; +chr8 Cufflinks exon 63391023 63391095 1000 . . gene_id "CUFF.250329"; transcript_id "CUFF.250329.1"; exon_number "1"; FPKM "13.0233527904"; frac "1.000000"; conf_lo "0.000000"; conf_hi "31.441155"; cov "0.739726"; +chr8 Cufflinks transcript 73858591 73858706 1000 . . gene_id "CUFF.251697"; transcript_id "CUFF.251697.1"; FPKM "12.2935959530"; frac "1.000000"; conf_lo "0.000000"; conf_hi "26.489018"; cov "0.698276"; +chr8 Cufflinks transcript 75514699 75514725 1000 . . gene_id "CUFF.252263"; transcript_id "CUFF.252263.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr8 Cufflinks exon 86101515 86101548 1000 . . gene_id "CUFF.252891"; transcript_id "CUFF.252891.1"; exon_number "1"; FPKM "27.9619045206"; frac "1.000000"; conf_lo "0.000000"; conf_hi "67.506009"; cov "1.588235"; +chr8 Cufflinks transcript 93627215 93627241 1000 . . gene_id "CUFF.254225"; transcript_id "CUFF.254225.1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr8 Cufflinks exon 108148187 108148357 1000 . . gene_id "CUFF.255477"; transcript_id "CUFF.255477.1"; exon_number "1"; FPKM "11.1193538445"; frac "1.000000"; conf_lo "0.000000"; conf_hi "22.238708"; cov "0.631579"; +chr8 Cufflinks exon 108190587 108190664 1000 . . gene_id "CUFF.255507"; transcript_id "CUFF.255507.1"; exon_number "1"; FPKM "12.1885224833"; frac "1.000000"; conf_lo "0.000000"; conf_hi "29.425696"; cov "0.692308"; +chr8 Cufflinks exon 120931685 120931769 1000 . . gene_id "CUFF.257375"; transcript_id "CUFF.257375.1"; exon_number "1"; FPKM "16.7771427124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "36.149718"; cov "0.952941"; +chr9 Cufflinks transcript 20449846 20449932 1000 . . gene_id "CUFF.260747"; transcript_id "CUFF.260747.1"; FPKM "234.9313045507"; frac "1.000000"; conf_lo "163.275950"; conf_hi "306.586659"; cov "13.344091"; +chr9 Cufflinks exon 25083606 25083643 1000 . . gene_id "CUFF.261551"; transcript_id "CUFF.261551.1"; exon_number "1"; FPKM "25.0185461500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "60.400113"; cov "1.421053"; +chr9 Cufflinks transcript 27906894 27906964 1000 . . gene_id "CUFF.261925"; transcript_id "CUFF.261925.1"; FPKM "26.7804155972"; frac "1.000000"; conf_lo "0.000000"; conf_hi "53.560831"; cov "1.521127"; +chr9 Cufflinks transcript 34418034 34418065 1000 . . gene_id "CUFF.262975"; transcript_id "CUFF.262975.1"; FPKM "29.7095235531"; frac "1.000000"; conf_lo "0.000000"; conf_hi "71.725135"; cov "1.687500"; +chr9 Cufflinks exon 34710789 34710815 1000 . . gene_id "CUFF.263065"; transcript_id "CUFF.263065.1"; exon_number "1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr9 Cufflinks transcript 40823051 40823090 1000 . . gene_id "CUFF.263747"; transcript_id "CUFF.263747.1"; FPKM "23.7676188425"; frac "1.000000"; conf_lo "0.000000"; conf_hi "57.380108"; cov "1.350000"; +chr9 Cufflinks exon 59236547 59236671 1000 . . gene_id "CUFF.267045"; transcript_id "CUFF.267045.1"; exon_number "1"; FPKM "12.1905895442"; frac "1.000000"; conf_lo "0.000000"; conf_hi "25.808020"; cov "0.692425"; +chr9 Cufflinks transcript 66418162 66418188 1000 . . gene_id "CUFF.268423"; transcript_id "CUFF.268423.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr9 Cufflinks transcript 95913236 95913262 1000 . . gene_id "CUFF.270957"; transcript_id "CUFF.270957.1"; FPKM "35.2112871741"; frac "1.000000"; conf_lo "0.000000"; conf_hi "85.007567"; cov "2.000000"; +chr9 Cufflinks exon 103101949 103102030 1000 . . gene_id "CUFF.271979"; transcript_id "CUFF.271979.1"; exon_number "1"; FPKM "11.5939604110"; frac "1.000000"; conf_lo "0.000000"; conf_hi "27.990296"; cov "0.658537"; +chr9 Cufflinks transcript 105579135 105579173 1000 . . gene_id "CUFF.272347"; transcript_id "CUFF.272347.1"; FPKM "24.3770449667"; frac "1.000000"; conf_lo "0.000000"; conf_hi "58.851393"; cov "1.384615"; +chr9 Cufflinks exon 107445870 107445930 1000 . . gene_id "CUFF.272761"; transcript_id "CUFF.272761.1"; exon_number "1"; FPKM "38.9633095779"; frac "1.000000"; conf_lo "4.113466"; conf_hi "73.813153"; cov "2.213115"; +chr9 Cufflinks exon 113779194 113779220 1000 . . gene_id "CUFF.274445"; transcript_id "CUFF.274445.1"; exon_number "1"; FPKM "52.8169307611"; frac "1.000000"; conf_lo "0.000000"; conf_hi "113.804669"; cov "3.000000"; +chr9 Cufflinks transcript 120860476 120860606 1000 . . gene_id "CUFF.275115"; transcript_id "CUFF.275115.1"; FPKM "25.4005086867"; frac "1.000000"; conf_lo "6.199529"; conf_hi "44.601488"; cov "1.442748"; +chrX Cufflinks exon 10274057 10274087 1000 . . gene_id "CUFF.276147"; transcript_id "CUFF.276147.1"; exon_number "1"; FPKM "99.5432248142"; frac "1.000000"; conf_lo "21.405127"; conf_hi "177.681323"; cov "5.654052"; +chrX Cufflinks transcript 39881431 39881678 1000 . . gene_id "CUFF.277419"; transcript_id "CUFF.277419.1"; FPKM "42.1683560109"; frac "1.000000"; conf_lo "24.187709"; conf_hi "60.149003"; cov "2.395161"; +chrX Cufflinks transcript 90114645 90131913 1000 - . gene_id "CUFF.279771"; transcript_id "CUFF.279771.1"; FPKM "6.8891648819"; frac "1.000000"; conf_lo "0.000000"; conf_hi "20.667495"; cov "0.391304"; +chrX Cufflinks transcript 148249672 148249713 1000 . . gene_id "CUFF.282847"; transcript_id "CUFF.282847.1"; FPKM "56.5895686726"; frac "1.000000"; conf_lo "5.974320"; conf_hi "107.204818"; cov "3.214286"; +chrX Cufflinks transcript 148481505 148482455 1000 + . gene_id "CUFF.282965"; transcript_id "CUFF.282965.1"; FPKM "40.1706233958"; frac "1.000000"; conf_lo "16.978103"; conf_hi "63.363144"; cov "2.281690"; +chrX Cufflinks transcript 158986411 158986471 1000 . . gene_id "CUFF.283831"; transcript_id "CUFF.283831.1"; FPKM "15.5853238312"; frac "1.000000"; conf_lo "0.000000"; conf_hi "37.626300"; cov "0.885246"; \ No newline at end of file diff -r e972d3f8413e -r b5391bd3e9c7 test-data/gff_filtering_out1.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gff_filtering_out1.gff Fri May 14 10:15:48 2010 -0400 @@ -0,0 +1,46 @@ +chr10 Cufflinks transcript 62044837 62045189 1000 . . gene_id "CUFF.23531"; transcript_id "CUFF.23531.1"; FPKM "19.5178121606"; frac "1.000000"; conf_lo "9.264456"; conf_hi "29.771168"; cov "1.108611"; +chr10 Cufflinks transcript 75372919 75373002 1000 . . gene_id "CUFF.24985"; transcript_id "CUFF.24985.1"; FPKM "124.4970510798"; frac "1.000000"; conf_lo "71.411330"; conf_hi "177.582772"; cov "7.071429"; +chr10 Cufflinks transcript 80362428 80363292 1000 - . gene_id "CUFF.26065"; transcript_id "CUFF.26065.1"; FPKM "43.6170921216"; frac "1.000000"; conf_lo "32.260169"; conf_hi "54.974016"; cov "2.477449"; +chr11 Cufflinks transcript 7904565 7904642 1000 . . gene_id "CUFF.33508"; transcript_id "CUFF.33508.1"; FPKM "61.6484988869"; frac "1.000000"; conf_lo "22.882428"; conf_hi "100.414569"; cov "3.501633"; +chr11 Cufflinks exon 78140156 78140259 1000 . . gene_id "CUFF.43148"; transcript_id "CUFF.43148.1"; exon_number "1"; FPKM "54.8483511750"; frac "1.000000"; conf_lo "23.181641"; conf_hi "86.515061"; cov "3.115385"; +chr11 Cufflinks exon 105616462 105616737 1000 . . gene_id "CUFF.48385"; transcript_id "CUFF.48385.1"; exon_number "1"; FPKM "18.9452034252"; frac "1.000000"; conf_lo "7.520816"; conf_hi "30.369591"; cov "1.076087"; +chr12 Cufflinks exon 30701762 30702509 1000 . . gene_id "CUFF.53897"; transcript_id "CUFF.53897.1"; exon_number "1"; FPKM "48.9333329111"; frac "1.000000"; conf_lo "37.780391"; conf_hi "60.086275"; cov "2.779412"; +chr13 Cufflinks exon 49159496 49159569 1000 . . gene_id "CUFF.67788"; transcript_id "CUFF.67788.1"; exon_number "1"; FPKM "44.9657653777"; frac "1.000000"; conf_lo "10.974842"; conf_hi "78.956689"; cov "2.554054"; +chr13 Cufflinks transcript 100200304 100200330 1000 . . gene_id "CUFF.73108"; transcript_id "CUFF.73108.1"; FPKM "123.2395051093"; frac "1.000000"; conf_lo "30.079196"; conf_hi "216.399814"; cov "7.000000"; +chr14 Cufflinks transcript 31949103 31949152 1000 . . gene_id "CUFF.77316"; transcript_id "CUFF.77316.1"; FPKM "85.5634278330"; frac "1.000000"; conf_lo "28.521143"; conf_hi "142.605713"; cov "4.860000"; +chr14 Cufflinks exon 67604227 67604668 1000 . . gene_id "CUFF.81446"; transcript_id "CUFF.81446.1"; exon_number "1"; FPKM "123.6776546104"; frac "1.000000"; conf_lo "100.611653"; conf_hi "146.743656"; cov "7.024887"; +chr14 Cufflinks exon 75165582 75165744 1000 . . gene_id "CUFF.82088"; transcript_id "CUFF.82088.1"; exon_number "1"; FPKM "20.4139057543"; frac "1.000000"; conf_lo "4.982443"; conf_hi "35.845368"; cov "1.159509"; +chr16 Cufflinks transcript 57154027 57154067 1000 . . gene_id "CUFF.103364"; transcript_id "CUFF.103364.1"; FPKM "162.3154457537"; frac "1.000000"; conf_lo "75.554191"; conf_hi "249.076701"; cov "9.219512"; +chr16 Cufflinks exon 74862302 74862560 1000 . . gene_id "CUFF.105450"; transcript_id "CUFF.105450.1"; exon_number "1"; FPKM "11.0120241741"; frac "1.000000"; conf_lo "2.020744"; conf_hi "20.003304"; cov "0.625483"; +chr16 Cufflinks transcript 98168779 98168914 1000 . . gene_id "CUFF.107834"; transcript_id "CUFF.107834.1"; FPKM "24.4666664555"; frac "1.000000"; conf_lo "5.971605"; conf_hi "42.961728"; cov "1.389706"; +chr17 Cufflinks exon 8483212 8483268 1000 . . gene_id "CUFF.108498"; transcript_id "CUFF.108498.1"; exon_number "1"; FPKM "50.0370923000"; frac "1.000000"; conf_lo "9.181978"; conf_hi "90.892207"; cov "2.842105"; +chr17 Cufflinks exon 30355791 30355913 1000 . . gene_id "CUFF.111759"; transcript_id "CUFF.111759.1"; exon_number "1"; FPKM "19.3232673516"; frac "1.000000"; conf_lo "2.040012"; conf_hi "36.606523"; cov "1.097561"; +chr18 Cufflinks transcript 39571718 39571880 1000 . . gene_id "CUFF.123569"; transcript_id "CUFF.123569.1"; FPKM "20.4139057543"; frac "1.000000"; conf_lo "4.982443"; conf_hi "35.845368"; cov "1.159509"; +chr19 Cufflinks exon 17633088 17633203 1000 . . gene_id "CUFF.131333"; transcript_id "CUFF.131333.1"; exon_number "1"; FPKM "20.4893265884"; frac "1.000000"; conf_lo "2.163116"; conf_hi "38.815537"; cov "1.163793"; +chr19 Cufflinks transcript 41997624 41997859 1000 . . gene_id "CUFF.133569"; transcript_id "CUFF.133569.1"; FPKM "28.1988698132"; frac "1.000000"; conf_lo "13.125940"; conf_hi "43.271800"; cov "1.601695"; +chr19 Cufflinks exon 56516515 56516684 1000 . . gene_id "CUFF.135203"; transcript_id "CUFF.135203.1"; exon_number "1"; FPKM "33.5542854247"; frac "1.000000"; conf_lo "14.181710"; conf_hi "52.926861"; cov "1.905882"; +chr2 Cufflinks transcript 4543774 4543977 1000 . . gene_id "CUFF.136435"; transcript_id "CUFF.136435.1"; FPKM "37.2825393608"; frac "1.000000"; conf_lo "18.641270"; conf_hi "55.923809"; cov "2.117647"; +chr2 Cufflinks transcript 30200331 30200938 1000 . . gene_id "CUFF.140289"; transcript_id "CUFF.140289.1"; FPKM "100.0741846001"; frac "1.000000"; conf_lo "82.383401"; conf_hi "117.764968"; cov "5.684211"; +chr2 Cufflinks transcript 106644220 106644341 1000 . . gene_id "CUFF.148977"; transcript_id "CUFF.148977.1"; FPKM "27.2743167045"; frac "1.000000"; conf_lo "6.656871"; conf_hi "47.891762"; cov "1.549180"; +chr2 Cufflinks exon 125388931 125389219 1000 . . gene_id "CUFF.151331"; transcript_id "CUFF.151331.1"; exon_number "1"; FPKM "23.0274507817"; frac "1.000000"; conf_lo "10.718761"; conf_hi "35.336141"; cov "1.307958"; +chr3 Cufflinks transcript 130936639 130936898 1000 . . gene_id "CUFF.171349"; transcript_id "CUFF.171349.1"; FPKM "20.1110620975"; frac "1.000000"; conf_lo "7.983635"; conf_hi "32.238489"; cov "1.142308"; +chr3 Cufflinks exon 136592671 136592771 1000 . . gene_id "CUFF.171861"; transcript_id "CUFF.171861.1"; exon_number "1"; FPKM "32.9452142371"; frac "1.000000"; conf_lo "8.040973"; conf_hi "57.849455"; cov "1.871287"; +chr3 Cufflinks transcript 152861374 152861508 1000 . . gene_id "CUFF.173007"; transcript_id "CUFF.173007.1"; FPKM "24.6479010219"; frac "1.000000"; conf_lo "6.015839"; conf_hi "43.279963"; cov "1.400000"; +chr4 Cufflinks exon 13715310 13715630 1000 . . gene_id "CUFF.174817"; transcript_id "CUFF.174817.1"; exon_number "1"; FPKM "19.2510308382"; frac "1.000000"; conf_lo "8.572480"; conf_hi "29.929581"; cov "1.093458"; +chr4 Cufflinks exon 147515029 147515097 1000 . . gene_id "CUFF.190627"; transcript_id "CUFF.190627.1"; exon_number "1"; FPKM "34.4458244094"; frac "1.000000"; conf_lo "3.636542"; conf_hi "65.255106"; cov "1.956522"; +chr5 Cufflinks exon 3949522 3949685 1000 . . gene_id "CUFF.192485"; transcript_id "CUFF.192485.1"; exon_number "1"; FPKM "23.1879208220"; frac "1.000000"; conf_lo "6.791585"; conf_hi "39.584257"; cov "1.317073"; +chr5 Cufflinks transcript 68089694 68089831 1000 . . gene_id "CUFF.199409"; transcript_id "CUFF.199409.1"; FPKM "17.2229122047"; frac "1.000000"; conf_lo "1.818271"; conf_hi "32.627553"; cov "0.978261"; +chr5 Cufflinks exon 122819526 122819619 1000 . . gene_id "CUFF.205487"; transcript_id "CUFF.205487.1"; exon_number "1"; FPKM "25.2486782797"; frac "1.000000"; conf_lo "2.649470"; conf_hi "47.847887"; cov "1.434124"; +chr5 Cufflinks transcript 145619548 145619710 1000 . . gene_id "CUFF.209965"; transcript_id "CUFF.209965.1"; FPKM "40.8278115086"; frac "1.000000"; conf_lo "19.004428"; conf_hi "62.651195"; cov "2.319018"; +chr6 Cufflinks exon 83928984 83929105 1000 . . gene_id "CUFF.219317"; transcript_id "CUFF.219317.1"; exon_number "1"; FPKM "46.7559714935"; frac "1.000000"; conf_lo "19.761399"; conf_hi "73.750544"; cov "2.655738"; +chr6 Cufflinks exon 118857949 118858148 1000 . . gene_id "CUFF.223543"; transcript_id "CUFF.223543.1"; exon_number "1"; FPKM "19.0140950740"; frac "1.000000"; conf_lo "5.569100"; conf_hi "32.459091"; cov "1.080000"; +chr7 Cufflinks transcript 85554210 85554343 1000 . . gene_id "CUFF.235778"; transcript_id "CUFF.235778.1"; FPKM "17.7370289869"; frac "1.000000"; conf_lo "1.872548"; conf_hi "33.601510"; cov "1.007463"; +chr7 Cufflinks exon 104055491 104055589 1000 . . gene_id "CUFF.238474"; transcript_id "CUFF.238474.1"; exon_number "1"; FPKM "28.8092349606"; frac "1.000000"; conf_lo "5.286593"; conf_hi "52.331877"; cov "1.636364"; +chr8 Cufflinks exon 9970398 9970545 1000 . . gene_id "CUFF.245320"; transcript_id "CUFF.245320.1"; exon_number "1"; FPKM "22.4828826889"; frac "1.000000"; conf_lo "5.487421"; conf_hi "39.478345"; cov "1.277027"; +chr9 Cufflinks transcript 20449846 20449932 1000 . . gene_id "CUFF.260747"; transcript_id "CUFF.260747.1"; FPKM "234.9313045507"; frac "1.000000"; conf_lo "163.275950"; conf_hi "306.586659"; cov "13.344091"; +chr9 Cufflinks exon 107445870 107445930 1000 . . gene_id "CUFF.272761"; transcript_id "CUFF.272761.1"; exon_number "1"; FPKM "38.9633095779"; frac "1.000000"; conf_lo "4.113466"; conf_hi "73.813153"; cov "2.213115"; +chr9 Cufflinks transcript 120860476 120860606 1000 . . gene_id "CUFF.275115"; transcript_id "CUFF.275115.1"; FPKM "25.4005086867"; frac "1.000000"; conf_lo "6.199529"; conf_hi "44.601488"; cov "1.442748"; +chrX Cufflinks exon 10274057 10274087 1000 . . gene_id "CUFF.276147"; transcript_id "CUFF.276147.1"; exon_number "1"; FPKM "99.5432248142"; frac "1.000000"; conf_lo "21.405127"; conf_hi "177.681323"; cov "5.654052"; +chrX Cufflinks transcript 39881431 39881678 1000 . . gene_id "CUFF.277419"; transcript_id "CUFF.277419.1"; FPKM "42.1683560109"; frac "1.000000"; conf_lo "24.187709"; conf_hi "60.149003"; cov "2.395161"; +chrX Cufflinks transcript 148249672 148249713 1000 . . gene_id "CUFF.282847"; transcript_id "CUFF.282847.1"; FPKM "56.5895686726"; frac "1.000000"; conf_lo "5.974320"; conf_hi "107.204818"; cov "3.214286"; +chrX Cufflinks transcript 148481505 148482455 1000 + . gene_id "CUFF.282965"; transcript_id "CUFF.282965.1"; FPKM "40.1706233958"; frac "1.000000"; conf_lo "16.978103"; conf_hi "63.363144"; cov "2.281690"; diff -r e972d3f8413e -r b5391bd3e9c7 tool_conf.xml.sample --- a/tool_conf.xml.sample Thu May 13 15:35:41 2010 -0400 +++ b/tool_conf.xml.sample Fri May 14 10:15:48 2010 -0400 @@ -237,11 +237,14 @@ <tool file="sr_mapping/PerM.xml" /> </section> <section name="NGS: Expression Analysis" id="ngs-rna-tools"> - <tool file="ngs_rna/tophat_wrapper.xml" /> - <tool file="ngs_rna/cufflinks_wrapper.xml" /> - <tool file="ngs_rna/cuffcompare_wrapper.xml" /> - <tool file="ngs_rna/cuffdiff_wrapper.xml" /> - <tool file="ngs_rna/filter_transcripts_via_tracking.xml" /> + <label text="RNA-seq" id="rna_seq" /> + <tool file="ngs_rna/tophat_wrapper.xml" /> + <tool file="ngs_rna/cufflinks_wrapper.xml" /> + <tool file="ngs_rna/cuffcompare_wrapper.xml" /> + <tool file="ngs_rna/cuffdiff_wrapper.xml" /> + <label text="Filtering" id="filtering" /> + <tool file="ngs_rna/filter_transcripts_via_tracking.xml" /> + <tool file="ngs_rna/gff_filtering.xml" /> </section> <section name="NGS: SAM Tools" id="samtools"> <tool file="samtools/sam_bitwise_flag_filter.xml" /> diff -r e972d3f8413e -r b5391bd3e9c7 tools/ngs_rna/gff_filtering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_rna/gff_filtering.py Fri May 14 10:15:48 2010 -0400 @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# This tool takes a gff file as input and creates filters on attributes based on certain properties. +# The tool will skip over invalid lines within the file, informing the user about the number of lines skipped. +# TODO: much of this code is copied from the Filter1 tool (filtering.py in this directory). The commonalities should be +# abstracted and leveraged in each filtering tool. + +from __future__ import division +import sys, re, os.path + +# Older py compatibility +try: + set() +except: + from sets import Set as set + +assert sys.version_info[:2] >= ( 2, 4 ) + +def get_operands( filter_condition ): + # Note that the order of all_operators is important + items_to_strip = ['+', '-', '**', '*', '//', '/', '%', '<<', '>>', '&', '|', '^', '~', '<=', '<', '>=', '>', '==', '!=', '<>', ' and ', ' or ', ' not ', ' is ', ' is not ', ' in ', ' not in '] + for item in items_to_strip: + if filter_condition.find( item ) >= 0: + filter_condition = filter_condition.replace( item, ' ' ) + operands = set( filter_condition.split( ' ' ) ) + return operands + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +in_fname = sys.argv[1] +out_fname = sys.argv[2] +attribute_type = sys.argv[3] +attribute_name = sys.argv[4] +cond_text = sys.argv[5] + +# Unescape if input has been escaped +mapped_str = { + '__lt__': '<', + '__le__': '<=', + '__eq__': '==', + '__ne__': '!=', + '__gt__': '>', + '__ge__': '>=', + '__sq__': '\'', + '__dq__': '"', +} +for key, value in mapped_str.items(): + cond_text = cond_text.replace( key, value ) + +# Add attribute name to condition text. +cond_text = attribute_name + cond_text + +# Attempt to determine if the condition includes executable stuff and, if so, exit +secured = dir() +operands = get_operands(cond_text) +for operand in operands: + try: + check = int( operand ) + except: + if operand in secured: + stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) ) + +# Set up assignment. +assignment = "%s = attributes[ '%s' ]" % ( attribute_name, attribute_name ) + +# Set up type casting based on attribute type. +type_cast = "%s = %s(%s)" % ( attribute_name, attribute_type, attribute_name) + +# Stats +skipped_lines = 0 +first_invalid_line = 0 +invalid_line = None +lines_kept = 0 +total_lines = 0 +out = open( out_fname, 'wt' ) + +# Read and filter input file, skipping invalid lines +code = ''' +for i, line in enumerate( file( in_fname ) ): + total_lines += 1 + line = line.rstrip( '\\r\\n' ) + if not line or line.startswith( '#' ): + skipped_lines += 1 + if not invalid_line: + first_invalid_line = i + 1 + invalid_line = line + continue + try: + # GTF format: chrom source, name, chromStart, chromEnd, score, strand, frame, attributes. + # Attributes format: name1 "value1" ; name2 "value2" ; ... + elems = line.split( '\t' ) + attributes_list = elems[8].split(";") + attributes = {} + for name_value_pair in attributes_list: + pair = name_value_pair.strip().split(" ") + if pair == '': + continue + name = pair[0].strip() + if name == '': + continue + # Need to strip double quote from values + value = pair[1].strip(" \\"") + attributes[name] = value + %s + %s + if %s: + lines_kept += 1 + print >> out, line + except: + skipped_lines += 1 + if not invalid_line: + first_invalid_line = i + 1 + invalid_line = line +''' % ( assignment, type_cast, cond_text ) + +valid_filter = True +try: + exec code +except Exception, e: + out.close() + if str( e ).startswith( 'invalid syntax' ): + valid_filter = False + stop_err( 'Filter condition "%s" likely invalid. See tool tips, syntax and examples.' % cond_text ) + else: + stop_err( str( e ) ) + +if valid_filter: + out.close() + valid_lines = total_lines - skipped_lines + print 'Filtering with %s, ' % ( cond_text ) + if valid_lines > 0: + print 'kept %4.2f%% of %d lines.' % ( 100.0*lines_kept/valid_lines, total_lines ) + else: + print 'Possible invalid filter condition "%s" or non-existent column referenced. See tool tips, syntax and examples.' % cond_text + if skipped_lines > 0: + print 'Skipped %d invalid lines starting at line #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) diff -r e972d3f8413e -r b5391bd3e9c7 tools/ngs_rna/gff_filtering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_rna/gff_filtering.xml Fri May 14 10:15:48 2010 -0400 @@ -0,0 +1,58 @@ +<tool id="gff_filtering" name="Filter GFF" version="0.1"> + <description>file on any attribute using simple expressions</description> + <command interpreter="python"> + gff_filtering.py $input $out_file1 "$attribute_type" "$attribute_name" "$cond" + </command> + <inputs> + <param format="gff" name="input" type="data" label="Filter" help="Query missing? See TIP below."/> + <param name="attribute_name" size="40" type="text" label="Attribute name" help=""> + <validator type="empty_field" message="Enter a valid attribute name."/> + </param> + <param name="attribute_type" type="select" label="Attribute type"> + <option value="float">Float</option> + <option value="integer">Integer</option> + <option value="str">String</option> + </param> + <param name="cond" size="40" type="text" value=">0" label="With following condition" help="Double equal signs, ==, must be used as shown above. To filter for an arbitrary string, use the Select tool."> + <validator type="empty_field" message="Enter a valid filtering condition, see syntax and examples below."/> + </param> + </inputs> + <outputs> + <data format="input" name="out_file1" metadata_source="input"/> + </outputs> + <tests> + <test> + <param name="input" value="gff_filtering_in1.gff"/> + <param name="attribute_name" value="conf_lo"/> + <param name="attribute_type" value="float"/> + <param name="cond" value=">0"/> + <output name="out_file1" file="gff_filtering_out1.gff"/> + </test> + </tests> + + <help> + +.. class:: warningmark + +Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**) + +.. class:: infomark + +**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the attribute being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings). If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition. The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue". + +.. class:: infomark + +**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* + +----- + +**Syntax** + +The filter tool allows you to restrict the dataset using simple conditional statements. + +- Make sure that multi-character operators contain no white space ( e.g., **<=** is valid while **< =** is not valid ) +- When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **attribute_name=='chr1'** ) +- Non-numerical values must be included in single or double quotes ( e.g., **attribute_name=='XX22'** ) + +</help> +</tool>