galaxy-commits
Threads by month
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- 15302 discussions
galaxy-dist commit 4f157f2c6fd9: Bug fix for history/view when history has no user.
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Dan Blankenberg <dan(a)bx.psu.edu>
# Date 1279204138 14400
# Node ID 4f157f2c6fd9cc1b692e55fe38dd66243e881439
# Parent 9638200fbfdcc470675ca6b65f0220f28ed1809c
Bug fix for history/view when history has no user.
--- a/templates/history/view.mako
+++ b/templates/history/view.mako
@@ -68,8 +68,10 @@
<%
##TODO: is there a better way to create this URL? Can't use 'f-username' as a key b/c it's not a valid identifier.
href_to_published_histories = h.url_for( controller='/history', action='list_published')
- href_to_user_histories = h.url_for( controller='/history', action='list_published', xxx=history.user.username)
- href_to_user_histories = href_to_user_histories.replace( 'xxx', 'f-username')
+ if history.user is not None:
+ href_to_user_histories = h.url_for( controller='/history', action='list_published', xxx=history.user.username).replace( 'xxx', 'f-username')
+ else:
+ href_to_user_histories = h.url_for( controller='/history', action='list_published' )##should this instead be be None or empty string?
%><div class="unified-panel-header" unselectable="on">
1
0
galaxy-dist commit 9638200fbfdc: Fixes for rgManQQ - manhattan and qqplot generator for
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User fubar: ross Lazarus at gmail period com
# Date 1279184288 14400
# Node ID 9638200fbfdcc470675ca6b65f0220f28ed1809c
# Parent c0eb25264f919b2e7cac25b65483aa1ce4c6f54d
Fixes for rgManQQ - manhattan and qqplot generator for
interval pvalues
New qqplot image
Fixes to fix test output generators to only provide one set of pval columns
--- a/tools/rgenetics/rgManQQ.xml
+++ b/tools/rgenetics/rgManQQ.xml
@@ -38,13 +38,13 @@
<param name='i' value='smallwgaP.xls' ftype='tabular' ></param><param name='name' value='rgManQQtest1' />
- <param name='pval_col' value='5,7' />
+ <param name='pval_col' value='7' /><param name='chrom_col' value='1' /><param name='offset_col' value='2' /><param name='grey' value='0' /><output name='out_html' file='rgtestouts/rgManQQ/rgManQQtest1.html' ftype='html' lines_diff='17'><extra_files type="file" name='Allelep_manhattan.png' value='rgtestouts/rgManQQ/Allelep_manhattan.png' compare="sim_size"
- delta = "100" />
+ delta = "4000" /><extra_files type="file" name='Allelep_qqplot.png' value='rgtestouts/rgManQQ/Allelep_qqplot.png' compare="sim_size"
delta = "4000" /><extra_files type="file" name='Armitagep_manhattan.png' value='rgtestouts/rgManQQ/Armitagep_manhattan.png' compare="sim_size"
--- a/tools/rgenetics/rgtest.sh
+++ b/tools/rgenetics/rgtest.sh
@@ -35,7 +35,7 @@ TOOL="rgManQQ"
NPRE=${TOOL}test1
OUTPATH="$OROOT/$TOOL"
mkdir $OUTPATH
-CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 5,7 0"
+CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0"
# rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col'
# '$pval_col'
#python /opt/galaxy/tools/rgenetics/rgManQQ.py /opt/galaxy/test-data/smallwgaP.xls rgManQQtest1
Binary file test-data/rgtestouts/rgManQQ/Allelep_qqplot.png has changed
--- a/tools/rgenetics/rgtest_one_tool.sh
+++ b/tools/rgenetics/rgtest_one_tool.sh
@@ -2,7 +2,7 @@
# script to generate all functional test outputs for each rgenetics tool
# could be run at installation to ensure all dependencies are in place?
case $# in 0) echo "USAGE: ${0##*/} TooltoTest galaxyRoot outRoot"; exit 1;;
- [1-3]*) echo "Need ToolToTest and paths for galaxyRoot outRoot as parameters"; exit 2;;
+ [1-2]*) echo "Need ToolToTest and paths for galaxyRoot outRoot as parameters"; exit 2;;
[5-10]*) echo "Too many arguments - ToolToTest and paths for galaxyRoot outRoot as parameters"; exit 2;;
*)
esac
@@ -22,13 +22,13 @@ TOOL="rgManQQ"
NPRE=${TOOL}test1
OUTPATH="$OROOT/$TOOL"
rm -rf $OUTPATH/*
-CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 5,7 0"
+CL="python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0"
# rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col'
# '$pval_col'
#python /opt/galaxy/tools/rgenetics/rgManQQ.py /opt/galaxy/test-data/smallwgaP.xls rgManQQtest1
#/opt/galaxy/test-data/rgtestouts/rgManQQ/rgManQQtest1.html /opt/galaxy/test-data/rgtestouts/rgManQQ 1 2 5,7
echo "Testing $TOOL using $CL"
-python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 5,7 0
+python $TOOLPATH/$TOOL.py "$INPATH/smallwgaP.xls" $NPRE ${OUTPATH}/${NPRE}.html $OUTPATH 1 2 7 0
;;
'rgfakePhe')
1
0
galaxy-dist commit 14406c63e7bd: Update vcf_to_mafcustomtrack tool to enforce a minimum of one dataset to be selected.
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Dan Blankenberg <dan(a)bx.psu.edu>
# Date 1279124304 14400
# Node ID 14406c63e7bd2569bdbb73af2949e7e020c22acd
# Parent 63db00b2f0a3ef973666c81b8cd4934ad36c5317
Update vcf_to_mafcustomtrack tool to enforce a minimum of one dataset to be selected.
--- a/tools/maf/vcf_to_maf_customtrack.xml
+++ b/tools/maf/vcf_to_maf_customtrack.xml
@@ -23,13 +23,13 @@
<option value="-s">Per Sample</option></param><when value="-p">
- <repeat name="vcf_file" title="VCF population file">
+ <repeat name="vcf_file" title="VCF population file" min="1"><param format="tabular" name="vcf_input" type="data" label="VCF file"/><param name="population_name" type="text" label="Name for this population" value=""/></repeat></when><when value="-s">
- <repeat name="vcf_file" title="VCF sample file">
+ <repeat name="vcf_file" title="VCF sample file" min="1"><param format="tabular" name="vcf_input" type="data" label="VCF file"/><!-- add column count validator >= 8? --></repeat>
1
0
galaxy-dist commit c0eb25264f91: Do not require annotation when editing visualization attributes.
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks(a)emory.edu>
# Date 1279141279 14400
# Node ID c0eb25264f919b2e7cac25b65483aa1ce4c6f54d
# Parent 635d6ada4357d7daafb6862758acdf26f94af53f
Do not require annotation when editing visualization attributes.
--- a/lib/galaxy/web/controllers/visualization.py
+++ b/lib/galaxy/web/controllers/visualization.py
@@ -373,13 +373,12 @@ class VisualizationController( BaseContr
visualization_slug_err = "Visualization identifier must consist of only lowercase letters, numbers, and the '-' character"
elif visualization_slug != visualization.slug and trans.sa_session.query( model.Visualization ).filter_by( user=user, slug=visualization_slug, deleted=False ).first():
visualization_slug_err = "Visualization id must be unique"
- elif not visualization_annotation:
- visualization_annotation_err = "Visualization annotation is required"
else:
visualization.title = visualization_title
visualization.slug = visualization_slug
- visualization_annotation = sanitize_html( visualization_annotation, 'utf-8', 'text/html' )
- self.add_item_annotation( trans, visualization, visualization_annotation )
+ if visualization_annotation != "":
+ visualization_annotation = sanitize_html( visualization_annotation, 'utf-8', 'text/html' )
+ self.add_item_annotation( trans, visualization, visualization_annotation )
session.flush()
# Redirect to visualization list.
return trans.response.send_redirect( web.url_for( action='list' ) )
1
0
galaxy-dist commit 0bb17e66a560: Added Sscrofa9.58 to manual_builds.txt and removed phiX from builds.txt.sample (it's in manual_builds.txt)
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Kelly Vincent <kpvincent(a)bx.psu.edu>
# Date 1279209889 14400
# Node ID 0bb17e66a56098391e5bbdd34ca227b595ddf082
# Parent 4f157f2c6fd9cc1b692e55fe38dd66243e881439
Added Sscrofa9.58 to manual_builds.txt and removed phiX from builds.txt.sample (it's in manual_builds.txt)
--- a/tool-data/shared/ucsc/manual_builds.txt
+++ b/tool-data/shared/ucsc/manual_builds.txt
@@ -669,3 +669,4 @@ arabidopsis Arabidopsis thaliana TAIR9
arabidopsis_tair8 Arabidopsis thaliana TAIR8
araTha1 Arabidopsis thaliana TAIR7
mm5 Mouse May 2004 (mm5)
+Sscrofa9.58 Pig May 2010 (SGSC Sscrofa9.58)
--- a/tool-data/shared/ucsc/builds.txt.sample
+++ b/tool-data/shared/ucsc/builds.txt.sample
@@ -150,4 +150,3 @@ falciparum P. falciparum Plasmodium falc
sacCer2 S. cerevisiae June 2008 (SGD/sacCer2) (sacCer2)
sacCer1 S. cerevisiae Oct. 2003 (SGD/sacCer1) (sacCer1)
sc1 SARS coronavirus Apr. 2003 (GenBank Apr. 14 '03/sc1) (sc1)
-phiX phiX174 (phiX)
1
0
galaxy-dist commit 635d6ada4357: Various Bug fixes for libraries including deleting of or adding new versions to LibraryDatasets and for importing into a new LibraryDataset from a history.
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Dan Blankenberg <dan(a)bx.psu.edu>
# Date 1279141069 14400
# Node ID 635d6ada4357d7daafb6862758acdf26f94af53f
# Parent 14406c63e7bd2569bdbb73af2949e7e020c22acd
Various Bug fixes for libraries including deleting of or adding new versions to LibraryDatasets and for importing into a new LibraryDataset from a history.
--- a/lib/galaxy/web/controllers/library_common.py
+++ b/lib/galaxy/web/controllers/library_common.py
@@ -1395,7 +1395,7 @@ class LibraryCommon( BaseController ):
else:
# Since permissions on all LibraryDatasetDatasetAssociations must be the same at this point, we only need
# to check one of them to see if the current user can manage permissions on them.
- check_ldda = trans.sa_session.query( trans.app.model.LibraryDatasetDatasetAssociation ).get( trans.security.decode_id( ldda_id_list[0] ) )
+ check_ldda = trans.sa_session.query( trans.app.model.LibraryDatasetDatasetAssociation ).get( ldda_id_list[0] )
if trans.app.security_agent.can_manage_library_item( current_user_roles, check_ldda ):
if replace_dataset:
default_action = ''
@@ -1526,7 +1526,7 @@ class LibraryCommon( BaseController ):
if params.get( 'edit_attributes_button', False ):
# Deny access if the user is not an admin and does not have the LIBRARY_MODIFY permission.
if not ( is_admin or trans.app.security_agent.can_modify_library_item( current_user_roles, library_dataset ) ):
- message = "You are not authorized to modify library dataset '%s'." % ldda.name
+ message = "You are not authorized to modify library dataset '%s'." % library_dataset.name
return trans.response.send_redirect( web.url_for( controller='library_common',
action='browse_library',
id=library_id,
@@ -1615,7 +1615,7 @@ class LibraryCommon( BaseController ):
trans.sa_session.refresh( library_dataset.library_dataset_dataset_association )
message = "Permisisons updated for library dataset '%s'." % library_dataset.name
status = 'done'
- roles = trans.app.security_agent.get_legitimate_roles( trans, library, cntrller )
+ roles = trans.app.security_agent.get_legitimate_roles( trans, library_dataset, cntrller )
return trans.fill_template( '/library/common/library_dataset_permissions.mako',
cntrller=cntrller,
use_panels=use_panels,
@@ -1768,7 +1768,7 @@ class LibraryCommon( BaseController ):
outext = 'tbz2'
elif action == 'ngxzip':
archive = NgxZip( trans.app.config.nginx_x_archive_files_base )
- except (OSError, zipfile.BadZipFile):
+ except (OSError, zipfile.BadZipfile):
error = True
log.exception( "Unable to create archive for download" )
message = "Unable to create archive for download, please report this error"
@@ -2279,7 +2279,7 @@ class LibraryCommon( BaseController ):
if not library_item or not ( is_admin or trans.app.security_agent.can_access_library_item( current_user_roles, library_item, trans.user ) ):
message = 'Invalid %s id ( %s ) specifield.' % ( item_desc, item_id )
status = 'error'
- elif not ( is_admin or trans.app.security_agent.can_modify_library_item( current_user_roles, item ) ):
+ elif not ( is_admin or trans.app.security_agent.can_modify_library_item( current_user_roles, library_item ) ):
message = "You are not authorized to delete %s '%s'." % ( item_desc, library_item.name )
status = 'error'
else:
@@ -2328,7 +2328,7 @@ class LibraryCommon( BaseController ):
elif library_item.purged:
message = '%s %s has been purged, so it cannot be undeleted' % ( item_desc, library_item.name )
status = ERROR
- elif not ( is_admin or trans.app.security_agent.can_modify_library_item( current_user_roles, item ) ):
+ elif not ( is_admin or trans.app.security_agent.can_modify_library_item( current_user_roles, library_item ) ):
message = "You are not authorized to delete %s '%s'." % ( item_desc, library_item.name )
status = 'error'
else:
--- a/lib/galaxy/security/__init__.py
+++ b/lib/galaxy/security/__init__.py
@@ -246,7 +246,7 @@ class GalaxyRBACAgent( RBACAgent ):
elif type( item ) == self.model.LibraryFolder:
return self.can_access_library( roles, item.parent_library ) and self.check_folder_contents( user, roles, item )[0]
elif type( item ) == self.model.LibraryDataset:
- return self.can_acess_library( roles, item.folder.parent_library ) and self.can_access_dataset( roles, item.library_dataset_dataset_association.dataset )
+ return self.can_access_library( roles, item.folder.parent_library ) and self.can_access_dataset( roles, item.library_dataset_dataset_association.dataset )
elif type( item ) == self.model.LibraryDatasetDatasetAssociation:
return self.can_access_library( roles, item.library_dataset.folder.parent_library ) and self.can_access_dataset( roles, item.dataset )
else:
1
0
galaxy-dist commit 3a6b81352293: New feature: GFF files can be viewed in trackster. Specific additions: (a) generalized bed-to-summary-tree converter and bed-to-interval-index converter to handle both BED and GFF files and renamed accordingly; (b) augmented trackster to provide payload data from both BED and GFF files.
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User jeremy goecks <jeremy.goecks(a)emory.edu>
# Date 1279113986 14400
# Node ID 3a6b81352293854ff58970c19590ce14a218fb57
# Parent e2ba0e9c6852f2acba9d7119c09b703d3bc954be
New feature: GFF files can be viewed in trackster. Specific additions: (a) generalized bed-to-summary-tree converter and bed-to-interval-index converter to handle both BED and GFF files and renamed accordingly; (b) augmented trackster to provide payload data from both BED and GFF files.
--- a/lib/galaxy/datatypes/converters/bed_to_interval_index_converter.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python
-
-from __future__ import division
-
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.interval_index_file import Indexes
-
-def main():
-
- input_fname = sys.argv[1]
- out_fname = sys.argv[2]
- index = Indexes()
- offset = 0
-
- for line in open(input_fname, "r"):
- feature = line.strip().split()
- if not feature or feature[0].startswith("track") or feature[0].startswith("#"):
- offset += len(line)
- continue
- chrom = feature[0]
- chrom_start = int(feature[1])
- chrom_end = int(feature[2])
- index.add( chrom, chrom_start, chrom_end, offset )
- offset += len(line)
-
- index.write( open(out_fname, "w") )
-
-if __name__ == "__main__":
- main()
-
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/gff_to_summary_tree_converter.xml
@@ -0,0 +1,14 @@
+<tool id="CONVERTER_gff_to_summary_tree_0" name="Convert GFF to Summary Tree" version="1.0.0">
+<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+ <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1 --gff</command>
+ <inputs>
+ <page>
+ <param format="gff" name="input1" type="data" label="Choose GFF file"/>
+ </page>
+ </inputs>
+ <outputs>
+ <data format="summary_tree" name="output1"/>
+ </outputs>
+ <help>
+ </help>
+</tool>
--- a/lib/galaxy/datatypes/converters/bed_to_interval_index_converter.xml
+++ b/lib/galaxy/datatypes/converters/bed_to_interval_index_converter.xml
@@ -1,6 +1,6 @@
<tool id="CONVERTER_bed_to_interval_index_0" name="Convert BED to Interval Index" version="1.0.0"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
- <command interpreter="python">bed_to_interval_index_converter.py $input1 $output1</command>
+ <command interpreter="python">interval_to_interval_index_converter.py $input1 $output1</command><inputs><page><param format="bed" name="input1" type="data" label="Choose BED file"/>
--- a/datatypes_conf.xml.sample
+++ b/datatypes_conf.xml.sample
@@ -52,6 +52,8 @@
</datatype><datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"><converter file="gff_to_bed_converter.xml" target_datatype="bed"/>
+ <converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/>
+ <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/><display file="ensembl/ensembl_gff.xml" inherit="True"/></datatype><datatype extension="gff3" type="galaxy.datatypes.interval:Gff3" display_in_upload="true"/>
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+"""
+Convert from interval file to interval index file. Default input file format is BED (0-based, half-open intervals).
+
+usage: %prog in_file out_file
+ -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval
+"""
+
+from __future__ import division
+
+import sys, fileinput
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from galaxy.visualization.tracks.summary import *
+from bx.cookbook import doc_optparse
+from galaxy.tools.util.gff_util import convert_gff_coords_to_bed
+from bx.interval_index_file import Indexes
+
+def main():
+
+ # Read options, args.
+ options, args = doc_optparse.parse( __doc__ )
+ try:
+ gff_format = bool( options.gff )
+ input_fname, out_fname = args
+ except:
+ doc_optparse.exception()
+
+ # Do conversion.
+ # TODO: take column numbers from command line.
+ if gff_format:
+ chr_col, start_col, end_col = ( 0, 3, 4 )
+ else:
+ chr_col, start_col, end_col = ( 0, 1, 2 )
+ index = Indexes()
+ offset = 0
+ for line in open(input_fname, "r"):
+ feature = line.strip().split()
+ if not feature or feature[0].startswith("track") or feature[0].startswith("#"):
+ offset += len(line)
+ continue
+ chrom = feature[ chr_col ]
+ chrom_start = int( feature[ start_col ] )
+ chrom_end = int( feature[ end_col ] )
+ if gff_format:
+ chrom_start, chrom_end = convert_gff_coords_to_bed( [chrom_start, chrom_end ] )
+ index.add( chrom, chrom_start, chrom_end, offset )
+ offset += len(line)
+
+ index.write( open(out_fname, "w") )
+
+if __name__ == "__main__":
+ main()
+
--- a/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml
+++ b/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml
@@ -1,6 +1,6 @@
<tool id="CONVERTER_bed_to_summary_tree_0" name="Convert BED to Summary Tree" version="1.0.0"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
- <command interpreter="python">bed_to_summary_tree_converter.py $input1 $output1</command>
+ <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1</command><inputs><page><param format="bed" name="input1" type="data" label="Choose BED file"/>
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+"""
+Convert from interval file to summary tree file. Default input file format is BED (0-based, half-open intervals).
+
+usage: %prog in_file out_file
+ -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval
+"""
+from __future__ import division
+
+import sys, fileinput
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from galaxy.visualization.tracks.summary import *
+from bx.intervals.io import *
+from bx.cookbook import doc_optparse
+from galaxy.tools.util.gff_util import GFFReaderWrapper
+
+def main():
+ # Read options, args.
+ options, args = doc_optparse.parse( __doc__ )
+ try:
+ gff_format = bool( options.gff )
+ input_fname, out_fname = args
+ except:
+ doc_optparse.exception()
+
+ # Do conversion.
+ # TODO: take column numbers from command line.
+ if gff_format:
+ reader_wrapper_class = GFFReaderWrapper
+ chr_col, start_col, end_col, strand_col = ( 0, 3, 4, 6 )
+ else:
+ reader_wrapper_class = NiceReaderWrapper
+ chr_col, start_col, end_col, strand_col = ( 0, 1, 2, 5 )
+ reader_wrapper = reader_wrapper_class( fileinput.FileInput( input_fname ),
+ chrom_col=chr_col,
+ start_col=start_col,
+ end_col=end_col,
+ strand_col=strand_col,
+ fix_strand=True )
+ st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30)
+ for line in list( reader_wrapper ):
+ if type( line ) is GenomicInterval:
+ st.insert_range( line[ chr_col ], long( line[ start_col ] ), long( line[ end_col ] ) )
+
+ st.write(out_fname)
+
+if __name__ == "__main__":
+ main()
--- a/lib/galaxy/datatypes/interval.py
+++ b/lib/galaxy/datatypes/interval.py
@@ -772,6 +772,10 @@ class Gff( Tabular, _RemoteCallMixin ):
return True
except:
return False
+
+ def get_track_type( self ):
+ return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
+
class Gff3( Gff ):
"""Tab delimited data in Gff3 format"""
--- a/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-
-from __future__ import division
-
-import sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from galaxy.visualization.tracks.summary import *
-from bx.arrays.bed import BedReader
-
-def main():
-
- input_fname = sys.argv[1]
- out_fname = sys.argv[2]
-
- reader = BedReader( open( input_fname ) )
-
- st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30)
- for chrom, chrom_start, chrom_end, name, score in reader:
- st.insert_range(chrom, chrom_start, chrom_end)
-
- st.write(out_fname)
-
-if __name__ == "__main__":
- main()
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/gff_to_interval_index_converter.xml
@@ -0,0 +1,14 @@
+<tool id="CONVERTER_gff_to_interval_index_0" name="Convert BED to Interval Index" version="1.0.0">
+<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
+ <command interpreter="python">interval_to_interval_index_converter.py $input1 $output1 --gff</command>
+ <inputs>
+ <page>
+ <param format="gff" name="input1" type="data" label="Choose GFF file"/>
+ </page>
+ </inputs>
+ <outputs>
+ <data format="interval_index" name="output1"/>
+ </outputs>
+ <help>
+ </help>
+</tool>
--- a/lib/galaxy/visualization/tracks/data/interval_index.py
+++ b/lib/galaxy/visualization/tracks/data/interval_index.py
@@ -7,6 +7,7 @@ Payload format: [ uid (offset), start, e
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.interval_index_file import Indexes
+from galaxy.datatypes.interval import Bed, Gff
class IntervalIndexDataProvider( object ):
def __init__( self, converted_dataset, original_dataset ):
@@ -24,22 +25,31 @@ class IntervalIndexDataProvider( object
source.seek(offset)
feature = source.readline().split()
payload = [ offset, start, end ]
+ # TODO: can we use column metadata to fill out payload?
if "no_detail" not in kwargs:
length = len(feature)
- if length >= 4:
- payload.append(feature[3]) # name
- if length >= 6: # strand
- payload.append(feature[5])
-
- if length >= 8:
- payload.append(int(feature[6]))
- payload.append(int(feature[7]))
+ if isinstance( self.original_dataset.datatype, Gff ):
+ # GFF dataset.
+ if length >= 3:
+ payload.append( feature[2] ) # name
+ if length >= 7:
+ payload.append( feature[6] ) # strand
+ elif isinstance( self.original_dataset.datatype, Bed ):
+ # BED dataset.
+ if length >= 4:
+ payload.append(feature[3]) # name
+ if length >= 6: # strand
+ payload.append(feature[5])
+
+ if length >= 8:
+ payload.append(int(feature[6]))
+ payload.append(int(feature[7]))
- if length >= 12:
- block_sizes = [ int(n) for n in feature[10].split(',') if n != '']
- block_starts = [ int(n) for n in feature[11].split(',') if n != '' ]
- blocks = zip(block_sizes, block_starts)
- payload.append( [ (start + block[1], start + block[1] + block[0]) for block in blocks] )
+ if length >= 12:
+ block_sizes = [ int(n) for n in feature[10].split(',') if n != '']
+ block_starts = [ int(n) for n in feature[11].split(',') if n != '' ]
+ blocks = zip(block_sizes, block_starts)
+ payload.append( [ (start + block[1], start + block[1] + block[0]) for block in blocks] )
results.append(payload)
1
0
galaxy-dist commit f31c41763836: Add image_path global variable to base_panels so that images can always have the right path. Fix image issues with trackster
by commits-noreply@bitbucket.org 16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Kanwei Li <kanwei(a)gmail.com>
# Date 1279052547 14400
# Node ID f31c41763836603f42da474c3723d27143085cf8
# Parent 3225b99dd49318ac798fc6ffdc99282f89225e83
Add image_path global variable to base_panels so that images can always have the right path. Fix image issues with trackster
--- a/static/scripts/trackster.js
+++ b/static/scripts/trackster.js
@@ -17,22 +17,22 @@ var DENSITY = 200,
RIGHT_STRAND, LEFT_STRAND;
var right_img = new Image();
-right_img.src = "/static/images/visualization/strand_right.png";
+right_img.src = image_path + "/visualization/strand_right.png";
right_img.onload = function() {
RIGHT_STRAND = CONTEXT.createPattern(right_img, "repeat");
};
var left_img = new Image();
-left_img.src = "/static/images/visualization/strand_left.png";
+left_img.src = image_path + "/visualization/strand_left.png";
left_img.onload = function() {
LEFT_STRAND = CONTEXT.createPattern(left_img, "repeat");
};
var right_img_inv = new Image();
-right_img_inv.src = "/static/images/visualization/strand_right_inv.png";
+right_img_inv.src = image_path + "/visualization/strand_right_inv.png";
right_img_inv.onload = function() {
RIGHT_STRAND_INV = CONTEXT.createPattern(right_img_inv, "repeat");
};
var left_img_inv = new Image();
-left_img_inv.src = "/static/images/visualization/strand_left_inv.png";
+left_img_inv.src = image_path + "/visualization/strand_left_inv.png";
left_img_inv.onload = function() {
LEFT_STRAND_INV = CONTEXT.createPattern(left_img_inv, "repeat");
};
@@ -113,8 +113,8 @@ var View = function( container, chrom, t
if (this.vis_id !== undefined) {
this.hidden_input = $("<input/>").attr("type", "hidden").val(this.vis_id).appendTo(this.chrom_form);
}
- this.zi_link = $("<a/>").click(function() { view.zoom_in(); view.redraw() }).html('<img src="../images/fugue/magnifier-zoom.png" />').appendTo(this.chrom_form);
- this.zo_link = $("<a/>").click(function() { view.zoom_out(); view.redraw() }).html('<img src="../images/fugue/magnifier-zoom-out.png" />').appendTo(this.chrom_form);;
+ this.zi_link = $("<a/>").click(function() { view.zoom_in(); view.redraw() }).html('<img src="'+image_path+'/fugue/magnifier-zoom.png" />').appendTo(this.chrom_form);
+ this.zo_link = $("<a/>").click(function() { view.zoom_out(); view.redraw() }).html('<img src="'+image_path+'/fugue/magnifier-zoom-out.png" />').appendTo(this.chrom_form);;
$.ajax({
url: chrom_url,
@@ -230,7 +230,6 @@ var View = function( container, chrom, t
this.add_label_track( new LabelTrack( this, this.top_labeltrack ) );
this.add_label_track( new LabelTrack( this, this.nav_labeltrack ) );
-
},
move_delta: function(delta_chrom) {
var view = this;
@@ -296,7 +295,7 @@ var View = function( container, chrom, t
if (high > this.max_high) {
high = this.max_high;
}
- if (span < this.min_separation) {
+ if (this.high !== 0 && span < this.min_separation) {
high = low + this.min_separation;
}
this.low = Math.floor(low);
--- a/templates/visualization/display.mako
+++ b/templates/visualization/display.mako
@@ -8,16 +8,15 @@
<script type="text/javascript">
var view;
// To adjust the size of the viewport to fit the fixed-height footer
- var refresh = function( e ) {
+ var refresh = function() {
if (view !== undefined) {
view.viewport_container.height( $(window).height() - 100 );
view.nav_container.width( $("#center").width() );
view.redraw();
}
};
- $(window).bind( "resize", function(e) { refresh(e); } );
- $("#right-border").bind( "click dragend", function(e) { refresh(e); } );
- $(window).trigger( "resize" );
+ $(window).bind( "resize", function() { refresh(); } );
+ $("#right-border").bind( "click dragend", function() { refresh(); } );
</script></%def>
@@ -64,6 +63,6 @@
new ${track["track_type"]}( "${track['name'] | h}", view, ${track['dataset_id']}, ${track['prefs']} )
);
%endfor
-
+ $(window).trigger( "resize" );
</script></%def>
--- a/templates/base_panels.mako
+++ b/templates/base_panels.mako
@@ -41,6 +41,9 @@
<!--[if lt IE 7]>
${h.js( 'IE7', 'ie7-recalc' )}
<![endif]-->
+ <script type="text/javascript">
+ var image_path = '${h.url_for("/static/images")}';
+ </script>
${h.js( 'jquery' )}
</%def>
--- a/static/scripts/packed/trackster.js
+++ b/static/scripts/packed/trackster.js
@@ -1,1 +1,1 @@
-var DENSITY=200,FEATURE_LEVELS=10,DATA_ERROR="There was an error in indexing this dataset. ",DATA_NOCONVERTER="A converter for this dataset is not installed. Please check your datatypes_conf.xml file.",DATA_NONE="No data for this chrom/contig.",DATA_PENDING="Currently indexing... please wait",DATA_LOADING="Loading data...",CACHED_TILES_FEATURE=10,CACHED_TILES_LINE=30,CACHED_DATA=5,CONTEXT=$("<canvas></canvas>").get(0).getContext("2d"),PX_PER_CHAR=CONTEXT.measureText("A").width,RIGHT_STRAND,LEFT_STRAND;var right_img=new Image();right_img.src="/static/images/visualization/strand_right.png";right_img.onload=function(){RIGHT_STRAND=CONTEXT.createPattern(right_img,"repeat")};var left_img=new Image();left_img.src="/static/images/visualization/strand_left.png";left_img.onload=function(){LEFT_STRAND=CONTEXT.createPattern(left_img,"repeat")};var right_img_inv=new Image();right_img_inv.src="/static/images/visualization/strand_right_inv.png";right_img_inv.onload=function(){RIGHT_STRAND
_INV=CONTEXT.createPattern(right_img_inv,"repeat")};var left_img_inv=new Image();left_img_inv.src="/static/images/visualization/strand_left_inv.png";left_img_inv.onload=function(){LEFT_STRAND_INV=CONTEXT.createPattern(left_img_inv,"repeat")};var Cache=function(a){this.num_elements=a;this.clear()};$.extend(Cache.prototype,{get:function(b){var a=this.key_ary.indexOf(b);if(a!=-1){this.key_ary.splice(a,1);this.key_ary.push(b)}return this.obj_cache[b]},set:function(b,c){if(!this.obj_cache[b]){if(this.key_ary.length>=this.num_elements){var a=this.key_ary.shift();delete this.obj_cache[a]}this.key_ary.push(b)}this.obj_cache[b]=c;return c},clear:function(){this.obj_cache={};this.key_ary=[]}});var View=function(a,c,e,d,b){this.container=a;this.vis_id=d;this.dbkey=b;this.title=e;this.chrom=c;this.tracks=[];this.label_tracks=[];this.max_low=0;this.max_high=0;this.track_id_counter=0;this.zoom_factor=3;this.min_separation=30;this.has_changes=false;this.init();this.reset()};$.extend(View.p
rototype,{init:function(){var b=this.container,a=this;this.content_div=$("<div/>").addClass("content").css("position","relative").appendTo(b);this.top_labeltrack=$("<div/>").addClass("top-labeltrack").appendTo(this.content_div);this.viewport_container=$("<div/>").addClass("viewport-container").addClass("viewport-container").appendTo(this.content_div);this.viewport=$("<div/>").addClass("viewport").appendTo(this.viewport_container);this.nav_container=$("<div/>").addClass("nav-container").appendTo(b);this.nav_labeltrack=$("<div/>").addClass("nav-labeltrack").appendTo(this.nav_container);this.nav=$("<div/>").addClass("nav").appendTo(this.nav_container);this.overview=$("<div/>").addClass("overview").appendTo(this.nav);this.overview_viewport=$("<div/>").addClass("overview-viewport").appendTo(this.overview);this.overview_box=$("<div/>").addClass("overview-box").appendTo(this.overview_viewport);this.nav_controls=$("<div/>").addClass("nav-controls").appendTo(this.nav);this.chrom_form
=$("<form/>").attr("action",function(){void (0)}).appendTo(this.nav_controls);this.chrom_select=$("<select/>").attr({name:"chrom"}).css("width","15em").addClass("no-autocomplete").append("<option value=''>Loading</option>").appendTo(this.chrom_form);this.low_input=$("<input/>").addClass("low").css("width","10em").appendTo(this.chrom_form);$("<span/>").text(" - ").appendTo(this.chrom_form);this.high_input=$("<input/>").addClass("high").css("width","10em").appendTo(this.chrom_form);if(this.vis_id!==undefined){this.hidden_input=$("<input/>").attr("type","hidden").val(this.vis_id).appendTo(this.chrom_form)}this.zi_link=$("<a/>").click(function(){a.zoom_in();a.redraw()}).html('<img src="../images/fugue/magnifier-zoom.png" />').appendTo(this.chrom_form);this.zo_link=$("<a/>").click(function(){a.zoom_out();a.redraw()}).html('<img src="../images/fugue/magnifier-zoom-out.png" />').appendTo(this.chrom_form);$.ajax({url:chrom_url,data:(this.vis_id!==undefined?{vis_id:this.vis_id}:{dbke
y:this.dbkey}),dataType:"json",success:function(c){if(c.reference){a.add_label_track(new ReferenceTrack(a))}a.chrom_data=c.chrom_info;var e='<option value="">Select Chrom/Contig</option>';for(i in a.chrom_data){var d=a.chrom_data[i]["chrom"];e+='<option value="'+d+'">'+d+"</option>"}a.chrom_select.html(e);a.chrom_select.bind("change",function(){a.chrom=a.chrom_select.val();var g=$.grep(a.chrom_data,function(j,k){return j.chrom===a.chrom})[0];a.max_high=g.len;a.reset();a.redraw(true);for(var h in a.tracks){var f=a.tracks[h];if(f.init){f.init()}}a.redraw()})},error:function(){alert("Could not load chroms for this dbkey:",a.dbkey)}});this.content_div.bind("mousewheel",function(c,d){if(Math.abs(d)<0.5){return}if(d>0){a.zoom_in(c.pageX,this.viewport_container)}else{a.zoom_out()}c.preventDefault()});this.content_div.bind("dblclick",function(c){a.zoom_in(c.pageX,this.viewport_container)});this.overview_box.bind("dragstart",function(c){this.current_x=c.offsetX}).bind("drag",function
(c){var f=c.offsetX-this.current_x;this.current_x=c.offsetX;var d=Math.round(f/a.viewport_container.width()*(a.high-a.low));a.move_delta(-2*d)});this.viewport_container.bind("dragstart",function(c){this.original_low=a.low;this.current_height=c.clientY;this.current_x=c.offsetX}).bind("drag",function(f){var c=$(this);var h=f.offsetX-this.current_x;var d=c.scrollTop()-(f.clientY-this.current_height);if(d<c.get(0).scrollHeight-c.height()){c.scrollTop(d)}this.current_height=f.clientY;this.current_x=f.offsetX;var g=Math.round(h/a.viewport_container.width()*(a.high-a.low));a.move_delta(g)});this.top_labeltrack.bind("dragstart",function(c){this.drag_origin_x=c.clientX;this.drag_origin_pos=c.clientX/a.viewport_container.width()*(a.high-a.low)+a.low;this.drag_div=$("<div />").css({height:a.content_div.height(),top:"0px",position:"absolute","background-color":"#cfc",border:"1px solid #6a6",opacity:0.5}).appendTo($(this))}).bind("drag",function(h){var d=Math.min(h.clientX,this.drag_orig
in_x),c=Math.max(h.clientX,this.drag_origin_x),g=(a.high-a.low),f=a.viewport_container.width();a.low_input.val(commatize(Math.round(d/f*g)+a.low));a.high_input.val(commatize(Math.round(c/f*g)+a.low));this.drag_div.css({left:d+"px",width:(c-d)+"px"})}).bind("dragend",function(j){var d=Math.min(j.clientX,this.drag_origin_x),c=Math.max(j.clientX,this.drag_origin_x),g=(a.high-a.low),f=a.viewport_container.width(),h=a.low;a.low=Math.round(d/f*g)+h;a.high=Math.round(c/f*g)+h;this.drag_div.remove();a.redraw()});this.add_label_track(new LabelTrack(this,this.top_labeltrack));this.add_label_track(new LabelTrack(this,this.nav_labeltrack))},move_delta:function(c){var a=this;var b=a.high-a.low;if(a.low-c<a.max_low){a.low=a.max_low;a.high=a.max_low+b}else{if(a.high-c>a.max_high){a.high=a.max_high;a.low=a.max_high-b}else{a.high-=c;a.low-=c}}a.redraw()},add_track:function(a){a.view=this;a.track_id=this.track_id_counter;this.tracks.push(a);if(a.init){a.init()}a.container_div.attr("id","track
_"+a.track_id);this.track_id_counter+=1},add_label_track:function(a){a.view=this;this.label_tracks.push(a)},remove_track:function(a){this.has_changes=true;a.container_div.fadeOut("slow",function(){$(this).remove()});delete this.tracks[this.tracks.indexOf(a)]},update_options:function(){this.has_changes=true;var b=$("ul#sortable-ul").sortable("toArray");for(var c in b){var e=b[c].split("_li")[0].split("track_")[1];this.viewport.append($("#track_"+e))}for(var d in view.tracks){var a=view.tracks[d];if(a&&a.update_options){a.update_options(d)}}},reset:function(){this.low=this.max_low;this.high=this.max_high;this.viewport_container.find(".yaxislabel").remove()},redraw:function(f){var d=this.high-this.low,b=this.low,e=this.high;if(b<this.max_low){b=this.max_low}if(e>this.max_high){e=this.max_high}if(d<this.min_separation){e=b+this.min_separation}this.low=Math.floor(b);this.high=Math.ceil(e);this.resolution=Math.pow(10,Math.ceil(Math.log((this.high-this.low)/200)/Math.LN10));this.zo
om_res=Math.pow(FEATURE_LEVELS,Math.max(0,Math.ceil(Math.log(this.resolution,FEATURE_LEVELS)/Math.log(FEATURE_LEVELS))));this.overview_box.css({left:(this.low/(this.max_high-this.max_low))*this.overview_viewport.width(),width:Math.max(12,(this.high-this.low)/(this.max_high-this.max_low)*this.overview_viewport.width())}).show();this.low_input.val(commatize(this.low));this.high_input.val(commatize(this.high));if(!f){for(var c=0,a=this.tracks.length;c<a;c++){if(this.tracks[c]&&this.tracks[c].enabled){this.tracks[c].draw()}}for(var c=0,a=this.label_tracks.length;c<a;c++){this.label_tracks[c].draw()}}},zoom_in:function(b,c){if(this.max_high===0||this.high-this.low<this.min_separation){return}var d=this.high-this.low,e=d/2+this.low,a=(d/this.zoom_factor)/2;if(b){e=b/this.viewport_container.width()*(this.high-this.low)+this.low}this.low=Math.round(e-a);this.high=Math.round(e+a);this.redraw()},zoom_out:function(){if(this.max_high===0){return}var b=this.high-this.low,c=b/2+this.low,a
=(b*this.zoom_factor)/2;this.low=Math.round(c-a);this.high=Math.round(c+a);this.redraw()}});var Track=function(b,a,c){this.name=b;this.parent_element=c;this.view=a;this.init_global()};$.extend(Track.prototype,{init_global:function(){this.header_div=$("<div class='track-header'>").text(this.name);this.content_div=$("<div class='track-content'>");this.container_div=$("<div />").addClass("track").append(this.header_div).append(this.content_div);this.parent_element.append(this.container_div)},init_each:function(c,b){var a=this;a.enabled=false;a.data_queue={};a.tile_cache.clear();a.data_cache.clear();if(!a.content_div.text()){a.content_div.text(DATA_LOADING)}a.container_div.removeClass("nodata error pending");if(a.view.chrom){$.getJSON(data_url,c,function(d){if(!d||d==="error"||d.kind==="error"){a.container_div.addClass("error");a.content_div.text(DATA_ERROR);if(d.message){var f=a.view.tracks.indexOf(a);var e=$("<a href='javascript:void(0);'></a>").attr("id",f+"_error");e.text("C
lick to view error");$("#"+f+"_error").live("click",function(){show_modal("Trackster Error","<pre>"+d.message+"</pre>",{Close:hide_modal})});a.content_div.append(e)}}else{if(d==="no converter"){a.container_div.addClass("error");a.content_div.text(DATA_NOCONVERTER)}else{if(d.data!==undefined&&(d.data===null||d.data.length===0)){a.container_div.addClass("nodata");a.content_div.text(DATA_NONE)}else{if(d==="pending"){a.container_div.addClass("pending");a.content_div.text(DATA_PENDING);setTimeout(function(){a.init()},5000)}else{a.content_div.text("");a.content_div.css("height",a.height_px+"px");a.enabled=true;b(d);a.draw()}}}}})}else{a.container_div.addClass("nodata");a.content_div.text(DATA_NONE)}}});var TiledTrack=function(){this.left_offset=200};$.extend(TiledTrack.prototype,Track.prototype,{draw:function(){var j=this.view.low,e=this.view.high,f=e-j,d=this.view.resolution;var l=$("<div style='position: relative;'></div>"),m=this.content_div.width()/f,h;this.content_div.childre
n(":first").remove();this.content_div.append(l),this.max_height=0;var a=Math.floor(j/d/DENSITY);while((a*DENSITY*d)<e){var k=this.content_div.width()+"_"+m+"_"+a;var c=this.tile_cache.get(k);if(c){var g=a*DENSITY*d;var b=(g-j)*m;if(this.left_offset){b-=this.left_offset}c.css({left:b});l.append(c);this.max_height=Math.max(this.max_height,c.height());this.content_div.css("height",this.max_height+"px")}else{this.delayed_draw(this,k,j,e,a,d,l,m)}a+=1}},delayed_draw:function(c,e,a,f,b,d,g,h){setTimeout(function(){if(!(a>c.view.high||f<c.view.low)){tile_element=c.draw_tile(d,b,g,h);if(tile_element){c.tile_cache.set(e,tile_element);c.max_height=Math.max(c.max_height,tile_element.height());c.content_div.css("height",c.max_height+"px")}}},50)}});var LabelTrack=function(a,b){Track.call(this,null,a,b);this.track_type="LabelTrack";this.hidden=true;this.container_div.addClass("label-track")};$.extend(LabelTrack.prototype,Track.prototype,{draw:function(){var c=this.view,d=c.high-c.low,g=M
ath.floor(Math.pow(10,Math.floor(Math.log(d)/Math.log(10)))),a=Math.floor(c.low/g)*g,e=this.content_div.width(),b=$("<div style='position: relative; height: 1.3em;'></div>");while(a<c.high){var f=(a-c.low)/d*e;b.append($("<div class='label'>"+commatize(a)+"</div>").css({position:"absolute",left:f-1}));a+=g}this.content_div.children(":first").remove();this.content_div.append(b)}});var ReferenceTrack=function(a){this.track_type="ReferenceTrack";Track.call(this,null,a,a.nav_labeltrack);TiledTrack.call(this);this.hidden=true;this.height_px=12;this.container_div.addClass("reference-track");this.dummy_canvas=$("<canvas></canvas>").get(0).getContext("2d");this.data_queue={};this.data_cache=new Cache(CACHED_DATA);this.tile_cache=new Cache(CACHED_TILES_LINE)};$.extend(ReferenceTrack.prototype,TiledTrack.prototype,{get_data:function(d,b){var c=this,a=b*DENSITY*d,f=(b+1)*DENSITY*d,e=d+"_"+b;if(!c.data_queue[e]){c.data_queue[e]=true;$.ajax({url:reference_url,dataType:"json",data:{chrom:
this.view.chrom,low:a,high:f,dbkey:this.view.dbkey},success:function(g){c.data_cache.set(e,g);delete c.data_queue[e];c.draw()},error:function(h,g,j){console.log(h,g,j)}})}},draw_tile:function(f,b,k,o){var g=b*DENSITY*f,d=DENSITY*f,e=$("<canvas class='tile'></canvas>"),n=e.get(0).getContext("2d"),j=f+"_"+b;if(o>PX_PER_CHAR){if(this.data_cache.get(j)===undefined){this.get_data(f,b);return}var m=this.data_cache.get(j);if(m===null){return}e.get(0).width=Math.ceil(d*o+this.left_offset);e.get(0).height=this.height_px;e.css({position:"absolute",top:0,left:(g-this.view.low)*o+this.left_offset});for(var h=0,l=m.length;h<l;h++){var a=Math.round(h*o);n.fillText(m[h],a+this.left_offset,10)}k.append(e);return e}}});var LineTrack=function(d,b,a,c){this.track_type="LineTrack";Track.call(this,d,b,b.viewport_container);TiledTrack.call(this);this.height_px=100;this.dataset_id=a;this.data_cache=new Cache(CACHED_DATA);this.tile_cache=new Cache(CACHED_TILES_LINE);this.prefs={min_value:undefined,
max_value:undefined,mode:"Line"};if(c.min_value!==undefined){this.prefs.min_value=c.min_value}if(c.max_value!==undefined){this.prefs.max_value=c.max_value}if(c.mode!==undefined){this.prefs.mode=c.mode}};$.extend(LineTrack.prototype,TiledTrack.prototype,{init:function(){var a=this,b=a.view.tracks.indexOf(a);a.vertical_range=undefined;this.init_each({stats:true,chrom:a.view.chrom,low:null,high:null,dataset_id:a.dataset_id},function(c){a.container_div.addClass("line-track");data=c.data;if(isNaN(parseFloat(a.prefs.min_value))||isNaN(parseFloat(a.prefs.max_value))){a.prefs.min_value=data.min;a.prefs.max_value=data.max;$("#track_"+b+"_minval").val(a.prefs.min_value);$("#track_"+b+"_maxval").val(a.prefs.max_value)}a.vertical_range=a.prefs.max_value-a.prefs.min_value;a.total_frequency=data.total_frequency;$("#linetrack_"+b+"_minval").remove();$("#linetrack_"+b+"_maxval").remove();var e=$("<div />").addClass("yaxislabel").attr("id","linetrack_"+b+"_minval").text(a.prefs.min_value);va
r d=$("<div />").addClass("yaxislabel").attr("id","linetrack_"+b+"_maxval").text(a.prefs.max_value);d.css({position:"relative",top:"25px",left:"10px"});d.prependTo(a.container_div);e.css({position:"relative",top:a.height_px+55+"px",left:"10px"});e.prependTo(a.container_div)})},get_data:function(d,b){var c=this,a=b*DENSITY*d,f=(b+1)*DENSITY*d,e=d+"_"+b;if(!c.data_queue[e]){c.data_queue[e]=true;$.ajax({url:data_url,dataType:"json",data:{chrom:this.view.chrom,low:a,high:f,dataset_id:this.dataset_id,resolution:this.view.resolution},success:function(g){data=g.data;c.data_cache.set(e,data);delete c.data_queue[e];c.draw()},error:function(h,g,j){console.log(h,g,j)}})}},draw_tile:function(p,r,c,e){if(this.vertical_range===undefined){return}var s=r*DENSITY*p,a=DENSITY*p,b=$("<canvas class='tile'></canvas>"),v=p+"_"+r;if(this.data_cache.get(v)===undefined){this.get_data(p,r);return}var j=this.data_cache.get(v);if(j===null){return}b.css({position:"absolute",top:0,left:(s-this.view.low)*
e});b.get(0).width=Math.ceil(a*e+this.left_offset);b.get(0).height=this.height_px;var o=b.get(0).getContext("2d"),k=false,l=this.prefs.min_value,g=this.prefs.max_value,n=this.vertical_range,t=this.total_frequency,d=this.height_px,m=this.prefs.mode;o.beginPath();if(data.length>1){var f=Math.ceil((data[1][0]-data[0][0])*e)}else{var f=10}var u,h;for(var q=0;q<data.length;q++){u=(data[q][0]-s)*e;h=data[q][1];if(m=="Intensity"){if(h===null){continue}if(h<=l){h=l}else{if(h>=g){h=g}}h=255-Math.floor((h-l)/n*255);o.fillStyle="rgb("+h+","+h+","+h+")";o.fillRect(u,0,f,this.height_px)}else{if(h===null){if(k&&m==="Filled"){o.lineTo(u,d)}k=false;continue}else{if(h<=l){h=l}else{if(h>=g){h=g}}h=Math.round(d-(h-l)/n*d);if(k){o.lineTo(u,h)}else{k=true;if(m==="Filled"){o.moveTo(u,d);o.lineTo(u,h)}else{o.moveTo(u,h)}}}}}if(m==="Filled"){if(k){o.lineTo(u,d)}o.fill()}else{o.stroke()}c.append(b);return b},gen_options:function(o){var a=$("<div />").addClass("form-row");var h="track_"+o+"_minval",m
=$("<label></label>").attr("for",h).text("Min value:"),b=(this.prefs.min_value===undefined?"":this.prefs.min_value),n=$("<input></input>").attr("id",h).val(b),l="track_"+o+"_maxval",g=$("<label></label>").attr("for",l).text("Max value:"),k=(this.prefs.max_value===undefined?"":this.prefs.max_value),f=$("<input></input>").attr("id",l).val(k),e="track_"+o+"_mode",d=$("<label></label>").attr("for",e).text("Display mode:"),j=(this.prefs.mode===undefined?"Line":this.prefs.mode),c=$('<select id="'+e+'"><option value="Line" id="mode_Line">Line</option><option value="Filled" id="mode_Filled">Filled</option><option value="Intensity" id="mode_Intensity">Intensity</option></select>');c.children("#mode_"+j).attr("selected","selected");return a.append(m).append(n).append(g).append(f).append(d).append(c)},update_options:function(d){var a=$("#track_"+d+"_minval").val(),c=$("#track_"+d+"_maxval").val(),b=$("#track_"+d+"_mode option:selected").val();if(a!==this.prefs.min_value||c!==this.prefs
.max_value||b!==this.prefs.mode){this.prefs.min_value=parseFloat(a);this.prefs.max_value=parseFloat(c);this.prefs.mode=b;this.vertical_range=this.prefs.max_value-this.prefs.min_value;$("#linetrack_"+d+"_minval").text(this.prefs.min_value);$("#linetrack_"+d+"_maxval").text(this.prefs.max_value);this.tile_cache.clear();this.draw()}}});var FeatureTrack=function(d,b,a,c){this.track_type="FeatureTrack";Track.call(this,d,b,b.viewport_container);TiledTrack.call(this);this.height_px=0;this.container_div.addClass("feature-track");this.dataset_id=a;this.zo_slots={};this.show_labels_scale=0.001;this.showing_details=false;this.vertical_detail_px=10;this.vertical_nodetail_px=3;this.default_font="9px Monaco, Lucida Console, monospace";this.inc_slots={};this.data_queue={};this.s_e_by_tile={};this.tile_cache=new Cache(CACHED_TILES_FEATURE);this.data_cache=new Cache(20);this.prefs={block_color:"black",label_color:"black",show_counts:false};if(c.block_color!==undefined){this.prefs.block_color
=c.block_color}if(c.label_color!==undefined){this.prefs.label_color=c.label_color}if(c.show_counts!==undefined){this.prefs.show_counts=c.show_counts}};$.extend(FeatureTrack.prototype,TiledTrack.prototype,{init:function(){var a=this,b=a.view.max_low+"_"+a.view.max_high;a.mode="Auto";if(a.mode_div){a.mode_div.remove()}this.init_each({low:a.view.max_low,high:a.view.max_high,dataset_id:a.dataset_id,chrom:a.view.chrom,resolution:this.view.resolution},function(d){a.mode_div=$("<div class='right-float menubutton popup' />").text("Display Mode");a.header_div.append(a.mode_div);a.mode="Auto";var c=function(e){a.mode_div.text(e);a.mode=e;a.tile_cache.clear();a.draw()};make_popupmenu(a.mode_div,{Auto:function(){c("Auto")},Dense:function(){c("Dense")},Squish:function(){c("Squish")},Pack:function(){c("Pack")}});a.data_cache.set(b,d);a.draw()})},get_data:function(a,d){var b=this,c=a+"_"+d;if(!b.data_queue[c]){b.data_queue[c]=true;$.getJSON(data_url,{chrom:b.view.chrom,low:a,high:d,dataset
_id:b.dataset_id,resolution:this.view.resolution,mode:this.mode},function(e){b.data_cache.set(c,e);delete b.data_queue[c];b.draw()})}},incremental_slots:function(a,h,c,r){if(!this.inc_slots[a]){this.inc_slots[a]={};this.inc_slots[a].w_scale=1/a;this.inc_slots[a].mode=r;this.s_e_by_tile[a]={}}var n=this.inc_slots[a].w_scale,z=[],l=0,b=$("<canvas></canvas>").get(0).getContext("2d"),o=this.view.max_low;var B=[];if(this.inc_slots[a].mode!==r){delete this.inc_slots[a];this.inc_slots[a]={mode:r,w_scale:n};delete this.s_e_by_tile[a];this.s_e_by_tile[a]={}}for(var w=0,x=h.length;w<x;w++){var g=h[w],m=g[0];if(this.inc_slots[a][m]!==undefined){l=Math.max(l,this.inc_slots[a][m]);B.push(this.inc_slots[a][m])}else{z.push(w)}}for(var w=0,x=z.length;w<x;w++){var g=h[z[w]],m=g[0],s=g[1],d=g[2],q=g[3],e=Math.floor((s-o)*n),f=Math.ceil((d-o)*n);if(q!==undefined&&!c){var t=b.measureText(q).width;if(e-t<0){f+=t}else{e-=t}}var v=0;while(true){var p=true;if(this.s_e_by_tile[a][v]!==undefined){for
(var u=0,A=this.s_e_by_tile[a][v].length;u<A;u++){var y=this.s_e_by_tile[a][v][u];if(f>y[0]&&e<y[1]){p=false;break}}}if(p){if(this.s_e_by_tile[a][v]===undefined){this.s_e_by_tile[a][v]=[]}this.s_e_by_tile[a][v].push([e,f]);this.inc_slots[a][m]=v;l=Math.max(l,v);break}v++}}return l},rect_or_text:function(n,o,f,m,b,d,k,e,h){n.textAlign="center";var j=Math.round(o/2);if((this.mode==="Pack"||this.mode==="Auto")&&d!==undefined&&o>PX_PER_CHAR){n.fillStyle=this.prefs.block_color;n.fillRect(k,h+1,e,9);n.fillStyle="#eee";for(var g=0,l=d.length;g<l;g++){if(b+g>=f&&b+g<=m){var a=Math.floor(Math.max(0,(b+g-f)*o));n.fillText(d[g],a+this.left_offset+j,h+9)}}}else{n.fillStyle=this.prefs.block_color;n.fillRect(k,h+4,e,3)}},draw_tile:function(X,h,n,ak){var E=h*DENSITY*X,ad=(h+1)*DENSITY*X,D=DENSITY*X;var ae=E+"_"+ad;var z=this.data_cache.get(ae);if(z===undefined){this.data_queue[[E,ad]]=true;this.get_data(E,ad);return}var a=Math.ceil(D*ak),L=$("<canvas class='tile'></canvas>"),Z=this.prefs.l
abel_color,f=this.prefs.block_color,m=this.mode,V=(m==="Squish")||(m==="Dense")&&(m!=="Pack")||(m==="Auto"&&(z.extra_info==="no_detail")),P=this.left_offset,aj,s,al;if(z.dataset_type==="summary_tree"){s=30}else{if(m==="Dense"){s=15;al=10}else{al=(V?this.vertical_nodetail_px:this.vertical_detail_px);s=this.incremental_slots(this.view.zoom_res,z.data,V,m)*al+15;aj=this.inc_slots[this.view.zoom_res]}}L.css({position:"absolute",top:0,left:(E-this.view.low)*ak-P});L.get(0).width=a+P;L.get(0).height=s;n.parent().css("height",Math.max(this.height_px,s)+"px");var A=L.get(0).getContext("2d");A.fillStyle=f;A.font=this.default_font;A.textAlign="right";if(z.dataset_type=="summary_tree"){var K,H=55,ac=255-H,g=ac*2/3,R=z.data,C=z.max,l=z.avg;if(R.length>2){var b=Math.ceil((R[1][0]-R[0][0])*ak)}else{var b=50}for(var ag=0,w=R.length;ag<w;ag++){var T=Math.ceil((R[ag][0]-E)*ak);var S=R[ag][1];if(!S){continue}K=Math.floor(ac-(S/C)*ac);A.fillStyle="rgb("+K+","+K+","+K+")";A.fillRect(T+P,0,b,20)
;if(this.prefs.show_counts){if(K>g){A.fillStyle="black"}else{A.fillStyle="#ddd"}A.textAlign="center";A.fillText(R[ag][1],T+P+(b/2),12)}}n.append(L);return L}var ai=z.data;var af=0;for(var ag=0,w=ai.length;ag<w;ag++){var M=ai[ag],J=M[0],ah=M[1],U=M[2],F=M[3];if(ah<=ad&&U>=E){var W=Math.floor(Math.max(0,(ah-E)*ak)),B=Math.ceil(Math.min(a,Math.max(0,(U-E)*ak))),Q=(m==="Dense"?0:aj[J]*al);if(z.dataset_type==="bai"){A.fillStyle=f;if(M[4] instanceof Array){var t=Math.floor(Math.max(0,(M[4][0]-E)*ak)),I=Math.ceil(Math.min(a,Math.max(0,(M[4][1]-E)*ak))),r=Math.floor(Math.max(0,(M[5][0]-E)*ak)),p=Math.ceil(Math.min(a,Math.max(0,(M[5][1]-E)*ak)));if(M[4][1]>=E&&M[4][0]<=ad){this.rect_or_text(A,ak,E,ad,M[4][0],M[4][2],t+P,I-t,Q)}if(M[5][1]>=E&&M[5][0]<=ad){this.rect_or_text(A,ak,E,ad,M[5][0],M[5][2],r+P,p-r,Q)}if(r>I){A.fillStyle="#999";A.fillRect(I+P,Q+5,r-I,1)}}else{A.fillStyle=f;this.rect_or_text(A,ak,E,ad,ah,F,W+P,B-W,Q)}if(m!=="Dense"&&!V&&ah>E){A.fillStyle=this.prefs.label_color;
if(h===0&&W-A.measureText(F).width<0){A.textAlign="left";A.fillText(J,B+2+P,Q+8)}else{A.textAlign="right";A.fillText(J,W-2+P,Q+8)}A.fillStyle=f}}else{if(z.dataset_type==="interval_index"){if(V){A.fillRect(W+P,Q+5,B-W,1)}else{var v=M[4],O=M[5],Y=M[6],e=M[7];var u,aa,G=null,am=null;if(O&&Y){G=Math.floor(Math.max(0,(O-E)*ak));am=Math.ceil(Math.min(a,Math.max(0,(Y-E)*ak)))}if(m!=="Dense"&&F!==undefined&&ah>E){A.fillStyle=Z;if(h===0&&W-A.measureText(F).width<0){A.textAlign="left";A.fillText(F,B+2+P,Q+8)}else{A.textAlign="right";A.fillText(F,W-2+P,Q+8)}A.fillStyle=f}if(e){if(v){if(v=="+"){A.fillStyle=RIGHT_STRAND}else{if(v=="-"){A.fillStyle=LEFT_STRAND}}A.fillRect(W+P,Q,B-W,10);A.fillStyle=f}for(var ae=0,d=e.length;ae<d;ae++){var o=e[ae],c=Math.floor(Math.max(0,(o[0]-E)*ak)),N=Math.ceil(Math.min(a,Math.max((o[1]-E)*ak)));if(c>N){continue}u=5;aa=3;A.fillRect(c+P,Q+aa,N-c,u);if(G!==undefined&&!(c>am||N<G)){u=9;aa=1;var ab=Math.max(c,G),q=Math.min(N,am);A.fillRect(ab+P,Q+aa,q-ab,u)}}
}else{u=9;aa=1;A.fillRect(W+P,Q+aa,B-W,u);if(M.strand){if(M.strand=="+"){A.fillStyle=RIGHT_STRAND_INV}else{if(M.strand=="-"){A.fillStyle=LEFT_STRAND_INV}}A.fillRect(W+P,Q,B-W,10);A.fillStyle=prefs.block_color}}}}}af++}}n.append(L);return L},gen_options:function(j){var a=$("<div />").addClass("form-row");var e="track_"+j+"_block_color",l=$("<label />").attr("for",e).text("Block color:"),m=$("<input />").attr("id",e).attr("name",e).val(this.prefs.block_color),k="track_"+j+"_label_color",g=$("<label />").attr("for",k).text("Text color:"),h=$("<input />").attr("id",k).attr("name",k).val(this.prefs.label_color),f="track_"+j+"_show_count",c=$("<label />").attr("for",f).text("Show summary counts"),b=$('<input type="checkbox" style="float:left;"></input>').attr("id",f).attr("name",f).attr("checked",this.prefs.show_counts),d=$("<div />").append(b).append(c);return a.append(l).append(m).append(g).append(h).append(d)},update_options:function(e){var b=$("#track_"+e+"_block_color").val()
,d=$("#track_"+e+"_label_color").val(),c=$("#track_"+e+"_mode option:selected").val(),a=$("#track_"+e+"_show_count").attr("checked");if(b!==this.prefs.block_color||d!==this.prefs.label_color||a!==this.prefs.show_counts){this.prefs.block_color=b;this.prefs.label_color=d;this.prefs.show_counts=a;this.tile_cache.clear();this.draw()}}});var ReadTrack=function(d,b,a,c){FeatureTrack.call(this,d,b,a,c);this.track_type="ReadTrack";this.vertical_detail_px=10;this.vertical_nodetail_px=5};$.extend(ReadTrack.prototype,TiledTrack.prototype,FeatureTrack.prototype,{});
+var DENSITY=200,FEATURE_LEVELS=10,DATA_ERROR="There was an error in indexing this dataset. ",DATA_NOCONVERTER="A converter for this dataset is not installed. Please check your datatypes_conf.xml file.",DATA_NONE="No data for this chrom/contig.",DATA_PENDING="Currently indexing... please wait",DATA_LOADING="Loading data...",CACHED_TILES_FEATURE=10,CACHED_TILES_LINE=30,CACHED_DATA=5,CONTEXT=$("<canvas></canvas>").get(0).getContext("2d"),PX_PER_CHAR=CONTEXT.measureText("A").width,RIGHT_STRAND,LEFT_STRAND;var right_img=new Image();right_img.src=image_path+"/visualization/strand_right.png";right_img.onload=function(){RIGHT_STRAND=CONTEXT.createPattern(right_img,"repeat")};var left_img=new Image();left_img.src=image_path+"/visualization/strand_left.png";left_img.onload=function(){LEFT_STRAND=CONTEXT.createPattern(left_img,"repeat")};var right_img_inv=new Image();right_img_inv.src=image_path+"/visualization/strand_right_inv.png";right_img_inv.onload=function(){RIGHT_STRAND_INV=CONT
EXT.createPattern(right_img_inv,"repeat")};var left_img_inv=new Image();left_img_inv.src=image_path+"/visualization/strand_left_inv.png";left_img_inv.onload=function(){LEFT_STRAND_INV=CONTEXT.createPattern(left_img_inv,"repeat")};var Cache=function(a){this.num_elements=a;this.clear()};$.extend(Cache.prototype,{get:function(b){var a=this.key_ary.indexOf(b);if(a!=-1){this.key_ary.splice(a,1);this.key_ary.push(b)}return this.obj_cache[b]},set:function(b,c){if(!this.obj_cache[b]){if(this.key_ary.length>=this.num_elements){var a=this.key_ary.shift();delete this.obj_cache[a]}this.key_ary.push(b)}this.obj_cache[b]=c;return c},clear:function(){this.obj_cache={};this.key_ary=[]}});var View=function(a,c,e,d,b){this.container=a;this.vis_id=d;this.dbkey=b;this.title=e;this.chrom=c;this.tracks=[];this.label_tracks=[];this.max_low=0;this.max_high=0;this.track_id_counter=0;this.zoom_factor=3;this.min_separation=30;this.has_changes=false;this.init();this.reset()};$.extend(View.prototype,{in
it:function(){var b=this.container,a=this;this.content_div=$("<div/>").addClass("content").css("position","relative").appendTo(b);this.top_labeltrack=$("<div/>").addClass("top-labeltrack").appendTo(this.content_div);this.viewport_container=$("<div/>").addClass("viewport-container").addClass("viewport-container").appendTo(this.content_div);this.viewport=$("<div/>").addClass("viewport").appendTo(this.viewport_container);this.nav_container=$("<div/>").addClass("nav-container").appendTo(b);this.nav_labeltrack=$("<div/>").addClass("nav-labeltrack").appendTo(this.nav_container);this.nav=$("<div/>").addClass("nav").appendTo(this.nav_container);this.overview=$("<div/>").addClass("overview").appendTo(this.nav);this.overview_viewport=$("<div/>").addClass("overview-viewport").appendTo(this.overview);this.overview_box=$("<div/>").addClass("overview-box").appendTo(this.overview_viewport);this.nav_controls=$("<div/>").addClass("nav-controls").appendTo(this.nav);this.chrom_form=$("<form/>"
).attr("action",function(){void (0)}).appendTo(this.nav_controls);this.chrom_select=$("<select/>").attr({name:"chrom"}).css("width","15em").addClass("no-autocomplete").append("<option value=''>Loading</option>").appendTo(this.chrom_form);this.low_input=$("<input/>").addClass("low").css("width","10em").appendTo(this.chrom_form);$("<span/>").text(" - ").appendTo(this.chrom_form);this.high_input=$("<input/>").addClass("high").css("width","10em").appendTo(this.chrom_form);if(this.vis_id!==undefined){this.hidden_input=$("<input/>").attr("type","hidden").val(this.vis_id).appendTo(this.chrom_form)}this.zi_link=$("<a/>").click(function(){a.zoom_in();a.redraw()}).html('<img src="'+image_path+'/fugue/magnifier-zoom.png" />').appendTo(this.chrom_form);this.zo_link=$("<a/>").click(function(){a.zoom_out();a.redraw()}).html('<img src="'+image_path+'/fugue/magnifier-zoom-out.png" />').appendTo(this.chrom_form);$.ajax({url:chrom_url,data:(this.vis_id!==undefined?{vis_id:this.vis_id}:{dbkey:
this.dbkey}),dataType:"json",success:function(c){if(c.reference){a.add_label_track(new ReferenceTrack(a))}a.chrom_data=c.chrom_info;var e='<option value="">Select Chrom/Contig</option>';for(i in a.chrom_data){var d=a.chrom_data[i]["chrom"];e+='<option value="'+d+'">'+d+"</option>"}a.chrom_select.html(e);a.chrom_select.bind("change",function(){a.chrom=a.chrom_select.val();var g=$.grep(a.chrom_data,function(j,k){return j.chrom===a.chrom})[0];a.max_high=g.len;a.reset();a.redraw(true);for(var h in a.tracks){var f=a.tracks[h];if(f.init){f.init()}}a.redraw()})},error:function(){alert("Could not load chroms for this dbkey:",a.dbkey)}});this.content_div.bind("mousewheel",function(c,d){if(Math.abs(d)<0.5){return}if(d>0){a.zoom_in(c.pageX,this.viewport_container)}else{a.zoom_out()}c.preventDefault()});this.content_div.bind("dblclick",function(c){a.zoom_in(c.pageX,this.viewport_container)});this.overview_box.bind("dragstart",function(c){this.current_x=c.offsetX}).bind("drag",function(c
){var f=c.offsetX-this.current_x;this.current_x=c.offsetX;var d=Math.round(f/a.viewport_container.width()*(a.high-a.low));a.move_delta(-2*d)});this.viewport_container.bind("dragstart",function(c){this.original_low=a.low;this.current_height=c.clientY;this.current_x=c.offsetX}).bind("drag",function(f){var c=$(this);var h=f.offsetX-this.current_x;var d=c.scrollTop()-(f.clientY-this.current_height);if(d<c.get(0).scrollHeight-c.height()){c.scrollTop(d)}this.current_height=f.clientY;this.current_x=f.offsetX;var g=Math.round(h/a.viewport_container.width()*(a.high-a.low));a.move_delta(g)});this.top_labeltrack.bind("dragstart",function(c){this.drag_origin_x=c.clientX;this.drag_origin_pos=c.clientX/a.viewport_container.width()*(a.high-a.low)+a.low;this.drag_div=$("<div />").css({height:a.content_div.height(),top:"0px",position:"absolute","background-color":"#cfc",border:"1px solid #6a6",opacity:0.5}).appendTo($(this))}).bind("drag",function(h){var d=Math.min(h.clientX,this.drag_origin
_x),c=Math.max(h.clientX,this.drag_origin_x),g=(a.high-a.low),f=a.viewport_container.width();a.low_input.val(commatize(Math.round(d/f*g)+a.low));a.high_input.val(commatize(Math.round(c/f*g)+a.low));this.drag_div.css({left:d+"px",width:(c-d)+"px"})}).bind("dragend",function(j){var d=Math.min(j.clientX,this.drag_origin_x),c=Math.max(j.clientX,this.drag_origin_x),g=(a.high-a.low),f=a.viewport_container.width(),h=a.low;a.low=Math.round(d/f*g)+h;a.high=Math.round(c/f*g)+h;this.drag_div.remove();a.redraw()});this.add_label_track(new LabelTrack(this,this.top_labeltrack));this.add_label_track(new LabelTrack(this,this.nav_labeltrack))},move_delta:function(c){var a=this;var b=a.high-a.low;if(a.low-c<a.max_low){a.low=a.max_low;a.high=a.max_low+b}else{if(a.high-c>a.max_high){a.high=a.max_high;a.low=a.max_high-b}else{a.high-=c;a.low-=c}}a.redraw()},add_track:function(a){a.view=this;a.track_id=this.track_id_counter;this.tracks.push(a);if(a.init){a.init()}a.container_div.attr("id","track_"
+a.track_id);this.track_id_counter+=1},add_label_track:function(a){a.view=this;this.label_tracks.push(a)},remove_track:function(a){this.has_changes=true;a.container_div.fadeOut("slow",function(){$(this).remove()});delete this.tracks[this.tracks.indexOf(a)]},update_options:function(){this.has_changes=true;var b=$("ul#sortable-ul").sortable("toArray");for(var c in b){var e=b[c].split("_li")[0].split("track_")[1];this.viewport.append($("#track_"+e))}for(var d in view.tracks){var a=view.tracks[d];if(a&&a.update_options){a.update_options(d)}}},reset:function(){this.low=this.max_low;this.high=this.max_high;this.viewport_container.find(".yaxislabel").remove()},redraw:function(f){var d=this.high-this.low,b=this.low,e=this.high;if(b<this.max_low){b=this.max_low}if(e>this.max_high){e=this.max_high}if(this.high!==0&&d<this.min_separation){e=b+this.min_separation}this.low=Math.floor(b);this.high=Math.ceil(e);this.resolution=Math.pow(10,Math.ceil(Math.log((this.high-this.low)/200)/Math.L
N10));this.zoom_res=Math.pow(FEATURE_LEVELS,Math.max(0,Math.ceil(Math.log(this.resolution,FEATURE_LEVELS)/Math.log(FEATURE_LEVELS))));this.overview_box.css({left:(this.low/(this.max_high-this.max_low))*this.overview_viewport.width(),width:Math.max(12,(this.high-this.low)/(this.max_high-this.max_low)*this.overview_viewport.width())}).show();this.low_input.val(commatize(this.low));this.high_input.val(commatize(this.high));if(!f){for(var c=0,a=this.tracks.length;c<a;c++){if(this.tracks[c]&&this.tracks[c].enabled){this.tracks[c].draw()}}for(var c=0,a=this.label_tracks.length;c<a;c++){this.label_tracks[c].draw()}}},zoom_in:function(b,c){if(this.max_high===0||this.high-this.low<this.min_separation){return}var d=this.high-this.low,e=d/2+this.low,a=(d/this.zoom_factor)/2;if(b){e=b/this.viewport_container.width()*(this.high-this.low)+this.low}this.low=Math.round(e-a);this.high=Math.round(e+a);this.redraw()},zoom_out:function(){if(this.max_high===0){return}var b=this.high-this.low,c=b
/2+this.low,a=(b*this.zoom_factor)/2;this.low=Math.round(c-a);this.high=Math.round(c+a);this.redraw()}});var Track=function(b,a,c){this.name=b;this.parent_element=c;this.view=a;this.init_global()};$.extend(Track.prototype,{init_global:function(){this.header_div=$("<div class='track-header'>").text(this.name);this.content_div=$("<div class='track-content'>");this.container_div=$("<div />").addClass("track").append(this.header_div).append(this.content_div);this.parent_element.append(this.container_div)},init_each:function(c,b){var a=this;a.enabled=false;a.data_queue={};a.tile_cache.clear();a.data_cache.clear();if(!a.content_div.text()){a.content_div.text(DATA_LOADING)}a.container_div.removeClass("nodata error pending");if(a.view.chrom){$.getJSON(data_url,c,function(d){if(!d||d==="error"||d.kind==="error"){a.container_div.addClass("error");a.content_div.text(DATA_ERROR);if(d.message){var f=a.view.tracks.indexOf(a);var e=$("<a href='javascript:void(0);'></a>").attr("id",f+"_erro
r");e.text("Click to view error");$("#"+f+"_error").live("click",function(){show_modal("Trackster Error","<pre>"+d.message+"</pre>",{Close:hide_modal})});a.content_div.append(e)}}else{if(d==="no converter"){a.container_div.addClass("error");a.content_div.text(DATA_NOCONVERTER)}else{if(d.data!==undefined&&(d.data===null||d.data.length===0)){a.container_div.addClass("nodata");a.content_div.text(DATA_NONE)}else{if(d==="pending"){a.container_div.addClass("pending");a.content_div.text(DATA_PENDING);setTimeout(function(){a.init()},5000)}else{a.content_div.text("");a.content_div.css("height",a.height_px+"px");a.enabled=true;b(d);a.draw()}}}}})}else{a.container_div.addClass("nodata");a.content_div.text(DATA_NONE)}}});var TiledTrack=function(){this.left_offset=200};$.extend(TiledTrack.prototype,Track.prototype,{draw:function(){var j=this.view.low,e=this.view.high,f=e-j,d=this.view.resolution;var l=$("<div style='position: relative;'></div>"),m=this.content_div.width()/f,h;this.conten
t_div.children(":first").remove();this.content_div.append(l),this.max_height=0;var a=Math.floor(j/d/DENSITY);while((a*DENSITY*d)<e){var k=this.content_div.width()+"_"+m+"_"+a;var c=this.tile_cache.get(k);if(c){var g=a*DENSITY*d;var b=(g-j)*m;if(this.left_offset){b-=this.left_offset}c.css({left:b});l.append(c);this.max_height=Math.max(this.max_height,c.height());this.content_div.css("height",this.max_height+"px")}else{this.delayed_draw(this,k,j,e,a,d,l,m)}a+=1}},delayed_draw:function(c,e,a,f,b,d,g,h){setTimeout(function(){if(!(a>c.view.high||f<c.view.low)){tile_element=c.draw_tile(d,b,g,h);if(tile_element){c.tile_cache.set(e,tile_element);c.max_height=Math.max(c.max_height,tile_element.height());c.content_div.css("height",c.max_height+"px")}}},50)}});var LabelTrack=function(a,b){Track.call(this,null,a,b);this.track_type="LabelTrack";this.hidden=true;this.container_div.addClass("label-track")};$.extend(LabelTrack.prototype,Track.prototype,{draw:function(){var c=this.view,d=c.h
igh-c.low,g=Math.floor(Math.pow(10,Math.floor(Math.log(d)/Math.log(10)))),a=Math.floor(c.low/g)*g,e=this.content_div.width(),b=$("<div style='position: relative; height: 1.3em;'></div>");while(a<c.high){var f=(a-c.low)/d*e;b.append($("<div class='label'>"+commatize(a)+"</div>").css({position:"absolute",left:f-1}));a+=g}this.content_div.children(":first").remove();this.content_div.append(b)}});var ReferenceTrack=function(a){this.track_type="ReferenceTrack";Track.call(this,null,a,a.nav_labeltrack);TiledTrack.call(this);this.hidden=true;this.height_px=12;this.container_div.addClass("reference-track");this.dummy_canvas=$("<canvas></canvas>").get(0).getContext("2d");this.data_queue={};this.data_cache=new Cache(CACHED_DATA);this.tile_cache=new Cache(CACHED_TILES_LINE)};$.extend(ReferenceTrack.prototype,TiledTrack.prototype,{get_data:function(d,b){var c=this,a=b*DENSITY*d,f=(b+1)*DENSITY*d,e=d+"_"+b;if(!c.data_queue[e]){c.data_queue[e]=true;$.ajax({url:reference_url,dataType:"json"
,data:{chrom:this.view.chrom,low:a,high:f,dbkey:this.view.dbkey},success:function(g){c.data_cache.set(e,g);delete c.data_queue[e];c.draw()},error:function(h,g,j){console.log(h,g,j)}})}},draw_tile:function(f,b,k,o){var g=b*DENSITY*f,d=DENSITY*f,e=$("<canvas class='tile'></canvas>"),n=e.get(0).getContext("2d"),j=f+"_"+b;if(o>PX_PER_CHAR){if(this.data_cache.get(j)===undefined){this.get_data(f,b);return}var m=this.data_cache.get(j);if(m===null){return}e.get(0).width=Math.ceil(d*o+this.left_offset);e.get(0).height=this.height_px;e.css({position:"absolute",top:0,left:(g-this.view.low)*o+this.left_offset});for(var h=0,l=m.length;h<l;h++){var a=Math.round(h*o);n.fillText(m[h],a+this.left_offset,10)}k.append(e);return e}}});var LineTrack=function(d,b,a,c){this.track_type="LineTrack";Track.call(this,d,b,b.viewport_container);TiledTrack.call(this);this.height_px=100;this.dataset_id=a;this.data_cache=new Cache(CACHED_DATA);this.tile_cache=new Cache(CACHED_TILES_LINE);this.prefs={min_val
ue:undefined,max_value:undefined,mode:"Line"};if(c.min_value!==undefined){this.prefs.min_value=c.min_value}if(c.max_value!==undefined){this.prefs.max_value=c.max_value}if(c.mode!==undefined){this.prefs.mode=c.mode}};$.extend(LineTrack.prototype,TiledTrack.prototype,{init:function(){var a=this,b=a.view.tracks.indexOf(a);a.vertical_range=undefined;this.init_each({stats:true,chrom:a.view.chrom,low:null,high:null,dataset_id:a.dataset_id},function(c){a.container_div.addClass("line-track");data=c.data;if(isNaN(parseFloat(a.prefs.min_value))||isNaN(parseFloat(a.prefs.max_value))){a.prefs.min_value=data.min;a.prefs.max_value=data.max;$("#track_"+b+"_minval").val(a.prefs.min_value);$("#track_"+b+"_maxval").val(a.prefs.max_value)}a.vertical_range=a.prefs.max_value-a.prefs.min_value;a.total_frequency=data.total_frequency;$("#linetrack_"+b+"_minval").remove();$("#linetrack_"+b+"_maxval").remove();var e=$("<div />").addClass("yaxislabel").attr("id","linetrack_"+b+"_minval").text(a.prefs.
min_value);var d=$("<div />").addClass("yaxislabel").attr("id","linetrack_"+b+"_maxval").text(a.prefs.max_value);d.css({position:"relative",top:"25px",left:"10px"});d.prependTo(a.container_div);e.css({position:"relative",top:a.height_px+55+"px",left:"10px"});e.prependTo(a.container_div)})},get_data:function(d,b){var c=this,a=b*DENSITY*d,f=(b+1)*DENSITY*d,e=d+"_"+b;if(!c.data_queue[e]){c.data_queue[e]=true;$.ajax({url:data_url,dataType:"json",data:{chrom:this.view.chrom,low:a,high:f,dataset_id:this.dataset_id,resolution:this.view.resolution},success:function(g){data=g.data;c.data_cache.set(e,data);delete c.data_queue[e];c.draw()},error:function(h,g,j){console.log(h,g,j)}})}},draw_tile:function(p,r,c,e){if(this.vertical_range===undefined){return}var s=r*DENSITY*p,a=DENSITY*p,b=$("<canvas class='tile'></canvas>"),v=p+"_"+r;if(this.data_cache.get(v)===undefined){this.get_data(p,r);return}var j=this.data_cache.get(v);if(j===null){return}b.css({position:"absolute",top:0,left:(s-th
is.view.low)*e});b.get(0).width=Math.ceil(a*e+this.left_offset);b.get(0).height=this.height_px;var o=b.get(0).getContext("2d"),k=false,l=this.prefs.min_value,g=this.prefs.max_value,n=this.vertical_range,t=this.total_frequency,d=this.height_px,m=this.prefs.mode;o.beginPath();if(data.length>1){var f=Math.ceil((data[1][0]-data[0][0])*e)}else{var f=10}var u,h;for(var q=0;q<data.length;q++){u=(data[q][0]-s)*e;h=data[q][1];if(m=="Intensity"){if(h===null){continue}if(h<=l){h=l}else{if(h>=g){h=g}}h=255-Math.floor((h-l)/n*255);o.fillStyle="rgb("+h+","+h+","+h+")";o.fillRect(u,0,f,this.height_px)}else{if(h===null){if(k&&m==="Filled"){o.lineTo(u,d)}k=false;continue}else{if(h<=l){h=l}else{if(h>=g){h=g}}h=Math.round(d-(h-l)/n*d);if(k){o.lineTo(u,h)}else{k=true;if(m==="Filled"){o.moveTo(u,d);o.lineTo(u,h)}else{o.moveTo(u,h)}}}}}if(m==="Filled"){if(k){o.lineTo(u,d)}o.fill()}else{o.stroke()}c.append(b);return b},gen_options:function(o){var a=$("<div />").addClass("form-row");var h="track_"+
o+"_minval",m=$("<label></label>").attr("for",h).text("Min value:"),b=(this.prefs.min_value===undefined?"":this.prefs.min_value),n=$("<input></input>").attr("id",h).val(b),l="track_"+o+"_maxval",g=$("<label></label>").attr("for",l).text("Max value:"),k=(this.prefs.max_value===undefined?"":this.prefs.max_value),f=$("<input></input>").attr("id",l).val(k),e="track_"+o+"_mode",d=$("<label></label>").attr("for",e).text("Display mode:"),j=(this.prefs.mode===undefined?"Line":this.prefs.mode),c=$('<select id="'+e+'"><option value="Line" id="mode_Line">Line</option><option value="Filled" id="mode_Filled">Filled</option><option value="Intensity" id="mode_Intensity">Intensity</option></select>');c.children("#mode_"+j).attr("selected","selected");return a.append(m).append(n).append(g).append(f).append(d).append(c)},update_options:function(d){var a=$("#track_"+d+"_minval").val(),c=$("#track_"+d+"_maxval").val(),b=$("#track_"+d+"_mode option:selected").val();if(a!==this.prefs.min_value||c
!==this.prefs.max_value||b!==this.prefs.mode){this.prefs.min_value=parseFloat(a);this.prefs.max_value=parseFloat(c);this.prefs.mode=b;this.vertical_range=this.prefs.max_value-this.prefs.min_value;$("#linetrack_"+d+"_minval").text(this.prefs.min_value);$("#linetrack_"+d+"_maxval").text(this.prefs.max_value);this.tile_cache.clear();this.draw()}}});var FeatureTrack=function(d,b,a,c){this.track_type="FeatureTrack";Track.call(this,d,b,b.viewport_container);TiledTrack.call(this);this.height_px=0;this.container_div.addClass("feature-track");this.dataset_id=a;this.zo_slots={};this.show_labels_scale=0.001;this.showing_details=false;this.vertical_detail_px=10;this.vertical_nodetail_px=3;this.default_font="9px Monaco, Lucida Console, monospace";this.inc_slots={};this.data_queue={};this.s_e_by_tile={};this.tile_cache=new Cache(CACHED_TILES_FEATURE);this.data_cache=new Cache(20);this.prefs={block_color:"black",label_color:"black",show_counts:false};if(c.block_color!==undefined){this.pref
s.block_color=c.block_color}if(c.label_color!==undefined){this.prefs.label_color=c.label_color}if(c.show_counts!==undefined){this.prefs.show_counts=c.show_counts}};$.extend(FeatureTrack.prototype,TiledTrack.prototype,{init:function(){var a=this,b=a.view.max_low+"_"+a.view.max_high;a.mode="Auto";if(a.mode_div){a.mode_div.remove()}this.init_each({low:a.view.max_low,high:a.view.max_high,dataset_id:a.dataset_id,chrom:a.view.chrom,resolution:this.view.resolution},function(d){a.mode_div=$("<div class='right-float menubutton popup' />").text("Display Mode");a.header_div.append(a.mode_div);a.mode="Auto";var c=function(e){a.mode_div.text(e);a.mode=e;a.tile_cache.clear();a.draw()};make_popupmenu(a.mode_div,{Auto:function(){c("Auto")},Dense:function(){c("Dense")},Squish:function(){c("Squish")},Pack:function(){c("Pack")}});a.data_cache.set(b,d);a.draw()})},get_data:function(a,d){var b=this,c=a+"_"+d;if(!b.data_queue[c]){b.data_queue[c]=true;$.getJSON(data_url,{chrom:b.view.chrom,low:a,h
igh:d,dataset_id:b.dataset_id,resolution:this.view.resolution,mode:this.mode},function(e){b.data_cache.set(c,e);delete b.data_queue[c];b.draw()})}},incremental_slots:function(a,h,c,r){if(!this.inc_slots[a]){this.inc_slots[a]={};this.inc_slots[a].w_scale=1/a;this.inc_slots[a].mode=r;this.s_e_by_tile[a]={}}var n=this.inc_slots[a].w_scale,z=[],l=0,b=$("<canvas></canvas>").get(0).getContext("2d"),o=this.view.max_low;var B=[];if(this.inc_slots[a].mode!==r){delete this.inc_slots[a];this.inc_slots[a]={mode:r,w_scale:n};delete this.s_e_by_tile[a];this.s_e_by_tile[a]={}}for(var w=0,x=h.length;w<x;w++){var g=h[w],m=g[0];if(this.inc_slots[a][m]!==undefined){l=Math.max(l,this.inc_slots[a][m]);B.push(this.inc_slots[a][m])}else{z.push(w)}}for(var w=0,x=z.length;w<x;w++){var g=h[z[w]],m=g[0],s=g[1],d=g[2],q=g[3],e=Math.floor((s-o)*n),f=Math.ceil((d-o)*n);if(q!==undefined&&!c){var t=b.measureText(q).width;if(e-t<0){f+=t}else{e-=t}}var v=0;while(true){var p=true;if(this.s_e_by_tile[a][v]!==u
ndefined){for(var u=0,A=this.s_e_by_tile[a][v].length;u<A;u++){var y=this.s_e_by_tile[a][v][u];if(f>y[0]&&e<y[1]){p=false;break}}}if(p){if(this.s_e_by_tile[a][v]===undefined){this.s_e_by_tile[a][v]=[]}this.s_e_by_tile[a][v].push([e,f]);this.inc_slots[a][m]=v;l=Math.max(l,v);break}v++}}return l},rect_or_text:function(n,o,f,m,b,d,k,e,h){n.textAlign="center";var j=Math.round(o/2);if((this.mode==="Pack"||this.mode==="Auto")&&d!==undefined&&o>PX_PER_CHAR){n.fillStyle=this.prefs.block_color;n.fillRect(k,h+1,e,9);n.fillStyle="#eee";for(var g=0,l=d.length;g<l;g++){if(b+g>=f&&b+g<=m){var a=Math.floor(Math.max(0,(b+g-f)*o));n.fillText(d[g],a+this.left_offset+j,h+9)}}}else{n.fillStyle=this.prefs.block_color;n.fillRect(k,h+4,e,3)}},draw_tile:function(X,h,n,ak){var E=h*DENSITY*X,ad=(h+1)*DENSITY*X,D=DENSITY*X;var ae=E+"_"+ad;var z=this.data_cache.get(ae);if(z===undefined){this.data_queue[[E,ad]]=true;this.get_data(E,ad);return}var a=Math.ceil(D*ak),L=$("<canvas class='tile'></canvas>"),Z
=this.prefs.label_color,f=this.prefs.block_color,m=this.mode,V=(m==="Squish")||(m==="Dense")&&(m!=="Pack")||(m==="Auto"&&(z.extra_info==="no_detail")),P=this.left_offset,aj,s,al;if(z.dataset_type==="summary_tree"){s=30}else{if(m==="Dense"){s=15;al=10}else{al=(V?this.vertical_nodetail_px:this.vertical_detail_px);s=this.incremental_slots(this.view.zoom_res,z.data,V,m)*al+15;aj=this.inc_slots[this.view.zoom_res]}}L.css({position:"absolute",top:0,left:(E-this.view.low)*ak-P});L.get(0).width=a+P;L.get(0).height=s;n.parent().css("height",Math.max(this.height_px,s)+"px");var A=L.get(0).getContext("2d");A.fillStyle=f;A.font=this.default_font;A.textAlign="right";if(z.dataset_type=="summary_tree"){var K,H=55,ac=255-H,g=ac*2/3,R=z.data,C=z.max,l=z.avg;if(R.length>2){var b=Math.ceil((R[1][0]-R[0][0])*ak)}else{var b=50}for(var ag=0,w=R.length;ag<w;ag++){var T=Math.ceil((R[ag][0]-E)*ak);var S=R[ag][1];if(!S){continue}K=Math.floor(ac-(S/C)*ac);A.fillStyle="rgb("+K+","+K+","+K+")";A.fillRec
t(T+P,0,b,20);if(this.prefs.show_counts){if(K>g){A.fillStyle="black"}else{A.fillStyle="#ddd"}A.textAlign="center";A.fillText(R[ag][1],T+P+(b/2),12)}}n.append(L);return L}var ai=z.data;var af=0;for(var ag=0,w=ai.length;ag<w;ag++){var M=ai[ag],J=M[0],ah=M[1],U=M[2],F=M[3];if(ah<=ad&&U>=E){var W=Math.floor(Math.max(0,(ah-E)*ak)),B=Math.ceil(Math.min(a,Math.max(0,(U-E)*ak))),Q=(m==="Dense"?0:aj[J]*al);if(z.dataset_type==="bai"){A.fillStyle=f;if(M[4] instanceof Array){var t=Math.floor(Math.max(0,(M[4][0]-E)*ak)),I=Math.ceil(Math.min(a,Math.max(0,(M[4][1]-E)*ak))),r=Math.floor(Math.max(0,(M[5][0]-E)*ak)),p=Math.ceil(Math.min(a,Math.max(0,(M[5][1]-E)*ak)));if(M[4][1]>=E&&M[4][0]<=ad){this.rect_or_text(A,ak,E,ad,M[4][0],M[4][2],t+P,I-t,Q)}if(M[5][1]>=E&&M[5][0]<=ad){this.rect_or_text(A,ak,E,ad,M[5][0],M[5][2],r+P,p-r,Q)}if(r>I){A.fillStyle="#999";A.fillRect(I+P,Q+5,r-I,1)}}else{A.fillStyle=f;this.rect_or_text(A,ak,E,ad,ah,F,W+P,B-W,Q)}if(m!=="Dense"&&!V&&ah>E){A.fillStyle=this.prefs
.label_color;if(h===0&&W-A.measureText(F).width<0){A.textAlign="left";A.fillText(J,B+2+P,Q+8)}else{A.textAlign="right";A.fillText(J,W-2+P,Q+8)}A.fillStyle=f}}else{if(z.dataset_type==="interval_index"){if(V){A.fillRect(W+P,Q+5,B-W,1)}else{var v=M[4],O=M[5],Y=M[6],e=M[7];var u,aa,G=null,am=null;if(O&&Y){G=Math.floor(Math.max(0,(O-E)*ak));am=Math.ceil(Math.min(a,Math.max(0,(Y-E)*ak)))}if(m!=="Dense"&&F!==undefined&&ah>E){A.fillStyle=Z;if(h===0&&W-A.measureText(F).width<0){A.textAlign="left";A.fillText(F,B+2+P,Q+8)}else{A.textAlign="right";A.fillText(F,W-2+P,Q+8)}A.fillStyle=f}if(e){if(v){if(v=="+"){A.fillStyle=RIGHT_STRAND}else{if(v=="-"){A.fillStyle=LEFT_STRAND}}A.fillRect(W+P,Q,B-W,10);A.fillStyle=f}for(var ae=0,d=e.length;ae<d;ae++){var o=e[ae],c=Math.floor(Math.max(0,(o[0]-E)*ak)),N=Math.ceil(Math.min(a,Math.max((o[1]-E)*ak)));if(c>N){continue}u=5;aa=3;A.fillRect(c+P,Q+aa,N-c,u);if(G!==undefined&&!(c>am||N<G)){u=9;aa=1;var ab=Math.max(c,G),q=Math.min(N,am);A.fillRect(ab+P,Q
+aa,q-ab,u)}}}else{u=9;aa=1;A.fillRect(W+P,Q+aa,B-W,u);if(M.strand){if(M.strand=="+"){A.fillStyle=RIGHT_STRAND_INV}else{if(M.strand=="-"){A.fillStyle=LEFT_STRAND_INV}}A.fillRect(W+P,Q,B-W,10);A.fillStyle=prefs.block_color}}}}}af++}}n.append(L);return L},gen_options:function(j){var a=$("<div />").addClass("form-row");var e="track_"+j+"_block_color",l=$("<label />").attr("for",e).text("Block color:"),m=$("<input />").attr("id",e).attr("name",e).val(this.prefs.block_color),k="track_"+j+"_label_color",g=$("<label />").attr("for",k).text("Text color:"),h=$("<input />").attr("id",k).attr("name",k).val(this.prefs.label_color),f="track_"+j+"_show_count",c=$("<label />").attr("for",f).text("Show summary counts"),b=$('<input type="checkbox" style="float:left;"></input>').attr("id",f).attr("name",f).attr("checked",this.prefs.show_counts),d=$("<div />").append(b).append(c);return a.append(l).append(m).append(g).append(h).append(d)},update_options:function(e){var b=$("#track_"+e+"_block_
color").val(),d=$("#track_"+e+"_label_color").val(),c=$("#track_"+e+"_mode option:selected").val(),a=$("#track_"+e+"_show_count").attr("checked");if(b!==this.prefs.block_color||d!==this.prefs.label_color||a!==this.prefs.show_counts){this.prefs.block_color=b;this.prefs.label_color=d;this.prefs.show_counts=a;this.tile_cache.clear();this.draw()}}});var ReadTrack=function(d,b,a,c){FeatureTrack.call(this,d,b,a,c);this.track_type="ReadTrack";this.vertical_detail_px=10;this.vertical_nodetail_px=5};$.extend(ReadTrack.prototype,TiledTrack.prototype,FeatureTrack.prototype,{});
1
0
galaxy-dist commit e2ba0e9c6852: Updating sam_indel_filter tool; adding NGS: Indel Analysis section; adding indel_analysis, indel_table, and indel_sam2interval tools
by commits-noreply@bitbucket.org 16 Jul '10
by commits-noreply@bitbucket.org 16 Jul '10
16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Kelly Vincent <kpvincent(a)bx.psu.edu>
# Date 1279084192 14400
# Node ID e2ba0e9c6852f2acba9d7119c09b703d3bc954be
# Parent f31c41763836603f42da474c3723d27143085cf8
Updating sam_indel_filter tool; adding NGS: Indel Analysis section; adding indel_analysis, indel_table, and indel_sam2interval tools
--- /dev/null
+++ b/test-data/indel_table_in1.interval
@@ -0,0 +1,6 @@
+chrM 300 301 D - 6
+chrM 303 304 D - 19
+chrM 359 360 D - 1
+chrM 410 411 D - 1
+chrM 435 436 D - 1
+chrM 753 754 I A 1
--- a/test-data/sam_indel_filter_in2.sam
+++ b/test-data/sam_indel_filter_in2.sam
@@ -1,12 +1,12 @@
-081017-and-081020:1:6:774:1836 0 PHIX174 4973 37 26M1I9M * 0 0 GCTTAAAGCTACCAGTTATATGGCTGTTTGGTTTTT IIIIIIIIIIIIIIIIIIIIII@III/IE;%II;I= XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:33C1
-081017-and-081020:1:6:1193:793 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIII7IIIDIIIIIIII,=(>%II? XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
-081017-and-081020:1:8:753:970 0 PHIX174 2974 37 29M1I6M * 0 0 GTGGCGCCATGTCTAAATTGTTTGGAGGCGGGTCAA IIIIIIIIIIIIIIII4IIIIII3I&IIIII*%%&' XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:35
-081017-and-081020:1:17:361:871 0 PHIX174 4739 37 30M1I5M * 0 0 TGAGTATGGTACAGCTAATGGCCGTCTTTATTTTCC IIIIIIIIIIIIIIIIIIIGIIIII%II&'III%/# XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:28C6
-081017-and-081020:1:18:1164:1678 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIIIIIIIII;IIII1I0I)II.I- XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
-081017-and-081020:1:20:754:1256 16 PHIX174 4772 37 4M1I31M * 0 0 CCCTGGCGGTGCATTTTATGCGGACACTTCCTACAG &II(IIIII3IIIII7II,IIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:2A32
-081017-and-081020:1:24:1326:917 0 PHIX174 188 37 25M1D11M * 0 0 TTCGCCATCAACTAACGATTCTGTCAAACCTGACGC IIIIIIIIIIIIIIIIIIIIIIIIII2/&II>'IEI XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:25^A3A7
-081017-and-081020:1:25:1466:511 16 PHIX174 1179 37 21M1D15M * 0 0 CTATTGACTCTACTGTAGACATTTTACTTTTTATGT :I<=IIIIII5IIGI5IIIIIIIIIIIIIIIIIIII XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:21^T15
-081017-and-081020:1:27:1267:1275 0 PHIX174 3716 37 28M1D8M * 0 0 TCATCAGCAAACGCAGAATCAGCGGTATGCTCTTCT IIIIIIIIIIIIIIIII;IIIIIII87III%I(@I. XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:28^G8
-081017-and-081020:1:94:1649:147 16 PHIX174 2755 37 15M2D21M * 0 0 TATACCGTCAAGGACTGTGACTATTGACGTCCTTCC 4IIIIII@I7IIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:15^TG21
-081017-and-081020:1:95:74:43 0 PHIX174 4038 37 29M1I6M * 0 0 ATCGAGGCTCTTAAACCTGCTATTGAGGCTTTTTGG IIIIIIIIIIIIIIIIICI;8I,I>IIIIII1I%5& XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
-081017-and-081020:1:95:58:307 16 PHIX174 3859 37 2M1I33M * 0 0 ACAAATGTCTGGAAAGACGGTAAAGCTGATGGTATT I&*IIIIIIII;IIIBIII>IICIIIIIIFII:III XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:5T29
+081017-and-081020:1:6:774:1836 0 PHIX174 4973 37 26M1I9M * 0 0 GCTTAAAGCTACCAGTTATATGGCTGTTTGGTTTTT IIIIIIIIIIIIIIIIIIIIII@III/IE;%II;I= XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:33C1
+081017-and-081020:1:6:1193:793 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIII7IIIDIIIIIIII,=(>%II? XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
+081017-and-081020:1:8:753:970 0 PHIX174 2974 37 29M1I6M * 0 0 GTGGCGCCATGTCTAAATTGTTTGGAGGCGGGTCAA IIIIIIIIIIIIIIII4IIIIII3I&IIIII*%%&' XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:35
+081017-and-081020:1:17:361:871 0 PHIX174 4739 37 30M1I5M * 0 0 TGAGTATGGTACAGCTAATGGCCGTCTTTATTTTCC IIIIIIIIIIIIIIIIIIIGIIIII%II&'III%/# XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:28C6
+081017-and-081020:1:18:1164:1678 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIIIIIIIII;IIII1I0I)II.I- XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
+081017-and-081020:1:20:754:1256 16 PHIX174 4772 37 4M1I31M * 0 0 CCCTGGCGGTGCATTTTATGCGGACACTTCCTACAG &II(IIIII3IIIII7II,IIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:2A32
+081017-and-081020:1:24:1326:917 0 PHIX174 188 37 25M1D11M * 0 0 TTCGCCATCAACTAACGATTCTGTCAAACCTGACGC IIIIIIIIIIIIIIIIIIIIIIIIII2/&II>'IEI XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:25^A3A7
+081017-and-081020:1:25:1466:511 16 PHIX174 1179 37 21M1D15M * 0 0 CTATTGACTCTACTGTAGACATTTTACTTTTTATGT :I<=IIIIII5IIGI5IIIIIIIIIIIIIIIIIIII XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:21^T15
+081017-and-081020:1:27:1267:1275 0 PHIX174 3716 37 28M1D8M * 0 0 TCATCAGCAAACGCAGAATCAGCGGTATGCTCTTCT IIIIIIIIIIIIIIIII;IIIIIII87III%I(@I. XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:28^G8
+081017-and-081020:1:94:1649:147 16 PHIX174 2755 37 15M2D21M * 0 0 TATACCGTCAAGGACTGTGACTATTGACGTCCTTCC 4IIIIII@I7IIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:15^TG21
+081017-and-081020:1:95:74:43 0 PHIX174 4038 37 29M1I6M * 0 0 ATCGAGGCTCTTAAACCTGCTATTGAGGCTTTTTGG IIIIIIIIIIIIIIIIICI;8I,I>IIIIII1I%5& XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
+081017-and-081020:1:95:58:307 16 PHIX174 3859 37 2M1I33M * 0 0 ACAAATGTCTGGAAAGACGGTAAAGCTGATGGTATT I&*IIIIIIII;IIIBIII>IICIIIIIIFII:III XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:5T29
--- /dev/null
+++ b/test-data/indel_table_out1.interval
@@ -0,0 +1,15 @@
+chrM 300 301 D - 14 6 0 8
+chrM 303 304 D - 41 19 0 22
+chrM 359 360 D - 1 1 0 0
+chrM 410 411 D - 4 1 1 2
+chrM 435 436 D - 2 1 0 1
+chrM 525 526 D - 1 0 0 1
+chrM 714 715 D - 1 0 1 0
+chrM 753 754 I A 2 1 0 1
+chrM 958 959 D - 1 0 0 1
+chrM 995 996 D - 3 0 0 3
+chrM 995 997 D - 1 0 1 0
+chrM 1168 1169 I A 1 0 1 0
+chrM 1168 1169 I C 1 0 0 1
+chrM 1274 1297 D - 1 0 1 0
+chrM 1296 1297 D - 2 0 1 1
--- /dev/null
+++ b/test-data/indel_analysis_out4.interval
@@ -0,0 +1,5 @@
+ref 13 14 T 2 13.33 14.29
+ref 20 21 GG 1 8.33 8.33
+ref 22 23 A 1 8.33 11.11
+ref 24 25 G 1 11.11 14.29
+ref 25 26 T 1 12.50 14.29
--- /dev/null
+++ b/tools/indels/indel_analysis.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+
+"""
+Given an input sam file, provides analysis of the indels..
+
+usage: %prog [options] [input3 sum3[ input4 sum4[ input5 sum5[...]]]]
+ -i, --input=i: The sam file to analyze
+ -t, --threshold=t: The deletion frequency threshold
+ -I, --out_ins=I: The interval output file showing insertions
+ -D, --out_del=D: The interval output file showing deletions
+"""
+
+import re, sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+
+def stop_err( msg ):  # report a fatal error: write msg plus a newline to stderr, then terminate the script
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit( 1 )  # non-zero status; a bare sys.exit() would report success (status 0) after an error
+
+def add_to_ref_pos( ref_pos, pos, bases ):
+    """
+    Adds the bases and counts to the ref_pos dict.
+
+    ref_pos maps a position to a { base: count } dict and is updated in
+    place; bases[ j ] is counted at position pos + j. Returns None.
+    """
+    for j, base in enumerate( bases ):
+        # create the per-position dict the first time a position is seen,
+        # then bump the count for this base (missing bases start from 0)
+        counts = ref_pos.setdefault( pos + j, {} )
+        counts[ base ] = counts.get( base, 0 ) + 1
+
+def __main__():  # entry point: tally aligned bases, insertions and deletions per reference position from a SAM file, then write the indels whose frequency exceeds the threshold to two interval files
+ #Parse Command Line
+ options, args = doc_optparse.parse( __doc__ )  # the module docstring is handed to the option parser
+ # prep output files
+ out_ins = open( options.out_ins, 'wb' )
+ out_del = open( options.out_del, 'wb' )
+ # patterns: pat accepts a CIGAR that is a single match block (<n>M) or one insertion/deletion flanked by match blocks (<n>M<k>I<m>M or <n>M<k>D<m>M); pat_multi finds three or more consecutive operations and is only tried when pat fails
+ pat = re.compile( '(^(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M$)|(^(?P<match_width>\d+)M$)' )
+ pat_multi = re.compile( '(\d+[MIDNSHP])(\d+[MIDNSHP])(\d+[MIDNSHP])+' )
+ # for tracking occurrences at each pos of ref: ref_pos[ chrom ][ pos ] -> { base: count }, indels[ chrom ][ 'I' or 'D' ][ pos ] -> { inserted bases or deletion width: count }, num_reads[ chrom ][ pos ] -> number of aligned bases counted at pos
+ ref_pos = {}
+ indels = {}
+ num_reads = {}
+ multi_indel_lines = 0
+ # go through all lines in input file
+ for i,line in enumerate( open( options.input, 'rb' ) ):  # the input handle is never explicitly closed
+ if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
+ split_line = line.split( '\t' )  # columns used below: [2] reference name, [3] start position, [5] CIGAR, [9] read bases
+ chrom = split_line[2].strip()
+ pos = int( split_line[3].strip() )
+ cigar = split_line[5].strip()
+ bases = split_line[9].strip()
+ # skip lines whose reference name column is '*'
+ if chrom == '*':
+ continue
+ # find matches like 3M2D7M or 7M3I10M
+ matches = ''  # NOTE(review): never read; the name used further down is 'match'
+ m = pat.search( cigar )
+ # unprocessable CIGAR
+ if not m:
+ m = pat_multi.search( cigar )
+ # skip this line if no match
+ if not m:
+ continue
+ # account for multiple indels or operations we don't process
+ else:
+ multi_indel_lines += 1
+ # get matching parts for the indel or full match if matching
+ else:
+ if not ref_pos.has_key( chrom ):
+ ref_pos[ chrom ] = {}
+ indels[ chrom ] = { 'D': {}, 'I': {} }
+ if not num_reads.has_key( chrom ):
+ num_reads[ chrom ] = {}
+ parts = m.groupdict()
+ if parts[ 'match_width' ] or ( parts[ 'lmatch' ] and parts[ 'ins_del_width' ] and parts[ 'rmatch' ] ):  # holds whenever pat matched (each captured \d+ group is a non-empty string), so match is always rebound to parts
+ match = parts
+ # see if matches meet filter requirements
+ if match:
+ # match/mismatch
+ if parts[ 'match_width' ]:
+ add_to_ref_pos( ref_pos[ chrom ], pos, bases )
+ for i, base in enumerate( bases ):  # i here rebinds the line counter of the outer loop
+ try:
+ num_reads[ chrom ][ i + pos ] += 1
+ except KeyError:
+ num_reads[ chrom ][ i + pos ] = 1
+ # indel
+ else:
+ # pieces of CIGAR string
+ left = int( match[ 'lmatch' ] )
+ middle = int( match[ 'ins_del_width' ] )
+ right = int( match[ 'rmatch' ] )
+ left_bases = bases[ : left ]
+ if match[ 'ins_del' ] == 'I':
+ middle_bases = bases[ left : left + middle ]
+ else:
+ middle_bases = ''
+ right_bases = bases[ -right : ]  # NOTE(review): if right is 0 this slice is the whole read, not an empty string
+ start = pos + left  # first reference position after the left match block
+ # add data to ref_pos dict for match/mismatch bases on left and on right
+ add_to_ref_pos( ref_pos[ chrom ], pos, left_bases )
+ for i, base in enumerate( left_bases ):
+ try:
+ num_reads[ chrom ][ i + pos ] += 1
+ except KeyError:
+ num_reads[ chrom ][ i + pos ] = 1
+ if match[ 'ins_del' ] == 'I':
+ add_to_ref_pos( ref_pos[ chrom ], start, right_bases )  # for an insertion the right block is recorded starting at start itself
+ indel_pos = start
+ else:
+ add_to_ref_pos( ref_pos[ chrom ], start + middle, right_bases )  # for a deletion the right block is recorded after the deleted width
+ indel_pos = start + middle
+ for i, base in enumerate( right_bases ):
+ try:
+ num_reads[ chrom ][ i + indel_pos ] += 1
+ except KeyError:
+ num_reads[ chrom ][ i + indel_pos ] = 1
+ # for insertions, count instances of particular inserted bases
+ if match[ 'ins_del' ] == 'I':
+ if indels[ chrom ][ 'I' ].has_key( start ):
+ if indels[ chrom ][ 'I' ][ start ].has_key( middle_bases ):
+ indels[ chrom ][ 'I' ][ start ][ middle_bases ] += 1
+ else:
+ indels[ chrom ][ 'I' ][ start ][ middle_bases ] = 1
+ else:
+ indels[ chrom ][ 'I' ][ start ] = { middle_bases: 1 }
+ # for deletions, count number of deletions bases
+ else:
+ if indels[ chrom ][ 'D' ].has_key( start ):
+ if indels[ chrom ][ 'D' ][ start ].has_key( middle ):
+ indels[ chrom ][ 'D' ][ start ][ middle ] += 1
+ else:
+ indels[ chrom ][ 'D' ][ start ][ middle ] = 1
+ else:
+ indels[ chrom ][ 'D' ][ start ] = { middle: 1 }
+ # compute deletion frequencies and insertion frequencies for checking against threshold
+ freqs = {}
+ ins_freqs = {}
+ chroms = ref_pos.keys()
+ chroms.sort()
+ for chrom in chroms:
+ freqs[ chrom ] = {}
+ ins_freqs[ chrom ] = {}
+ poses = num_reads[ chrom ].keys()
+ poses.sort()
+ for pos in poses:
+ # all reads touching this particular position
+ freqs[ chrom ][ pos ] = {}
+ sum_counts = 0.0
+ sum_counts_end = 0.0
+ # get basic counts (match/mismatch)
+ if num_reads[ chrom ].has_key( pos ):  # always true here: pos was taken from the keys of num_reads[ chrom ]
+ sum_counts += float( num_reads[ chrom ][ pos ] )
+ try:
+ sum_counts_end += float( num_reads[ chrom ][ pos + 1 ] )
+ except KeyError:
+ pass
+ # add deletions also touching this position
+ try:
+ sum_counts += float( sum( indels[ chrom ][ 'D' ][ pos ].values() ) )
+ try:
+ sum_counts_end += float( sum( indels[ chrom ][ 'D' ][ pos + 1 ].values() ) )
+ except KeyError:
+ pass
+ for d in indels[ chrom ][ 'D' ][ pos ].keys():
+ freqs[ chrom ][ pos ][ '-' * d ] = indels[ chrom ][ 'D' ][ pos ][ d ] / sum_counts
+ except KeyError:
+ pass
+ # calculate actual frequencies
+ # deletions
+ freqs[ chrom ][ pos ][ 'total' ] = sum_counts
+ for base in ref_pos[ chrom ][ pos ].keys():
+ try:
+ prop = float( ref_pos[ chrom ][ pos ][ base ] ) / sum_counts
+ freqs[ chrom ][ pos ][ base ] = prop
+ except ZeroDivisionError:
+ freqs[ chrom ][ pos ][ base ] = 0.0
+ try:
+ for d in indels[ chrom ][ 'D' ][ pos ].keys():  # recomputes the same values already stored by the deletion block above (sum_counts has not changed since)
+ freqs[ chrom ][ pos ][ '-' * d ] = indels[ chrom ][ 'D' ][ pos ][ d ] / sum_counts
+ except KeyError:
+ pass
+ # insertions
+ try:
+ for bases in indels[ chrom ][ 'I' ][ pos ].keys():
+ prop_start = indels[ chrom ][ 'I' ][ pos ][ bases ] / ( indels[ chrom ][ 'I' ][ pos ][ bases ] + sum_counts )  # share of this insertion among the counts at pos plus the inserting reads themselves
+ try:
+ prop_end = indels[ chrom ][ 'I' ][ pos ][ bases ] / sum_counts_end  # share relative to the counts gathered for pos + 1
+ except ZeroDivisionError:
+ prop_end = 0.0
+ try:
+ ins_freqs[ chrom ][ pos ][ bases ] = [ prop_start, prop_end ]
+ except KeyError:
+ ins_freqs[ chrom ][ pos ] = { bases: [ prop_start, prop_end ] }
+ except KeyError, e:  # no insertion recorded at pos; e is unused
+ pass
+ # output to files if meet threshold requirement
+ threshold = float( options.threshold )
+ #out_del.write( '#Chrom\tStart\tEnd\t#Del\t#Reads\t%TotReads\n' )
+ #out_ins.write( '#Chrom\tStart\tEnd\tInsBases\t#Reads\t%TotReadsAtStart\t%ReadsAtEnd\n' )
+ for chrom in ref_pos.keys():  # unlike the sorted chroms list used above, this iterates in dict order
+ # deletions file
+ poses = indels[ chrom ][ 'D' ].keys()
+ poses.sort()
+ for pos in poses:
+ start = pos
+ dels = indels[ chrom ][ 'D' ][ start ].keys()
+ dels.sort()
+ for d in dels:
+ end = start + d
+ prop = freqs[ chrom ][ start ][ '-' * d ]  # NOTE(review): freqs[ chrom ] only holds positions present in num_reads, so a deletion start that no read covers with an aligned base would raise KeyError here
+ if prop > threshold :  # threshold is compared against the 0-1 proportion, not the percentage written below
+ out_del.write( '%s\t%s\t%s\t%s\t%s\t%.2f\n' % ( chrom, start, end, d, indels[ chrom ][ 'D' ][ pos ][ d ], 100.0 * prop ) )
+ # insertions file
+ poses = indels[ chrom ][ 'I' ].keys()
+ poses.sort()
+ for pos in poses:
+ start = pos
+ end = pos + 1
+ ins_bases = indels[ chrom ][ 'I' ][ start ].keys()
+ ins_bases.sort()
+ for bases in ins_bases:
+ prop_start = ins_freqs[ chrom ][ start ][ bases ][0]
+ prop_end = ins_freqs[ chrom ][ start ][ bases ][1]
+ if prop_start > threshold or prop_end > threshold:  # an insertion is reported when either proportion exceeds the threshold
+ out_ins.write( '%s\t%s\t%s\t%s\t%s\t%.2f\t%.2f\n' % ( chrom, start, end, bases, indels[ chrom ][ 'I' ][ start ][ bases ], 100.0 * prop_start, 100.0 * prop_end ) )
+ # close out files
+ out_del.close()
+ out_ins.close()
+ # if skipped lines because of more than one indel, output message
+ if multi_indel_lines > 0:
+ sys.stdout.write( '%s alignments were skipped because they contained more than one indel or had unhandled operations (N/S/H/P).' % multi_indel_lines )
+
+if __name__=="__main__": __main__()
--- /dev/null
+++ b/test-data/indel_sam2interval_out1.interval
@@ -0,0 +1,6 @@
+ref 133 134 I C 1
+ref 256 258 D - 1
+ref 48819784 48819785 I A 1
+ref 87824726 87824727 I G 1
+ref 188841437 188841438 I A 1
+ref 190341171 190341172 D - 1
--- a/tool_conf.xml.sample
+++ b/tool_conf.xml.sample
@@ -3,7 +3,7 @@
<section name="Get Data" id="getext"><tool file="data_source/upload.xml"/><tool file="data_source/ucsc_tablebrowser.xml" />
- <tool file="data_source/ucsc_tablebrowser_test.xml" />
+ <tool file="data_source/ucsc_tablebrowser_test.xml" /><tool file="data_source/ucsc_tablebrowser_archaea.xml" /><tool file="data_source/bx_browser.xml" /><tool file="data_source/microbial_import.xml" />
@@ -23,7 +23,7 @@
<tool file="data_source/encode_db.xml" /><tool file="data_source/epigraph_import.xml" /><tool file="data_source/epigraph_import_test.xml" />
- <tool file="data_source/hbvar.xml" />
+ <tool file="data_source/hbvar.xml" /><tool file="validation/fix_errors.xml" /></section><section name="Send Data" id="send">
@@ -112,7 +112,7 @@
<tool file="extract/phastOdds/phastOdds_tool.xml" /></section><section name="Operate on Genomic Intervals" id="bxops">
- <tool file="new_operations/intersect.xml" />
+ <tool file="new_operations/intersect.xml" /><tool file="new_operations/subtract.xml" /><tool file="new_operations/merge.xml" /><tool file="new_operations/concat.xml" />
@@ -213,7 +213,7 @@
<tool file="fastx_toolkit/fastx_quality_statistics.xml" /><tool file="fastx_toolkit/fastq_quality_boxplot.xml" /><tool file="fastx_toolkit/fastx_nucleotides_distribution.xml" />
- <tool file="metag_tools/split_paired_reads.xml"
+ <tool file="metag_tools/split_paired_reads.xml"
<tool file="fastx_toolkit/fastq_to_fasta.xml" />
--><label text="Roche-454 data" id="454" />
@@ -255,6 +255,12 @@
<tool file="metag_tools/megablast_xml_parser.xml" /><tool file="sr_mapping/PerM.xml" /></section>
+ <section name="NGS: Indel Analysis" id="indel_analysis">
+ <tool file="indels/sam_indel_filter.xml" />
+ <tool file="indels/indel_sam2interval.xml" />
+ <tool file="indels/indel_table.xml" />
+ <tool file="indels/indel_analysis.xml" />
+ </section><section name="NGS: Expression Analysis" id="ngs-rna-tools"><label text="RNA-seq" id="rna_seq" /><tool file="ngs_rna/tophat_wrapper.xml" />
@@ -267,7 +273,6 @@
</section><section name="NGS: SAM Tools" id="samtools"><tool file="samtools/sam_bitwise_flag_filter.xml" />
- <tool file="samtools/sam_indel_filter.xml" /><tool file="samtools/sam2interval.xml" /><tool file="samtools/sam_to_bam.xml" /><tool file="samtools/sam_merge.xml" />
@@ -309,12 +314,12 @@
<tool file="rgenetics/rgManQQ.xml"/></section><!--
- TODO: uncomment the following EMBOSS section whenever
- moving to test, but comment it in .sample to eliminate
- it from buildbot functional tests since these tools
+ TODO: uncomment the following EMBOSS section whenever
+ moving to test, but comment it in .sample to eliminate
+ it from buildbot functional tests since these tools
rarely change.
-->
-<!--
+<!--
<section name="EMBOSS" id="EMBOSSLite"><tool file="emboss_5/emboss_antigenic.xml" /><tool file="emboss_5/emboss_backtranseq.xml" />
@@ -329,13 +334,13 @@
<tool file="emboss_5/emboss_chips.xml" /><tool file="emboss_5/emboss_cirdna.xml" /><tool file="emboss_5/emboss_codcmp.xml" />
- <tool file="emboss_5/emboss_coderet.xml" />
+ <tool file="emboss_5/emboss_coderet.xml" /><tool file="emboss_5/emboss_compseq.xml" />
- <tool file="emboss_5/emboss_cpgplot.xml" />
+ <tool file="emboss_5/emboss_cpgplot.xml" /><tool file="emboss_5/emboss_cpgreport.xml" /><tool file="emboss_5/emboss_cusp.xml" /><tool file="emboss_5/emboss_cutseq.xml" />
- <tool file="emboss_5/emboss_dan.xml" />
+ <tool file="emboss_5/emboss_dan.xml" /><tool file="emboss_5/emboss_degapseq.xml" /><tool file="emboss_5/emboss_descseq.xml" /><tool file="emboss_5/emboss_diffseq.xml" />
@@ -351,7 +356,7 @@
<tool file="emboss_5/emboss_etandem.xml" /><tool file="emboss_5/emboss_extractfeat.xml" /><tool file="emboss_5/emboss_extractseq.xml" />
- <tool file="emboss_5/emboss_freak.xml" />
+ <tool file="emboss_5/emboss_freak.xml" /><tool file="emboss_5/emboss_fuzznuc.xml" /><tool file="emboss_5/emboss_fuzzpro.xml" /><tool file="emboss_5/emboss_fuzztran.xml" />
@@ -372,7 +377,7 @@
<tool file="emboss_5/emboss_merger.xml" /><tool file="emboss_5/emboss_msbar.xml" /><tool file="emboss_5/emboss_needle.xml" />
- <tool file="emboss_5/emboss_newcpgreport.xml" />
+ <tool file="emboss_5/emboss_newcpgreport.xml" /><tool file="emboss_5/emboss_newcpgseek.xml" /><tool file="emboss_5/emboss_newseq.xml" /><tool file="emboss_5/emboss_noreturn.xml" />
@@ -400,7 +405,7 @@
<tool file="emboss_5/emboss_revseq.xml" /><tool file="emboss_5/emboss_seqmatchall.xml" /><tool file="emboss_5/emboss_seqret.xml" />
- <tool file="emboss_5/emboss_showfeat.xml" />
+ <tool file="emboss_5/emboss_showfeat.xml" /><tool file="emboss_5/emboss_shuffleseq.xml" /><tool file="emboss_5/emboss_sigcleave.xml" /><tool file="emboss_5/emboss_sirna.xml" />
@@ -422,7 +427,7 @@
<tool file="emboss_5/emboss_water.xml" /><tool file="emboss_5/emboss_wobble.xml" /><tool file="emboss_5/emboss_wordcount.xml" />
- <tool file="emboss_5/emboss_wordmatch.xml" />
- </section>
+ <tool file="emboss_5/emboss_wordmatch.xml" />
+ </section>
--></toolbox>
--- a/tools/samtools/sam_indel_filter.xml
+++ /dev/null
@@ -1,77 +0,0 @@
-<tool id="sam_indel_filter" name="Filter SAM" version="1.0.0">
- <description>for indels</description>
- <command interpreter="python">
- sam_indel_filter.py
- --input=$input1
- --quality_threshold=$quality_threshold
- --adjacent_bases=$adjacent_bases
- --output=$out_file1
- </command>
- <inputs>
- <param format="sam" name="input1" type="data" label="Select dataset to filter" />
- <param name="quality_threshold" type="integer" value="40" label="Quality threshold for adjacent bases" help="Takes Phred value assuming Sanger scale; usually between 0 and 40, but up to 93" />
- <param name="adjacent_bases" type="integer" value="1" label="The number of adjacent bases to match on either side of the indel" help="If one side is shorter than this width, it will still match if the long-enough side matches" />
- </inputs>
- <outputs>
- <data format="sam" name="out_file1" />
- </outputs>
- <tests>
- <test>
- <param name="input1" value="sam_indel_filter_in1.sam" ftype="sam"/>
- <param name="quality_threshold" value="14"/>
- <param name="adjacent_bases" value="2"/>
- <output name="out_file1" file="sam_indel_filter_out1.sam" ftype="sam"/>
- </test>
- <test>
- <param name="input1" value="sam_indel_filter_in1.sam" ftype="sam"/>
- <param name="quality_threshold" value="29"/>
- <param name="adjacent_bases" value="5"/>
- <output name="out_file1" file="sam_indel_filter_out2.sam" ftype="sam"/>
- </test>
- <test>
- <param name="input1" value="sam_indel_filter_in2.sam" ftype="sam"/>
- <param name="quality_threshold" value="7"/>
- <param name="adjacent_bases" value="1"/>
- <output name="out_file1" file="sam_indel_filter_out3.sam" ftype="sam"/>
- </test>
- </tests>
- <help>
-
-**What it does**
-
-Allows extracting indels from SAM. Currently it can handle SAM with alignments with only one insertion or one deletion, and will skip that alignment if it encounters one with more than one indel. It matches CIGAR strings (column 6 in the SAM file) like 5M3I5M or 4M2D10M, so there must be a match or mismatch of sufficient length on either side of the indel.
-
------
-
-**Example**
-
-Suppose you have the following::
-
- r770 89 ref 116 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
- r770 181 ref 116 0 24M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$###
- r1945 177 ref 41710908 0 23M 190342418 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
- r3671 117 ref 190342418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$##
- r3671 153 ref 190342418 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
- r3824 117 ref 80324999 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/
- r3824 153 ref 80324999 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
- r4795 81 ref 26739130 0 23M 57401793 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
- r4795 161 ref 57401793 37 23M 26739130 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
- r4800 16 ref 241 255 15M1D8M * 0 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII
- r5377 170 ref 59090793 37 23M 26739130 26739130 0 TATCAATAAGGTGATGTAACTCG ]WV]ABAWW]]]]]P]P//GU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
- r5612 151 ref 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
-
-
-To select only alignments with indels, you need to determine the minimum quality you want the adjacent bases to have, as well as the number of adjacent bases to check. If you set the quality threshold to 47 and the number of bases to check to 2, you will get the following output::
-
- r770 89 ref 116 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
- r4800 16 ref 241 255 15M1D8M * 0 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII
- r5612 151 ref 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
-
-
-For more information on SAM, please consult the `SAM format description`__.
-
-.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
-
-
- </help>
-</tool>
--- a/tools/samtools/sam_indel_filter.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Allows user to filter out non-indels from SAM.
-
-usage: %prog [options]
- -i, --input=i: Input SAM file to be filtered
- -q, --quality_threshold=q: Minimum quality value for adjacent bases
- -a, --adjacent_bases=a: Number of adjacent bases on each size to check qualities
- -o, --output=o: Filtered output SAM file
-"""
-
-import re, sys
-from galaxy import eggs
-import pkg_resources; pkg_resources.require( "bx-python" )
-from bx.cookbook import doc_optparse
-
-
-def stop_err( msg ):
- sys.stderr.write( '%s\n' % msg )
- sys.exit()
-
-def __main__():
- #Parse Command Line
- options, args = doc_optparse.parse( __doc__ )
- # prep output file
- output = open( options.output, 'wb' )
- # patterns
- pat_indel = re.compile( '(?P<before_match>(\d+[MNSHP])*)(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M(?P<after_match>(\d+[MNSHP])*)' )
- pat_matches = re.compile( '(\d+[MIDNSHP])+' )
- try:
- qual_thresh = int( options.quality_threshold ) + 33
- if qual_thresh < 33 or qual_thresh > 126:
- raise ValueError
- except ValueError:
- stop_err( 'Your quality threshold should be an integer between 0 and 93, inclusive.' )
- try:
- adj_bases = int( options.adjacent_bases )
- if adj_bases < 1:
- raise ValueError
- except ValueError:
- stop_err( 'The number of adjacent bases should be an integer greater than 1.' )
- # record lines skipped because of more than one indel
- multi_indel_lines = 0
- # go through all lines in input file
- for i,line in enumerate(open( options.input, 'rb' )):
- if i > 1000:
- break
- if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
- split_line = line.split( '\t' )
- cigar = split_line[5]
- # find all possible matches, like 3M2D7M and 7M3I10M in 3M2D7M3I10M
- cigar_copy = cigar[:]
- matches = []
- while len( cigar_copy ) >= 6: # nMnInM or nMnDnM
- m = pat_indel.search( cigar_copy )
- if not m:
- break
- else:
- parts = m.groupdict()
- pre_left = 0
- if m.start() > 0:
- pre_left_groups = pat_matches.search( cigar_copy[ : m.start() ] )
- if pre_left_groups:
- for pl in pre_left_groups.groups():
- if pl.endswith( 'M' ) or pl.endswith( 'S' ) or pl.endswith( 'P' ):
- pre_left += pl[:-1]
- parts[ 'pre_left' ] = pre_left
- matches.append( parts )
- cigar_copy = cigar_copy[ len( parts[ 'lmatch' ] ) + 1 : ]
- # see if matches meet filter requirements
- if len( matches ) > 1:
- multi_indel_lines += 1
- elif len( matches ) == 1:
- pre_left = int( matches[0][ 'pre_left' ] )
- left = int( matches[0][ 'lmatch' ] )
- right = int( matches[0][ 'rmatch' ] )
- if matches[0][ 'ins_del' ] == 'D':
- middle = int( matches[0][ 'ins_del_width' ] )
- else:
- middle = 0
- # if there are enough adjacent bases to check, then do so
- if left >= adj_bases and right >= adj_bases:
- quals = split_line[10]
- left_quals = quals[ pre_left : pre_left + left ][ -adj_bases : ]
- middle_quals = quals[ pre_left + left : pre_left + left + middle ]
- right_quals = quals[ pre_left + left + middle : pre_left + left + middle + right ][ : adj_bases ]
- qual_thresh_met = True
- for l in left_quals:
- if ord( l ) < qual_thresh:
- qual_thresh_met = False
- break
- if qual_thresh_met:
- for r in right_quals:
- if ord( r ) < qual_thresh:
- qual_thresh_met = False
- break
- # if filter reqs met, output line
- if qual_thresh_met:
- output.write( line )
- # close out file
- output.close()
- # if skipped lines because of more than one indel, output message
- if multi_indel_lines > 0:
- sys.stdout.write( '%s alignments were skipped because they contained more than one indel.' % multi_indel_lines )
-
-if __name__=="__main__": __main__()
--- /dev/null
+++ b/test-data/indel_analysis_in1.sam
@@ -0,0 +1,22 @@
+r770 89 ref 6 37 7M1I5M = 0 0 TCGATCTTCATAG !0//110AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r1124 113 ref 4 0 23M = 0 0 CATCGTTCTGTTAGATCTACGTA PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r1231 69 * 0 0 * * 0 0 AGACCGGGCGGGGTGGCGTTCGGT %##+'#######%###$#$##$(#
+r1563 133 * 0 0 * * 0 0 GTTCGTGGCCGGTGGGTGTTTGGG ###$$#$#$&#####$'$#$###$
+r1789 177 ref 6 0 17M = 0 0 TCGATCGCTTAGTTCTC SQQWZY]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r3671 153 ref 10 37 6M1I6M = 0 0 TCTCTTTAGGTCT /<<!"0/////// XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r3824 153 ref 5 37 8M1I7M = 0 0 ATCGATGTTCTTAGAT 4;6//11!"100110/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r4800 16 ref 7 255 5M2D6M = 0 0 CGATCTTTGAT IIIIIIIIIIC XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r5612 151 ref 5 37 8M1D9M = 0 0 ATCTATCTTTTGATCTC /<<!"0/4/*/7//B0/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r5929 151 ref 11 37 3M1I10M = 0 0 CTCCTTAGCTCTCC /<<!"0/4//7//0 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r6743 69 * 0 0 * * 0 0 TGCCGTGTCTTGCTAACGCCGATT #'#$$#$###%%##$$$$######
+r9145 115 ref 11 0 19M = 0 -1 CTCTTAGCTCTCCGAATTAG 7753:<5#"4!&=9518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+r11770 89 ref 10 37 10M2I8M = 0 0 TCTCTTAGATGGCTCCGTAT 00/02!!0/120210AA4/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r13671 153 ref 1 37 12M1I12M = 0 0 TCGCATCGATCTCCTTAGATCTCCG /<""<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r13762 133 * 0 0 * * 0 0 TGGGTGGATGTGTTGTCGTTCATG #$#$###$#$#######$#$####
+r13824 153 ref 13 37 9M1I7M = 0 0 CATAGATCTACCGGATT 4;6//11!"11100110 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r24800 16 ref 3 255 15M2D9M = 0 0 GCATCTATCTGATAGCTCCGAATT IIIIIIIII45"CCCIII?IIIII XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r25612 151 ref 1 37 9M1D5M = 0 0 TCGCATCGACTCTT 0/4/*/7//00/1C XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r25786 151 ref 21 37 4M1I7M = 0 0 TGCGTTATTGGG <!"0/70/BC01 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r27899 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'#
+r29192 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&%
+r29962 16 ref 20 37 4M1I7M = 0 0 CTCCGGTATGAGG <!"0/70/7BC01 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
--- /dev/null
+++ b/test-data/indel_table_in2.interval
@@ -0,0 +1,6 @@
+chrM 410 411 D - 1
+chrM 714 715 D - 1
+chrM 995 997 D - 1
+chrM 1168 1169 I A 1
+chrM 1274 1297 D - 1
+chrM 1296 1297 D - 1
--- a/test-data/sam_indel_filter_out1.sam
+++ b/test-data/sam_indel_filter_out1.sam
@@ -1,3 +1,3 @@
-1378_28_770 89 chr11.nib:1-134452384 72131356 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+1378_28_770 89 chr11.nib:1-134452384 72131356 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
1378_69_800 16 chr11.nib:1-125234658 241 255 15M1D8M * 0 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII
-1378_72_1612 151 chrY.nib:1-124295114 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+1378_72_1612 151 chrY.nib:1-124295114 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
--- /dev/null
+++ b/tools/indels/indel_table.xml
@@ -0,0 +1,122 @@
+<tool id="indel_table" name="Indel Analysis Table" version="1.0.0">
+ <description>for combining indel interval data</description>
+ <command interpreter="python">
+ indel_table.py
+ --input1=$input1
+ --sum1=$sum1
+ --input2=$input2
+ --sum2=$sum2
+ --output=$output1
+ #for $i in $inputs
+ ${i.input}
+ ${i.sum}
+ #end for
+ </command>
+ <inputs>
+ <param format="interval" name="input1" type="data" label="Select first file to add" />
+ <param name="sum1" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include first file's totals in overall total" />
+ <param format="interval" name="input2" type="data" label="Select second file to add" />
+ <param name="sum2" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include second file's totals in overall total" />
+ <repeat name="inputs" title="Input Files">
+ <param name="input" label="Add file" type="data" format="interval" />
+ <param name="sum" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include file's totals in overall total" />
+ </repeat>
+ </inputs>
+ <outputs>
+ <data format="interval" name="output1" />
+ </outputs>
+ <tests>
+ <test>
+ <param name="input1" value="indel_table_in1.interval" ftype="interval" />
+ <param name="sum1" value="true"/>
+ <param name="input2" value="indel_table_in2.interval" ftype="interval" />
+ <param name="sum2" value="true" />
+ <param name="input" value="indel_table_in3.interval" ftype="interval" />
+ <param name="sum" value="true" />
+ <output name="output1" file="indel_table_out1.interval" ftype="interval" />
+ </test>
+ </tests>
+ <help>
+
+**What it does**
+
+Creates a table allowing for analysis and comparison of indel data. Combines any number of interval files that have been produced by the tool that converts indel SAM data to interval format. Includes overall total counts for all or some files. The tool has the option to not include a given file's counts in the total column. This could be useful for combined data if the counts for certain indels might be included more than once.
+
+The exact columns of the output will depend on the columns of the input. Here is the detailed specification of the output columns::
+
+ Column Description
+ ------------------------------- ----------------------------------------------------------------------------------
+ 1 ... m "Indel" All the "indel" columns, which contain the info that will be checked for equality
+ m + 1 Total Occurrences Total number of occurrences of this indel across all (included) files
+ m + 2 Occurrences for File 1 Number of occurrences of this indel for first file
+ m + 3 Occurrences for File 2 Number of occurrences of this indel for second file
+ [m + ...] [...] [Number of occurrences of this indel for ... file]
+
+The most likely columns would be from the output of the Convert SAM to Interval/BED tool, so: Chromosome, Start position, End position, I/D (Insertion/Deletion), -/<base(s)> (Deletion/Inserted base(s)), Total Occurrences (across files), Occurrences for File 1, Occurrences for File 2, etc. See below for an example.
+
+
+-----
+
+**Example**
+
+Suppose you have the following 4 files::
+
+ chrM 300 301 D - 6
+ chrM 303 304 D - 19
+ chrM 359 360 D - 1
+ chrM 410 411 D - 1
+ chrM 435 436 D - 1
+
+ chrM 410 411 D - 1
+ chrM 714 715 D - 1
+ chrM 995 997 D - 1
+ chrM 1168 1169 I A 1
+ chrM 1296 1297 D - 1
+
+ chrM 300 301 D - 8
+ chrM 525 526 D - 1
+ chrM 958 959 D - 1
+ chrM 995 996 D - 3
+ chrM 1168 1169 I C 1
+ chrM 1296 1297 D - 1
+
+ chrM 303 304 D - 22
+ chrM 410 411 D - 1
+ chrM 435 436 D - 1
+ chrM 714 715 D - 1
+ chrM 753 754 I A 1
+ chrM 1168 1169 I A 1
+
+and the fifth file::
+
+ chrM 303 304 D - 22
+ chrM 410 411 D - 2
+ chrM 435 436 D - 1
+ chrM 714 715 D - 2
+ chrM 753 754 I A 1
+ chrM 995 997 D - 1
+ chrM 1168 1169 I A 2
+ chrM 1296 1297 D - 1
+
+The following will be produced if you include the first four files in the sum, but not the fifth::
+
+ chrM 300 301 D - 14 6 0 8 0 0
+ chrM 303 304 D - 41 19 0 0 22 22
+ chrM 359 360 D - 1 1 0 0 0 0
+ chrM 410 411 D - 3 1 1 0 1 2
+chrM 435 436 D - 2 1 0 0 1 1
+ chrM 525 526 D - 1 0 0 1 0 0
+ chrM 714 715 D - 2 0 1 0 1 2
+ chrM 753 754 I A 1 0 0 0 1 1
+ chrM 958 959 D - 1 0 0 1 0 0
+ chrM 995 996 D - 3 0 0 3 0 0
+ chrM 995 997 D - 1 0 1 0 0 1
+ chrM 1168 1169 I A 2 0 1 0 1 2
+ chrM 1168 1169 I C 1 0 0 1 0 0
+ chrM 1296 1297 D - 2 0 1 1 0 1
+
+The first numeric column is the total of the next four columns; the counts from the fifth file are not included in it.
+
+
+ </help>
+</tool>
--- /dev/null
+++ b/tools/indels/sam_indel_filter.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""
+Allows user to filter out non-indels from SAM.
+
+usage: %prog [options]
+ -i, --input=i: Input SAM file to be filtered
+ -q, --quality_threshold=q: Minimum quality value for adjacent bases
+ -a, --adjacent_bases=a: Number of adjacent bases on each size to check qualities
+ -o, --output=o: Filtered output SAM file
+"""
+
+import re, sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+
+def stop_err( msg ):
+ sys.stderr.write( '%s\n' % msg )
+ sys.exit()
+
+def __main__():
+ #Parse Command Line
+ options, args = doc_optparse.parse( __doc__ )
+ # prep output file
+ output = open( options.output, 'wb' )
+ # patterns
+ pat_indel = re.compile( '(?P<before_match>(\d+[MNSHP])*)(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M(?P<after_match>(\d+[MNSHP])*)' )
+ pat_matches = re.compile( '(\d+[MIDNSHP])+' )
+ try:
+ qual_thresh = int( options.quality_threshold ) + 33
+ if qual_thresh < 33 or qual_thresh > 126:
+ raise ValueError
+ except ValueError:
+ stop_err( 'Your quality threshold should be an integer between 0 and 93, inclusive.' )
+ try:
+ adj_bases = int( options.adjacent_bases )
+ if adj_bases < 1:
+ raise ValueError
+ except ValueError:
+ stop_err( 'The number of adjacent bases should be an integer greater than 1.' )
+ # record lines skipped because of more than one indel
+ multi_indel_lines = 0
+ # go through all lines in input file
+ for i,line in enumerate(open( options.input, 'rb' )):
+ if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
+ split_line = line.split( '\t' )
+ cigar = split_line[5]
+ # find all possible matches, like 3M2D7M and 7M3I10M in 3M2D7M3I10M
+ cigar_copy = cigar[:]
+ matches = []
+ while len( cigar_copy ) >= 6: # nMnInM or nMnDnM
+ m = pat_indel.search( cigar_copy )
+ if not m:
+ break
+ else:
+ parts = m.groupdict()
+ if parts[ 'lmatch' ] and parts[ 'ins_del_width' ] and parts[ 'rmatch' ]:
+ pre_left = 0
+ if m.start() > 0:
+ pre_left_groups = pat_matches.search( cigar_copy[ : m.start() ] )
+ if pre_left_groups:
+ for pl in pre_left_groups.groups():
+ if pl.endswith( 'M' ) or pl.endswith( 'S' ) or pl.endswith( 'P' ):
+ pre_left += pl[:-1]
+ parts[ 'pre_left' ] = pre_left
+ matches.append( parts )
+ cigar_copy = cigar_copy[ len( parts[ 'lmatch' ] ) + 1 : ]
+ # see if matches meet filter requirements
+ if len( matches ) > 1:
+ multi_indel_lines += 1
+ elif len( matches ) == 1:
+ pre_left = int( matches[0][ 'pre_left' ] )
+ left = int( matches[0][ 'lmatch' ] )
+ right = int( matches[0][ 'rmatch' ] )
+ if matches[0][ 'ins_del' ] == 'D':
+ middle = int( matches[0][ 'ins_del_width' ] )
+ else:
+ middle = 0
+ # if there are enough adjacent bases to check, then do so
+ if left >= adj_bases and right >= adj_bases:
+ quals = split_line[10]
+ left_quals = quals[ pre_left : pre_left + left ][ -adj_bases : ]
+ middle_quals = quals[ pre_left + left : pre_left + left + middle ]
+ right_quals = quals[ pre_left + left + middle : pre_left + left + middle + right ][ : adj_bases ]
+ qual_thresh_met = True
+ for l in left_quals:
+ if ord( l ) < qual_thresh:
+ qual_thresh_met = False
+ break
+ if qual_thresh_met:
+ for r in right_quals:
+ if ord( r ) < qual_thresh:
+ qual_thresh_met = False
+ break
+ # if filter reqs met, output line
+ if qual_thresh_met:
+ output.write( line )
+ # close out file
+ output.close()
+ # if skipped lines because of more than one indel, output message
+ if multi_indel_lines > 0:
+ sys.stdout.write( '%s alignments were skipped because they contained more than one indel.' % multi_indel_lines )
+
+if __name__=="__main__": __main__()
--- /dev/null
+++ b/test-data/indel_sam2interval_out2.bed
@@ -0,0 +1,4 @@
+ref 133 134
+ref 48819784 48819785
+ref 87824726 87824727
+ref 188841437 188841438
--- /dev/null
+++ b/test-data/indel_sam2interval_out3.bed
@@ -0,0 +1,2 @@
+ref 256 258
+ref 190341171 190341172
--- a/test-data/sam_indel_filter_out3.sam
+++ b/test-data/sam_indel_filter_out3.sam
@@ -1,10 +1,10 @@
-081017-and-081020:1:6:774:1836 0 PHIX174 4973 37 26M1I9M * 0 0 GCTTAAAGCTACCAGTTATATGGCTGTTTGGTTTTT IIIIIIIIIIIIIIIIIIIIII@III/IE;%II;I= XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:33C1
-081017-and-081020:1:6:1193:793 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIII7IIIDIIIIIIII,=(>%II? XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
-081017-and-081020:1:8:753:970 0 PHIX174 2974 37 29M1I6M * 0 0 GTGGCGCCATGTCTAAATTGTTTGGAGGCGGGTCAA IIIIIIIIIIIIIIII4IIIIII3I&IIIII*%%&' XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:35
-081017-and-081020:1:18:1164:1678 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIIIIIIIII;IIII1I0I)II.I- XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
-081017-and-081020:1:20:754:1256 16 PHIX174 4772 37 4M1I31M * 0 0 CCCTGGCGGTGCATTTTATGCGGACACTTCCTACAG &II(IIIII3IIIII7II,IIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:2A32
-081017-and-081020:1:24:1326:917 0 PHIX174 188 37 25M1D11M * 0 0 TTCGCCATCAACTAACGATTCTGTCAAACCTGACGC IIIIIIIIIIIIIIIIIIIIIIIIII2/&II>'IEI XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:25^A3A7
-081017-and-081020:1:25:1466:511 16 PHIX174 1179 37 21M1D15M * 0 0 CTATTGACTCTACTGTAGACATTTTACTTTTTATGT :I<=IIIIII5IIGI5IIIIIIIIIIIIIIIIIIII XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:21^T15
-081017-and-081020:1:27:1267:1275 0 PHIX174 3716 37 28M1D8M * 0 0 TCATCAGCAAACGCAGAATCAGCGGTATGCTCTTCT IIIIIIIIIIIIIIIII;IIIIIII87III%I(@I. XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:28^G8
-081017-and-081020:1:94:1649:147 16 PHIX174 2755 37 15M2D21M * 0 0 TATACCGTCAAGGACTGTGACTATTGACGTCCTTCC 4IIIIII@I7IIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:15^TG21
-081017-and-081020:1:95:74:43 0 PHIX174 4038 37 29M1I6M * 0 0 ATCGAGGCTCTTAAACCTGCTATTGAGGCTTTTTGG IIIIIIIIIIIIIIIIICI;8I,I>IIIIII1I%5& XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
+081017-and-081020:1:6:774:1836 0 PHIX174 4973 37 26M1I9M * 0 0 GCTTAAAGCTACCAGTTATATGGCTGTTTGGTTTTT IIIIIIIIIIIIIIIIIIIIII@III/IE;%II;I= XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:33C1
+081017-and-081020:1:6:1193:793 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIII7IIIDIIIIIIII,=(>%II? XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
+081017-and-081020:1:8:753:970 0 PHIX174 2974 37 29M1I6M * 0 0 GTGGCGCCATGTCTAAATTGTTTGGAGGCGGGTCAA IIIIIIIIIIIIIIII4IIIIII3I&IIIII*%%&' XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:35
+081017-and-081020:1:18:1164:1678 0 PHIX174 4971 37 27M1I8M * 0 0 CCGCTTAAAGCTACCAGTTATATGGCTGGTTGTTTT IIIIIIIIIIIIIIIIIIIII;IIII1I0I)II.I- XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
+081017-and-081020:1:20:754:1256 16 PHIX174 4772 37 4M1I31M * 0 0 CCCTGGCGGTGCATTTTATGCGGACACTTCCTACAG &II(IIIII3IIIII7II,IIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:2A32
+081017-and-081020:1:24:1326:917 0 PHIX174 188 37 25M1D11M * 0 0 TTCGCCATCAACTAACGATTCTGTCAAACCTGACGC IIIIIIIIIIIIIIIIIIIIIIIIII2/&II>'IEI XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:25^A3A7
+081017-and-081020:1:25:1466:511 16 PHIX174 1179 37 21M1D15M * 0 0 CTATTGACTCTACTGTAGACATTTTACTTTTTATGT :I<=IIIIII5IIGI5IIIIIIIIIIIIIIIIIIII XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:21^T15
+081017-and-081020:1:27:1267:1275 0 PHIX174 3716 37 28M1D8M * 0 0 TCATCAGCAAACGCAGAATCAGCGGTATGCTCTTCT IIIIIIIIIIIIIIIII;IIIIIII87III%I(@I. XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:28^G8
+081017-and-081020:1:94:1649:147 16 PHIX174 2755 37 15M2D21M * 0 0 TATACCGTCAAGGACTGTGACTATTGACGTCCTTCC 4IIIIII@I7IIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:15^TG21
+081017-and-081020:1:95:74:43 0 PHIX174 4038 37 29M1I6M * 0 0 ATCGAGGCTCTTAAACCTGCTATTGAGGCTTTTTGG IIIIIIIIIIIIIIIIICI;8I,I>IIIIII1I%5& XT:A:U NM:i:2 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:31G3
--- /dev/null
+++ b/tools/indels/indel_analysis.xml
@@ -0,0 +1,150 @@
+<tool id="indel_analysis" name="Indel Analysis" version="1.0.0">
+ <description></description>
+ <command interpreter="python">
+ indel_analysis.py
+ --input=$input1
+ --threshold=$threshold
+ --out_ins=$out_ins
+ --out_del=$out_del
+ </command>
+ <inputs>
+ <param format="sam" name="input1" type="data" label="Select sam file to analyze" />
+ <param name="threshold" type="float" value="0.015" size="5" label="Frequency threshold" help="Cutoff" />
+ </inputs>
+ <outputs>
+ <data format="interval" name="out_del" />
+ <data format="interval" name="out_ins" />
+ </outputs>
+ <tests>
+ <test>
+ <param name="input1" value="indel_analysis_in1.sam" ftype="sam"/>
+ <param name="threshold" value="0.017"/>
+ <output name="out_del" file="indel_analysis_out1.interval" ftype="interval"/>
+ <output name="out_ins" file="indel_analysis_out2.interval" ftype="interval"/>
+ </test>
+ <test>
+ <param name="input1" value="indel_analysis_in2.sam" ftype="sam"/>
+ <param name="threshold" value="0.08"/>
+ <output name="out_del" file="indel_analysis_out3.interval" ftype="interval"/>
+ <output name="out_ins" file="indel_analysis_out4.interval" ftype="interval"/>
+ </test>
+ </tests>
+ <help>
+
+**What it does**
+
+Given an input sam file, this tool provides analysis of the indels. It filters out matches that do not meet the frequency threshold. The way this frequency of occurrence is calculated is different for deletions and insertions. The CIGAR string's "M" can indicate an exact match or a mismatch. For SAM containing the following bits of information (assuming the reference "ACTGCTCGAT")::
+
+ CHROM POS CIGAR SEQ
+ ref 3 2M1I3M TATCTC
+ ref 2 3M1D2M ATGTC
+ ref 4 2M2I3M GTTGAAG
+ ref 1 2M2D2M ACCT
+ ref 2 3M1I2M TCCATC
+ ref 7 4M CTAT
+ ref 5 5M CGTGA
+
+The following totals would be calculated::
+
+ Counts for chromosome "ref", where "-" indicates a deletion and
+ "+" an insertion
+ ----------------------------------------------------------------
+ POS BASE NUMREADS DELPROPCALC DELPROP INSPROPCALC INSPROP
+ 1 A 1 1/1 1.00 -- --
+ 2 A 1 1/2 0.50 -- --
+ C 1 1/2 0.50 -- --
+ 3 T 3 3/4 0.75 -- --
+ -- 1 1/4 0.25 -- --
+ 4 A 1 1/5 0.20 -- --
+ G 2 2/5 0.40 -- --
+ C 1 1/5 0.20 -- --
+ - 1 1/5 0.20 -- --
+ 5 C 4 4/5 0.80 -- --
+ T 1 1/5 0.20 -- --
+ +A 1 --- --- 1/6 0.17
+ +T 1 --- --- 1/6 0.17
+ 6 A 1 1/6 0.17 -- --
+ C 1 1/6 0.17 -- --
+ T 4 4/6 0.67 -- --
+ +TG 1 --- --- 1/7 0.14
+ 7 A 1 1/6 0.17 -- --
+ C 3 3/6 0.50 -- --
+ G 1 1/6 0.17 -- --
+ T 1 1/6 0.17 -- --
+ 8 G 2 2/3 0.67 -- --
+ T 1 1/3 0.33 -- --
+ 9 A 2 2/2 1.00 -- --
+ 10 T 1 1/1 1.00 -- --
+
+Note that the way these are calculated may not be immediately clear. First, the basic total number of reads at a given position is the number of reads with a particular base plus the number of reads with a deletion at that given position. Note that deletions of two bases and one base would be counted separately. Insertions are not counted in this total, which is used to calculate the proportion of occurrences of each individual base and deletion. For position 4 above, the reference base is G, and there are 2 occurrences of it along with one each of mismatching bases A and C. Also, there is one 1-base deletion. So there are a total of 5 matches/mismatches/deletions, and the proportions for each base are either 1/5 = 0.20 or 2/5 = 0.40, and for the deletion it is 1/5 = 0.20. Insertions are slightly more complicated. Each insertion is regarded individually, and the total number of occurrences of that insertion is divided by the sum of the number of its occurrences and the basic total. So for position 5, there are a total of 5 matches/mismatches/deletions, and two insertions that each occur once, so each insertion has a proportion of 1/6 = 0.17.
+
+The DELPROP or INSPROP needs to be greater than the threshold frequency specified by the user.
+
+The output varies for deletions and insertions, though for both, the first three columns are chromosome, start position, and end position.
+
+Columns in the deletions file::
+
+ Column Description
+ ---------------------------- ------------------------------------------------------------------------------------
+ 1 Chrom Chromosome
+ 2 Start Starting position
+ 3 End Ending position
+ 4 Number of Deleted Base(s) The number of bases deleted at Start position
+ 5 Frequency Percentage Frequency of this exact deletion (2 and 1 are mutually exclusive), as percentage (%)
+
+Columns in the insertions file::
+
+ Column Description
+ -------------------------- -------------------------------------------------------------------------------------------------------------
+ 1 Chrom Chromosome
+ 2 Start Starting position
+ 3 End Ending position (always Start + 1 for insertions)
+ 4 Inserted Base(s) The exact base(s) inserted at Start position
+ 5 Freq. Perc. at Start Frequency of this exact insertion given Start position ("GG" and "G" are considered distinct), as percentage (%)
+ 6 Freq. Perc. at End Frequency of this exact insertion given End position ("GG" and "G" are considered distinct), as percentage (%)
+
+Before using this tool, you probably will want to use the Filter SAM for indels tool to filter out indels on bases with insufficient quality scores, but this is not required.
+
+
+-----
+
+**Example**
+
+If you set the threshold to 0.0 and have the following SAM file::
+
+ r770 89 ref 6 37 7M1I5M = 0 0 TGGATCTTCATAG !0//110AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r1124 113 ref 4 0 23M = 0 0 CATCGTTCTGTTAGATCTACGTA PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r1789 177 ref 6 0 17M = 0 0 TCGATCGCTTAGTTCTC SQQWZY]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r3671 153 ref 10 37 6M1I6M = 0 0 TCTCTTTAGGTCT /<<!"0/////// XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r3824 153 ref 5 37 8M1I7M = 0 0 ATTGATGTTCTTAGAT 4;6//11!"100110/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r4800 16 ref 7 255 5M2D6M = 0 0 CGATCTTTGAT IIIIIIIIIIC XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r5612 151 ref 5 37 8M1D9M = 0 0 ATCTATCTTTTGATCTC /<<!"0/4/*/7//B0/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r5612 151 ref 11 37 3M1I10M = 0 0 CTCCTTAGCTCTCC /<<!"0/4//7//0 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r9145 115 ref 11 0 19M = 0 -1 CTCTTAGCTCTCCGAATTAG 7753:<5#"4!&=9518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+ r11770 89 ref 10 37 10M2I8M = 0 0 TCTCTTAGATGGCTCCGTAT 00/02!!0/120210AA4/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r13671 153 ref 1 37 12M1I12M = 0 0 TCGCATCGATCTCCGTAGATCTCCG /<""<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r13824 153 ref 13 37 9M1I7M = 0 0 CATAGATCTACCGGATT 4;6//11!"11100110 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r24800 16 ref 3 255 15M2D9M = 0 0 GCATCTATCTGATAGCTCCGAATT IIIIIIIII45"CCCIII?IIIII XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r25612 151 ref 1 37 9M1D5M = 0 0 TCGCATCGACTCTT 0/4/*/7//00/1C XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r25612 151 ref 21 37 4M1I7M = 0 0 TGCGTTATTGGG <!"0/70/BC01 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r29962 16 ref 20 37 4M1I7M = 0 0 CTCCGGTATGAGG <!"0/70/7BC01 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+
+The following will be produced (deletions file followed by insertions file)::
+
+ ref 10 11 1 1 9.09
+ ref 12 14 2 1 7.69
+ ref 13 14 1 1 7.69
+ ref 18 20 2 1 8.33
+
+ ref 13 14 C 1 6.25 6.67
+ ref 13 14 T 2 12.50 13.33
+ ref 14 15 C 1 6.67 7.14
+ ref 16 17 T 1 7.14 7.69
+ ref 20 21 GG 1 8.33 8.33
+ ref 22 23 A 1 8.33 11.11
+ ref 24 25 G 1 11.11 12.50
+ ref 25 26 T 1 12.50 14.29
+
+
+ </help>
+</tool>
--- /dev/null
+++ b/test-data/indel_analysis_out2.interval
@@ -0,0 +1,8 @@
+ref 13 14 C 1 7.14 7.14
+ref 13 14 T 2 13.33 14.29
+ref 14 15 C 1 6.67 7.14
+ref 16 17 T 1 7.14 7.69
+ref 20 21 GG 1 8.33 8.33
+ref 22 23 A 1 8.33 11.11
+ref 24 25 G 1 11.11 14.29
+ref 25 26 T 1 12.50 14.29
--- /dev/null
+++ b/test-data/indel_analysis_in2.sam
@@ -0,0 +1,16 @@
+r770 89 ref 6 37 7M1I5M = 0 0 TCGATCTTCATAG !0//110AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r1124 113 ref 4 0 23M = 0 0 CATCGTTCTGTTAGATCTACGTA PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r1789 177 ref 6 0 17M = 0 0 TCGATCGCTTAGTTCTC SQQWZY]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r3671 153 ref 10 37 6M1I6M = 0 0 TCTCTTTAGGTCT /<<!"0/////// XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r3824 153 ref 5 37 8M1I7M = 0 0 ATCGATGTTCTTAGAT 4;6//11!"100110/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r4800 16 ref 7 255 5M2D6M = 0 0 CGATCTTTGAT IIIIIIIIIIC XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r5612 151 ref 5 37 8M1D9M = 0 0 ATCTATCTTTTGATCTC /<<!"0/4/*/7//B0/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r5929 151 ref 11 37 3M1I10M = 0 0 CTCCTTAGCTCTCC /<<!"0/4//7//0 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r9145 115 ref 11 0 19M = 0 -1 CTCTTAGCTCTCCGAATTAG 7753:<5#"4!&=9518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+r11770 89 ref 10 37 10M2I8M = 0 0 TCTCTTAGATGGCTCCGTAT 00/02!!0/120210AA4/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r13671 153 ref 1 37 12M1I12M = 0 0 TCGCATCGATCTCCTTAGATCTCCG /<""<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r13824 153 ref 13 37 9M1I7M = 0 0 CATAGATCTACCGGATT 4;6//11!"11100110 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r24800 16 ref 3 255 15M2D9M = 0 0 GCATCTATCTGATAGCTCCGAATT IIIIIIIII45"CCCIII?IIIII XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r25612 151 ref 1 37 9M1D5M = 0 0 TCGCATCGACTCTT 0/4/*/7//00/1C XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r25786 151 ref 21 37 4M1I7M = 0 0 TGCGTTATTGGG <!"0/70/BC01 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r29962 16 ref 20 37 4M1I7M = 0 0 CTCCGGTATGAGG <!"0/70/7BC01 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
--- a/test-data/sam_indel_filter_in1.sam
+++ b/test-data/sam_indel_filter_in1.sam
@@ -1,15 +1,15 @@
-1378_28_770 89 chr11.nib:1-134452384 72131356 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+1378_28_770 89 chr11.nib:1-134452384 72131356 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
1378_28_770 181 chr11.nib:1-134452384 72131356 0 13M2I6M1D5M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$###
-1378_33_1945 113 chr2.nib:1-242951149 181247988 0 23M chr12.nib:1-132349534 41710908 0 GAGAGAGAGAGAGAGAGAGAGAG PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
-1378_33_1945 177 chr12.nib:1-132349534 41710908 0 23M chr2.nib:1-242951149 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
-1378_35_263 115 chr16.nib:1-88827254 19671878 0 23M = 19671877 -1 AGAGAGAGAGAGAGAGAGAGTCT 77543:<55#"4!&=964518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
-1378_35_263 179 chr16.nib:1-88827254 19671877 0 13M1I4M1I5M = 19671878 1 GAGAGAGAGAGAGAGAGAGAGTC LE7402DD34FL:27AKE>;432 XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:265 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+1378_33_1945 113 chr2.nib:1-242951149 181247988 0 23M chr12.nib:1-132349534 41710908 0 GAGAGAGAGAGAGAGAGAGAGAG PQRVUMNXYRPUXYXWXSOSZ]M XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+1378_33_1945 177 chr12.nib:1-132349534 41710908 0 23M chr2.nib:1-242951149 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+1378_35_263 115 chr16.nib:1-88827254 19671878 0 23M = 19671877 -1 AGAGAGAGAGAGAGAGAGAGTCT 77543:<55#"4!&=964518A> XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:137 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+1378_35_263 179 chr16.nib:1-88827254 19671877 0 13M1I4M1I5M = 19671878 1 GAGAGAGAGAGAGAGAGAGAGTC LE7402DD34FL:27AKE>;432 XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:265 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
1378_51_1671 117 chr2.nib:1-242951149 190342418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$##
-1378_51_1671 153 chr2.nib:1-242951149 190342418 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+1378_51_1671 153 chr2.nib:1-242951149 190342418 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
1378_56_324 117 chr2.nib:1-242951149 80324999 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/
-1378_56_324 153 chr2.nib:1-242951149 80324999 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
-1378_67_1795 81 chr16.nib:1-88827254 26739130 0 23M chrY.nib:1-57772954 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
-1378_67_1795 161 chrY.nib:1-57772954 57401793 37 23M chr16.nib:1-88827254 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+1378_56_324 153 chr2.nib:1-242951149 80324999 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+1378_67_1795 81 chr16.nib:1-88827254 26739130 0 23M chrY.nib:1-57772954 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+1378_67_1795 161 chrY.nib:1-57772954 57401793 37 23M chr16.nib:1-88827254 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
1378_69_800 16 chr11.nib:1-125234658 241 255 15M1D8M * 0 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII
-1378_69_1777 170 chrX.nib:1-59090954 59090793 37 23M chr16.nib:1-88827254 26739130 0 TATCAATAAGGTGATGTAACTCG ]WV]ABAWW]]]]]P]P//GU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
-1378_72_1612 151 chrY.nib:1-124295114 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+1378_69_1777 170 chrX.nib:1-59090954 59090793 37 23M chr16.nib:1-88827254 26739130 0 TATCAATAAGGTGATGTAACTCG ]WV]ABAWW]]]]]P]P//GU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+1378_72_1612 151 chrY.nib:1-124295114 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
--- /dev/null
+++ b/test-data/indel_analysis_out1.interval
@@ -0,0 +1,4 @@
+ref 10 11 1 1 9.09
+ref 12 14 2 1 7.69
+ref 13 14 1 1 7.69
+ref 18 20 2 1 8.33
--- /dev/null
+++ b/tools/indels/indel_sam2interval.xml
@@ -0,0 +1,121 @@
+<tool id="indel_sam2interval" name="Convert SAM to interval/BED" version="1.0.0">
+ <description>for indels</description>
+ <command interpreter="python">
+ indel_sam2interval.py
+ --input=$input1
+ --include_base=$include_base
+ --collapse=$collapse
+ --int_out=$output1
+ #if $ins_out.include_ins_out == "true"
+ --bed_ins_out=$output2
+ #else
+ --bed_ins_out="None"
+ #end if
+ #if $del_out.include_del_out == "true"
+ --bed_del_out=$output3
+ #else
+ --bed_del_out="None"
+ #end if
+ </command>
+ <inputs>
+ <param format="sam" name="input1" type="data" label="Select dataset to convert" />
+ <param name="include_base" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)" />
+ <param name="collapse" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Collapse repeated locations onto single line with counts" />
+ <conditional name="ins_out">
+ <param name="include_ins_out" type="select" label="Include insertions output bed file?">
+ <option value="true">Yes</option>
+ <option value="false">No</option>
+ </param>
+ <when value="true" />
+ <when value="false" />
+ </conditional>
<!-- lets the user choose whether the deletions-only BED file is produced -->
<conditional name="del_out">
    <param name="include_del_out" type="select" label="Include deletions output bed file?">
        <option value="true">Yes</option>
        <option value="false">No</option>
    </param>
    <when value="true" />
    <when value="false" />
</conditional>
+ </inputs>
<outputs>
    <data format="interval" name="output1" />
    <!-- each BED output is only wanted when its select is set to "true";
         the filter has to be a comparison (==), not an assignment (=) -->
    <data format="bed" name="output2">
        <filter>ins_out[ "include_ins_out" ] == "true"</filter>
    </data>
    <data format="bed" name="output3">
        <filter>del_out[ "include_del_out" ] == "true"</filter>
    </data>
</outputs>
+ <tests>
+ <test>
+ <param name="input1" value="indel_sam2interval_in1.sam" ftype="sam"/>
+ <param name="include_base" value="true"/>
+ <param name="collapse" value="true"/>
+ <param name="include_ins_out" value="true" />
+ <param name="include_del_out" value="true" />
+ <output name="output1" file="indel_sam2interval_out1.interval" ftype="interval"/>
+ <output name="output2" file="indel_sam2interval_out2.bed" ftype="bed"/>
+ <output name="output3" file="indel_sam2interval_out3.bed" ftype="bed"/>
+ </test>
+ </tests>
+ <help>
+
+**What it does**
+
+Given a SAM file containing indels, converts these to an interval file with a column indicating whether it is an insertion or a deletion, and then also can create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other like files to create a table useful for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.
+
+-----
+
+**Example**
+
+Suppose you have the following::
+
+ r770 89 ref 116 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r780 181 ref 4567 0 24M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$### XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r1231 69 * 0 0 * * 0 0 AGACCGGGCGGGGTGGCGTTCGGT %##+'#######%###$#$##$(#
+ r1563 133 * 0 0 * * 0 0 GTTCGTGGCCGGTGGGTGTTTGGG ###$$#$#$&#####$'$#$###$
+ r1945 177 ref 71908 0 23M 190342418 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r3671 117 ref 31903418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$## XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r3673 153 ref 48819768 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r3824 117 ref 80729921 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r3911 153 ref 87824718 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r4795 81 ref 126739130 0 23M 57401793 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+ r4797 161 ref 57401793 37 23M 26739130 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r4800 16 ref 241 255 15M2D8M = 1550 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r5377 170 ref 52090793 37 23M 26739130 26739130 0 TATCAATAAGGTGATGTAACTCG ]WV]ABAWW]]]]]P]P//GU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r5612 151 ref 190341152 37 19M1D4M = 190342418 0 TCTAACTTAGCCTCATAATGCTAA /<<!"0/4/*/7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r5623 151 ref 188841418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r7899 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'#
+ r9192 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&%
+
+
+The following three files will be produced (Interval, Insertions BED and Deletions BED)::
+
+ ref 133 134 I C 1
+ ref 256 258 D - 1
+ ref 48819784 48819785 I A 1
+ ref 87824726 87824727 I G 1
+ ref 188841437 188841438 I A 1
+ ref 190341171 190341172 D - 1
+
+ ref 133 134
+ ref 48819784 48819785
+ ref 87824726 87824727
+ ref 188841437 188841438
+
+ ref 256 258
+ ref 190341171 190341172
+
+
+
+
+
+
+For more information on SAM, please consult the `SAM format description`__.
+
+.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
+
+
+ </help>
+</tool>
--- /dev/null
+++ b/tools/indels/indel_sam2interval.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+"""
+Converts the indels found in a SAM file to an interval file and, optionally, to BED files (one for insertions, one for deletions).
+
+usage: %prog [options]
+ -i, --input=i: The input SAM file
+ -u, --include_base=u: Whether or not to include the base for insertions
+ -c, --collapse=c: Whether to collapse multiple occurrences of a location with counts shown
+ -o, --int_out=o: The interval output file for the converted SAM file
+ -b, --bed_ins_out=b: The bed output file with insertions only for the converted SAM file
+ -d, --bed_del_out=d: The bed output file with deletions only for the converted SAM file
+"""
+
+import re, sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+
def stop_err( msg ):
    """
    Write msg, followed by a newline, to stderr and terminate the script.

    The script exits with status 1, so that the failure is also visible to
    anything that checks the exit code (a bare sys.exit() reports success).
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit( 1 )
+
def numeric_sort( text1, text2 ):
    """
    cmp-style comparator for items of whitespace-separated text.

    Both items are split on whitespace and compared piece by piece:
      - two all-digit pieces are compared as integers
      - an all-digit piece sorts before a piece that is not all digits
      - two other pieces are compared as text
    If all shared pieces are equal, the item with fewer pieces sorts first.
    An item without any pieces sorts after every other item, and two such
    items compare equal.

    Returns -1, 0 or 1.
    """
    pieces1 = text1.split()
    pieces2 = text2.split()
    # empty items go last; two empty items must be equal, otherwise the
    # comparator would give the same answer for both argument orders
    if not pieces1 and not pieces2:
        return 0
    if not pieces1:
        return 1
    if not pieces2:
        return -1
    for pc1, pc2 in zip( pieces1, pieces2 ):
        digit1 = pc1.isdigit()
        digit2 = pc2.isdigit()
        if digit1 != digit2:
            # numbers sort before text
            if digit1:
                return -1
            return 1
        if digit1:
            # compare as numbers, so that 9 sorts before 10
            key1, key2 = int( pc1 ), int( pc2 )
        else:
            key1, key2 = pc1, pc2
        if key1 > key2:
            return 1
        if key1 < key2:
            return -1
    # all shared pieces are equal: the shorter item sorts first
    if len( pieces1 ) > len( pieces2 ):
        return 1
    if len( pieces1 ) < len( pieces2 ):
        return -1
    return 0
+
def __main__():
    """
    Convert the single-indel alignments of a SAM file to interval/BED records.

    Options are read from the command line by doc_optparse, which is handed
    the module docstring. An alignment is converted only if its whole CIGAR
    string has the form <l>M<w>I<r>M or <l>M<w>D<r>M. The indel starts at
    POS + l; a deletion ends at start + w, an insertion at start + 1.

    Output:
      - int_out: chrom, start, end, I or D and, if include_base is "true",
        the inserted bases (or '-' for a deletion). If collapse is "true",
        identical records are written once, followed by their count, in
        numeric_sort order; otherwise records are written in input order.
      - bed_ins_out / bed_del_out: chrom, start, end of every insertion /
        deletion, unless the option is the string 'None'.

    Alignments whose CIGAR string has three or more operations but is not of
    the form above are skipped, and their number is reported on stdout.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )

    # the only CIGAR shape that is converted: match, one indel, match.
    # Anchored at both ends, so that alignments with further operations
    # (a second indel, clipping, ...) are not processed as single indels.
    pat_indel = re.compile( r'^(?P<lmatch>\d+)M(?P<ins_del_width>\d+)(?P<ins_del>[ID])(?P<rmatch>\d+)M$' )
    # any CIGAR string with three or more operations
    pat_multi = re.compile( r'(\d+[MIDNSHP])(\d+[MIDNSHP])(\d+[MIDNSHP])+' )

    out_data = []
    multi_indel_lines = 0
    output = open( options.int_out, 'wb' )
    output_bed_ins = None
    output_bed_del = None
    sam_file = None
    try:
        # open up the optional output files
        if options.bed_ins_out != 'None':
            output_bed_ins = open( options.bed_ins_out, 'wb' )
        if options.bed_del_out != 'None':
            output_bed_del = open( options.bed_del_out, 'wb' )

        # go through all lines in input file
        sam_file = open( options.input, 'rb' )
        for line in sam_file:
            # skip comment and header lines
            if line.startswith( '#' ) or line.startswith( '@' ):
                continue
            split_line = line.split( '\t' )
            # an alignment has 11 mandatory fields (the optional tags may be
            # missing), so anything shorter cannot be parsed
            if len( split_line ) < 11:
                continue
            # grab relevant pieces
            chrom = split_line[2]
            pos = int( split_line[3] )
            cigar = split_line[5]
            base_string = split_line[9]
            # parse cigar string
            m = pat_indel.search( cigar )
            if not m:
                # account for multiple indels or operations we don't process;
                # anything else (e.g. a plain match) is skipped silently
                if pat_multi.search( cigar ):
                    multi_indel_lines += 1
                continue
            match = m.groupdict()
            left = int( match[ 'lmatch' ] )
            middle = int( match[ 'ins_del_width' ] )
            # calculate start and end positions, and output to insertion or deletion file
            start = left + pos
            if match[ 'ins_del' ] == 'D':
                end = start + middle
                d = [ chrom, start, end, 'D' ]
                base_column = '-'
                output_bed = output_bed_del
            else:
                # an insertion lies between two reference bases
                end = start + 1
                d = [ chrom, start, end, 'I' ]
                # the inserted bases follow the left match in the read
                base_column = base_string[ left : left + middle ]
                output_bed = output_bed_ins
            if options.include_base == "true":
                d.append( base_column )
            out_data.append( tuple( d ) )
            if output_bed:
                output_bed.write( '%s\t%s\t%s\n' % ( chrom, start, end ) )

        # output to interval file
        if options.collapse == 'true':
            out_dict = {}
            # first collapse and get counts
            for data in out_data:
                location = ' '.join( [ '%s' % d for d in data ] )
                out_dict.setdefault( location, [] ).append( data )
            locations = list( out_dict.keys() )
            # cmp-style sort, as numeric_sort is a comparison function
            locations.sort( numeric_sort )
            for loc in locations:
                output.write( '%s\t%s\n' % ( '\t'.join( [ '%s' % d for d in out_dict[ loc ][0] ] ), len( out_dict[ loc ] ) ) )
        else:
            for data in out_data:
                output.write( '%s\n' % '\t'.join( [ '%s' % d for d in data ] ) )
    finally:
        # cleanup, close files (also when an error interrupted the conversion)
        for handle in ( sam_file, output_bed_ins, output_bed_del, output ):
            if handle is not None:
                handle.close()

    # if skipped lines because of more than one indel, output message
    if multi_indel_lines > 0:
        sys.stdout.write( '%s alignments were skipped because they contained more than one indel or had unhandled operations (N/S/H/P).' % multi_indel_lines )
+
+if __name__=="__main__": __main__()
--- /dev/null
+++ b/tools/indels/indel_table.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+"""
+Combines several interval files containing indels with counts. All input files need to have the same number of columns.
+
+usage: %prog [options] [input3 sum3[ input4 sum4[ input5 sum5[...]]]]
+ -1, --input1=1: The first input file
+ -s, --sum1=s: Whether or not to include the totals from first file in overall total
+ -2, --input2=2: The second input file
+ -S, --sum2=S: Whether or not to include the totals from second file in overall total
+ -o, --output=o: The interval output file for the combined files
+"""
+
+import re, sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+
def stop_err( msg ):
    """
    Write msg, followed by a newline, to stderr and terminate the script.

    The script exits with status 1, so that the failure is also visible to
    anything that checks the exit code (a bare sys.exit() reports success).
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit( 1 )
+
def numeric_sort( text1, text2 ):
    """
    cmp-style comparator for items of whitespace-separated text.

    Both items are split on whitespace and compared piece by piece:
      - two all-digit pieces are compared as integers
      - an all-digit piece sorts before a piece that is not all digits
      - two other pieces are compared as text
    If all shared pieces are equal, the item with fewer pieces sorts first.
    An item without any pieces sorts after every other item, and two such
    items compare equal.

    Returns -1, 0 or 1.
    """
    pieces1 = text1.split()
    pieces2 = text2.split()
    # empty items go last; two empty items must be equal, otherwise the
    # comparator would give the same answer for both argument orders
    if not pieces1 and not pieces2:
        return 0
    if not pieces1:
        return 1
    if not pieces2:
        return -1
    for pc1, pc2 in zip( pieces1, pieces2 ):
        digit1 = pc1.isdigit()
        digit2 = pc2.isdigit()
        if digit1 != digit2:
            # numbers sort before text
            if digit1:
                return -1
            return 1
        if digit1:
            # compare as numbers, so that 9 sorts before 10
            key1, key2 = int( pc1 ), int( pc2 )
        else:
            key1, key2 = pc1, pc2
        if key1 > key2:
            return 1
        if key1 < key2:
            return -1
    # all shared pieces are equal: the shorter item sorts first
    if len( pieces1 ) > len( pieces2 ):
        return 1
    if len( pieces1 ) < len( pieces2 ):
        return -1
    return 0
+
def __main__():
    """
    Combine several indel interval files into one table of counts.

    Options are read from the command line by doc_optparse, which is handed
    the module docstring. The first two files and their sum flags come from
    the options; further files are given as positional arguments that
    alternate between file name and sum flag.

    Every input line is an indel (all columns but the last) followed by its
    count (last column). Each output line holds the indel, the total of its
    counts over the files whose sum flag is "true", and then its count in
    every file in input order (0 where a file does not contain it). Lines are
    written in numeric_sort order.

    Any problem while reading or writing is reported through stop_err.
    """
    # Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    inputs = [ options.input1, options.input2 ]
    includes = [ options.sum1, options.sum2 ]
    # positional arguments alternate: file, sum flag, file, sum flag, ...
    inputs.extend( [ a for i, a in enumerate( args ) if i % 2 == 0 ] )
    includes.extend( [ a for i, a in enumerate( args ) if i % 2 == 1 ] )
    num_cols = 0
    counts = {}
    # read in data from all files and get total counts
    try:
        for i, input_name in enumerate( inputs ):
            in_file = open( input_name, 'rb' )
            try:
                for line in in_file:
                    sp_line = line.strip().split( '\t' )
                    # set num_cols on first pass
                    if num_cols == 0:
                        if len( sp_line ) < 4:
                            raise Exception( 'There need to be at least 4 columns in the file: Chrom, Start, End, and Count' )
                        num_cols = len( sp_line )
                    # deal with differing number of columns
                    elif len( sp_line ) != num_cols:
                        raise Exception( 'All of the files need to have the same number of columns (current %s != %s of first line)' % ( len( sp_line ), num_cols ) )
                    # get actual counts for each indel
                    indel = '\t'.join( sp_line[:-1] )
                    try:
                        count = int( sp_line[-1] )
                    except ValueError as e:
                        raise Exception( 'The last column of each file must be numeric, with the count of the number of instances of that indel: %s' % str( e ) )
                    # make sure the indel has an entry even if it is first
                    # seen in a file that is left out of the total
                    indel_counts = counts.setdefault( indel, { 'tot': 0 } )
                    # total across all included files
                    if includes[i] == "true":
                        indel_counts[ 'tot' ] += count
                    # counts for ith file
                    indel_counts[i] = count
            finally:
                in_file.close()
    except Exception as e:
        stop_err( 'Failed to read all input files:\n%s' % str( e ) )
    # output combined results to table file
    try:
        output = open( options.output, 'wb' )
        try:
            count_keys = list( counts.keys() )
            # cmp-style sort, as numeric_sort is a comparison function
            count_keys.sort( numeric_sort )
            for indel in count_keys:
                count_out = [ str( counts[ indel ][ 'tot' ] ) ]
                for i in range( len( inputs ) ):
                    # 0 for files that do not contain this indel
                    count_out.append( str( counts[ indel ].get( i, 0 ) ) )
                output.write( '%s\t%s\n' % ( indel, '\t'.join( count_out ) ) )
        finally:
            output.close()
    except Exception as e:
        stop_err( 'Failed to output data: %s' % str( e ) )
+
+if __name__=="__main__": __main__()
--- /dev/null
+++ b/test-data/indel_analysis_out3.interval
@@ -0,0 +1,2 @@
+ref 10 11 1 1 9.09
+ref 18 20 2 1 8.33
--- /dev/null
+++ b/test-data/indel_table_in3.interval
@@ -0,0 +1,10 @@
+chrM 300 301 D - 8
+chrM 303 304 D - 22
+chrM 410 411 D - 2
+chrM 435 436 D - 1
+chrM 525 526 D - 1
+chrM 753 754 I A 1
+chrM 958 959 D - 1
+chrM 995 996 D - 3
+chrM 1168 1169 I C 1
+chrM 1296 1297 D - 1
--- /dev/null
+++ b/test-data/indel_sam2interval_in1.sam
@@ -0,0 +1,17 @@
+r770 89 ref 116 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r780 181 ref 4567 0 24M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$### XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r1231 69 * 0 0 * * 0 0 AGACCGGGCGGGGTGGCGTTCGGT %##+'#######%###$#$##$(#
+r1563 133 * 0 0 * * 0 0 GTTCGTGGCCGGTGGGTGTTTGGG ###$$#$#$&#####$'$#$###$
+r1945 177 ref 71908 0 23M 190342418 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r3671 117 ref 31903418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$##
+r3673 153 ref 48819768 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r3824 117 ref 80729921 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/
+r3911 153 ref 87824718 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r4795 81 ref 126739130 0 23M 57401793 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+r4797 161 ref 57401793 37 23M 26739130 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r4800 16 ref 241 255 15M2D8M = 1550 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII
+r5377 170 ref 52090793 37 23M 26739130 26739130 0 TATCAATAAGGTGATGTAACTCG ]WV]ABAWW]]]]]P]P//GU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+r5612 151 ref 190341152 37 19M1D4M = 190342418 0 TCTAACTTAGCCTCATAATGCTAA /<<!"0/4/*/7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r5623 151 ref 188841418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+r7899 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'#
+r9192 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&%
--- /dev/null
+++ b/tools/indels/sam_indel_filter.xml
@@ -0,0 +1,77 @@
+<tool id="sam_indel_filter" name="Filter SAM" version="1.0.0">
+ <description>for indels</description>
+ <command interpreter="python">
+ sam_indel_filter.py
+ --input=$input1
+ --quality_threshold=$quality_threshold
+ --adjacent_bases=$adjacent_bases
+ --output=$out_file1
+ </command>
+ <inputs>
+ <param format="sam" name="input1" type="data" label="Select dataset to filter" />
+ <param name="quality_threshold" type="integer" value="40" label="Quality threshold for adjacent bases" help="Takes Phred value assuming Sanger scale; usually between 0 and 40, but up to 93" />
+ <param name="adjacent_bases" type="integer" value="1" label="The number of adjacent bases to match on either side of the indel" help="If one side is shorter than this width, the read will be excluded" />
+ </inputs>
+ <outputs>
+ <data format="sam" name="out_file1" />
+ </outputs>
+ <tests>
+ <test>
+ <param name="input1" value="sam_indel_filter_in1.sam" ftype="sam"/>
+ <param name="quality_threshold" value="14"/>
+ <param name="adjacent_bases" value="2"/>
+ <output name="out_file1" file="sam_indel_filter_out1.sam" ftype="sam"/>
+ </test>
+ <test>
+ <param name="input1" value="sam_indel_filter_in1.sam" ftype="sam"/>
+ <param name="quality_threshold" value="29"/>
+ <param name="adjacent_bases" value="5"/>
+ <output name="out_file1" file="sam_indel_filter_out2.sam" ftype="sam"/>
+ </test>
+ <test>
+ <param name="input1" value="sam_indel_filter_in2.sam" ftype="sam"/>
+ <param name="quality_threshold" value="7"/>
+ <param name="adjacent_bases" value="1"/>
+ <output name="out_file1" file="sam_indel_filter_out3.sam" ftype="sam"/>
+ </test>
+ </tests>
+ <help>
+
+**What it does**
+
+Allows extracting indels from SAM files produced by BWA. Currently it handles only alignments that contain exactly one insertion or one deletion; any alignment with more than one indel is skipped. It matches CIGAR strings (column 6 in the SAM file) like 5M3I5M or 4M2D10M, so there must be a match or mismatch of sufficient length on either side of the indel.
+
+-----
+
+**Example**
+
+Suppose you have the following::
+
+ r770 89 ref 116 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r770 181 ref 116 0 24M = 72131356 0 TTGGTGCGCGCGGTTGAGGGTTGG $$(#%%#$%#%####$%%##$###
+ r1945 177 ref 41710908 0 23M 190342418 181247988 0 AGAGAGAGAGAGAGAGAGAGAGA SQQWZYURVYWX]]YXTSY]]ZM XT:A:R CM:i:0 SM:i:0 AM:i:0 X0:i:163148 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r3671 117 ref 190342418 0 24M = 190342418 0 CTGGCGTTCTCGGCGTGGATGGGT #####$$##$#%#%%###%$#$##
+ r3671 153 ref 190342418 37 16M1I6M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0///////00/!!0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r3824 117 ref 80324999 0 24M = 80324999 0 TCCAGTCGCGTTGTTAGGTTCGGA #$#$$$#####%##%%###**#+/
+ r3824 153 ref 80324999 37 8M1I14M = 80324999 0 TTTAGCCCGAAATGCCTAGAGCA 4;6//11!"11100110////00 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r4795 81 ref 26739130 0 23M 57401793 57401793 0 TGGCATTCCTGTAGGCAGAGAGG AZWWZS]!"QNXZ]VQ]]]/2]] XT:A:R CM:i:2 SM:i:0 AM:i:0 X0:i:3 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:23
+ r4795 161 ref 57401793 37 23M 26739130 26739130 0 GATCACCCAGGTGATGTAACTCC ]WV]]]]WW]]]]]]]]]]PU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r4800 16 ref 241 255 15M1D8M = 0 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r5377 170 ref 59090793 37 23M 26739130 26739130 0 TATCAATAAGGTGATGTAACTCG ]WV]ABAWW]]]]]P]P//GU]] XT:A:U CM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:23
+ r5612 151 ref 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+
+
+To select only alignments with indels, you need to determine the minimum quality you want the adjacent bases to have, as well as the number of adjacent bases to check. If you set the quality threshold to 47 and the number of bases to check to 2, you will get the following output::
+
+ r770 89 ref 116 37 17M1I5M = 72131356 0 CACACTGTGACAGACAGCGCAGC 00/02!!0//1200210AA44/1 XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r4800 16 ref 241 255 15M1D8M = 0 0 CGTGGCCGGCGGGCCGAAGGCAT IIIIIIIIIICCCCIII?IIIII XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+ r5612 151 ref 190342418 37 19M1I3M = 190342418 0 TCTAACTTAGCCTCATAATAGCT /<<!"0/4//7//00/BC0121/ XT:A:U CM:i:2 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:22
+
+
+For more information on SAM, please consult the `SAM format description`__.
+
+.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
+
+
+ </help>
+</tool>
1
0
galaxy-dist commit 3225b99dd493: Enhance VCF to MAF error message when no input file is provided.
by commits-noreply@bitbucket.org 16 Jul '10
by commits-noreply@bitbucket.org 16 Jul '10
16 Jul '10
# HG changeset patch -- Bitbucket.org
# Project galaxy-dist
# URL http://bitbucket.org/galaxy/galaxy-dist/overview
# User Dan Blankenberg <dan(a)bx.psu.edu>
# Date 1279032285 14400
# Node ID 3225b99dd49318ac798fc6ffdc99282f89225e83
# Parent 3aaca346bc07a6e13ce48bdee8c27dfd32a3d00b
Enhance VCF to MAF error message when no input file is provided.
--- a/tools/maf/vcf_to_maf_customtrack.py
+++ b/tools/maf/vcf_to_maf_customtrack.py
@@ -1,5 +1,6 @@
#Dan Blankenberg
from optparse import OptionParser
+import sys
import galaxy_utils.sequence.vcf
from galaxy import eggs
@@ -57,11 +58,14 @@ def main():
parser.add_option( "-p", "--population", action="store_true", dest="population", default=False, help="Create MAF on a per population basis")
parser.add_option( "-s", "--sample", action="store_true", dest="sample", default=False, help="Create MAF on a per sample basis")
parser.add_option( "-n", "--name", dest="name", default='Unknown Custom Track', help="Name for Custom Track")
+ parser.add_option( "-g", "--galaxy", action="store_true", dest="galaxy", default=False, help="Tool is being executed by Galaxy (adds extra error messaging).")
( options, args ) = parser.parse_args()
if len ( args ) < 3:
+ if options.galaxy:
+ print >>sys.stderr, "It appears that you forgot to specify an input VCF file, click 'Add new VCF...' to add at least input.\n"
parser.error( "Need to specify an output file, a dbkey and at least one input file" )
if not ( options.population ^ options.sample ):
--- a/tools/maf/vcf_to_maf_customtrack.xml
+++ b/tools/maf/vcf_to_maf_customtrack.xml
@@ -1,13 +1,19 @@
<tool id="vcf_to_maf_customtrack1" name="VCF to MAF Custom Track"><description>for display at UCSC</description>
- <command interpreter="python">vcf_to_maf_customtrack.py $out_file1 ${vcf_source_type.vcf_file[0].vcf_input.dbkey} ${vcf_source_type.vcf_source} -n '$track_name'
- ##
+ <command interpreter="python">vcf_to_maf_customtrack.py '$out_file1'
+ #if $vcf_source_type.vcf_file
+ '${vcf_source_type.vcf_file[0].vcf_input.dbkey}'
+ #else
+ '?'
+ #end if
+ ${vcf_source_type.vcf_source} -n '$track_name'
#for $vcf_repeat in $vcf_source_type.vcf_file
'${vcf_repeat.vcf_input}'
#if $vcf_source_type.vcf_source == '-p'
'${vcf_repeat.population_name}'
#end if
#end for
+ -g
</command><inputs><param name="track_name" type="text" label="Custom Track Name" value="Galaxy Custom Track" size="30" />
1
0