[hg] galaxy 2662: First pass at allowing MAF tools to deal with ...

9 Sep 2009

details:   http://www.bx.psu.edu/hg/galaxy/rev/e929a2d803e4
changeset: 2662:e929a2d803e4
user:      Dan Blankenberg <dan@bx.psu.edu>
date:      Fri Sep 04 10:40:16 2009 -0400
description:
First pass at allowing MAF tools to deal with multiple occurrences of a species within a block.  Tool versions have been incremented as necessary.

These changes should only affect output when an input block has a species appearing more than once, with the exception being the MAF to multiple FASTA blocks converters: the FASTA headers have been revised to included the sequence index for a species in a block as well as the block index.

A new tool "Split MAF Blocks by Species" has been added that will split MAF blocks into the complete combination of multiple blocks when a species appears more than once.

29 file(s) affected in this change:

lib/galaxy/datatypes/converters/maf_to_fasta_converter.py
lib/galaxy/datatypes/converters/maf_to_fasta_converter.xml
lib/galaxy/datatypes/converters/maf_to_interval_converter.py
lib/galaxy/datatypes/converters/maf_to_interval_converter.xml
lib/galaxy/datatypes/sequence.py
lib/galaxy/tools/util/maf_utilities.py
test-data/cf_maf2fasta_new.dat
test-data/maf_split_by_species_collapsed_out.maf
test-data/maf_split_by_species_in.maf
test-data/maf_split_by_species_not_collapsed_out.maf
tool_conf.xml.sample
tools/annotation_profiler/annotation_profiler_for_interval.py
tools/maf/genebed_maf_to_fasta.xml
tools/maf/interval2maf.py
tools/maf/interval2maf.xml
tools/maf/interval2maf_pairwise.xml
tools/maf/interval_maf_to_merged_fasta.py
tools/maf/interval_maf_to_merged_fasta.xml
tools/maf/maf_filter.py
tools/maf/maf_limit_size.py
tools/maf/maf_limit_size.xml
tools/maf/maf_limit_to_species.py
tools/maf/maf_split_by_species.py
tools/maf/maf_split_by_species.xml
tools/maf/maf_stats.py
tools/maf/maf_stats.xml
tools/maf/maf_to_fasta.xml
tools/maf/maf_to_fasta_concat.py
tools/maf/maf_to_fasta_multiple_sets.py

diffs (1927 lines):

diff -r 99dcba7af5b6 -r e929a2d803e4 lib/galaxy/datatypes/converters/maf_to_fasta_converter.py

--- a/lib/galaxy/datatypes/converters/maf_to_fasta_converter.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/lib/galaxy/datatypes/converters/maf_to_fasta_converter.py	Fri Sep 04 10:40:16 2009 -0400
@@ -14,12 +14,15 @@
     input_name = sys.argv.pop(1)
     out = open( output_name, 'w' )
     count = 0
-    for count, maf in enumerate( bx.align.maf.Reader( open( input_name, 'r' ) ) ):
-        for c in maf.components:
-            spec, chrom = bx.align.maf.src_split( c.src )
-            if not spec or not chrom:
-                    spec = chrom = c.src
-            out.write( "%s\n" % maf_utilities.get_fasta_header( c, suffix = "%s_%i" % ( spec, count ) ) )
+    for count, block in enumerate( bx.align.maf.Reader( open( input_name, 'r' ) ) ):
+        spec_counts = {}
+        for c in block.components:
+            spec, chrom = maf_utilities.src_split( c.src )
+            if spec not in spec_counts:
+                spec_counts[ spec ] = 0
+            else:
+                spec_counts[ spec ] += 1
+            out.write( "%s\n" % maf_utilities.get_fasta_header( c, { 'block_index' : count, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, count, spec_counts[ spec ] ) ) )
             out.write( "%s\n" % c.text )
         out.write( "\n" )
     out.close()
@@ -27,3 +30,12 @@
 
 
 if __name__ == "__main__": __main__()
+
+        for component in block.components:
+            spec, chrom = maf_utilities.src_split( component.src )
+            if spec not in spec_counts:
+                spec_counts[ spec ] = 0
+            else:
+                spec_counts[ spec ] += 1
+            file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, { 'block_index' : block_num, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, block_num, spec_counts[ spec ] ) ) )
+            file_out.write( "%s\n" % component.text )
diff -r 99dcba7af5b6 -r e929a2d803e4 lib/galaxy/datatypes/converters/maf_to_fasta_converter.xml
--- a/lib/galaxy/datatypes/converters/maf_to_fasta_converter.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/lib/galaxy/datatypes/converters/maf_to_fasta_converter.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,4 +1,4 @@
-<tool id="CONVERTER_maf_to_fasta_0" name="Convert MAF to Fasta">
+<tool id="CONVERTER_maf_to_fasta_0" name="Convert MAF to Fasta" version="1.0.1">
 <!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
   <command interpreter="python">maf_to_fasta_converter.py $output1 $input1</command>
   <inputs>
diff -r 99dcba7af5b6 -r e929a2d803e4 lib/galaxy/datatypes/converters/maf_to_interval_converter.py
--- a/lib/galaxy/datatypes/converters/maf_to_interval_converter.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/lib/galaxy/datatypes/converters/maf_to_interval_converter.py	Fri Sep 04 10:40:16 2009 -0400
@@ -4,7 +4,8 @@
 import sys
 from galaxy import eggs
 import pkg_resources; pkg_resources.require( "bx-python" )
-import bx.align.maf
+import bx.align.maf
+from galaxy.tools.util import maf_utilities
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
@@ -17,15 +18,15 @@
     #write interval header line
     out.write( "#chrom\tstart\tend\tstrand\n" )
     try:
-        for maf in bx.align.maf.Reader( open(input_name, 'r') ):
-            c = maf.get_component_by_src_start(species)
-            if c is not None:
-                out.write( "%s\t%i\t%i\t%s\n" % (bx.align.src_split(c.src)[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand) )
-                count += 1
+        for block in bx.align.maf.Reader( open( input_name, 'r' ) ):
+            for c in maf_utilities.iter_components_by_src_start( block, species ):
+                if c is not None:
+                    out.write( "%s\t%i\t%i\t%s\n" %  ( bx.align.src_split( c.src )[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand ) )
+                    count += 1
     except Exception, e:
         print >> sys.stderr, "There was a problem processing your input: %s" % e
     out.close()
-    print "%i MAF blocks converted to Genomic Intervals for species %s." % (count, species)
+    print "%i MAF blocks converted to Genomic Intervals for species %s." % ( count, species )
 
 
 if __name__ == "__main__": __main__()
diff -r 99dcba7af5b6 -r e929a2d803e4 lib/galaxy/datatypes/converters/maf_to_interval_converter.xml
--- a/lib/galaxy/datatypes/converters/maf_to_interval_converter.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/lib/galaxy/datatypes/converters/maf_to_interval_converter.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,4 +1,4 @@
-<tool id="CONVERTER_maf_to_interval_0" name="Convert MAF to Genomic Intervals">
+<tool id="CONVERTER_maf_to_interval_0" name="Convert MAF to Genomic Intervals" version="1.0.1">
 <!--  <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
   <command interpreter="python">maf_to_interval_converter.py $output1 $input1 ${input1.metadata.dbkey}</command>
   <inputs>
diff -r 99dcba7af5b6 -r e929a2d803e4 lib/galaxy/datatypes/sequence.py
--- a/lib/galaxy/datatypes/sequence.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/lib/galaxy/datatypes/sequence.py	Fri Sep 04 10:40:16 2009 -0400
@@ -22,7 +22,7 @@
         pass
 
 class Alignment( Sequence ):
-    """Class describing an alignmnet"""
+    """Class describing an alignment"""
 
     """Add metadata elements"""
     MetadataElement( name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True, no_value=None )
@@ -316,6 +316,78 @@
     import bx.align.maf
 except:
     pass
+#trying to import maf_utilities here throws an ImportError due to a circular import between jobs and tools:
+#from galaxy.tools.util.maf_utilities import build_maf_index_species_chromosomes
+#Traceback (most recent call last):
+#  File "./scripts/paster.py", line 27, in <module>
+#    command.run()
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/script/command.py", line 78, in run
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/script/command.py", line 117, in invoke
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/script/command.py", line 212, in run
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/script/serve.py", line 227, in command
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/script/serve.py", line 250, in loadapp
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 193, in loadapp
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 213, in loadobj
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 237, in loadcontext
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 267, in _loadconfig
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 397, in get_context
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 439, in _context_from_explicit
+#  File "build/bdist.solaris-2.11-i86pc/egg/paste/deploy/loadwsgi.py", line 18, in import_string
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/pkg_resources.py", line 1912, in load
+#    entry = __import__(self.module_name, globals(),globals(), ['__name__'])
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/galaxy/web/buildapp.py", line 18, in <module>
+#    from galaxy import config, jobs, util, tools
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/galaxy/jobs/__init__.py", line 3, in <module>
+#    from galaxy import util, model
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/galaxy/model/__init__.py", line 13, in <module>
+#    import galaxy.datatypes.registry
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/galaxy/datatypes/registry.py", line 6, in <module>
+#    import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/galaxy/datatypes/sequence.py", line 344, in <module>
+#    from galaxy.tools.util.maf_utilities import build_maf_index_species_chromosomes
+#  File "/afs/bx.psu.edu/home/dan/galaxy/central/lib/galaxy/tools/__init__.py", line 15, in <module>
+#    from galaxy import util, jobs, model
+#ImportError: cannot import name jobs
+#so we'll copy and paste for now...terribly icky
+#*** ANYCHANGE TO THIS METHOD HERE OR IN maf_utilities MUST BE PROPAGATED ***
+def COPIED_build_maf_index_species_chromosomes( filename, index_species = None ):
+    species = []
+    species_chromosomes = {}
+    indexes = bx.interval_index_file.Indexes()
+    try:
+        maf_reader = bx.align.maf.Reader( open( filename ) )
+        while True:
+            pos = maf_reader.file.tell()
+            block = maf_reader.next()
+            if block is None: break
+            for c in block.components:
+                spec = c.src
+                chrom = None
+                if "." in spec:
+                    spec, chrom = spec.split( ".", 1 )
+                if spec not in species: 
+                    species.append( spec )
+                    species_chromosomes[spec] = []
+                if chrom and chrom not in species_chromosomes[spec]:
+                    species_chromosomes[spec].append( chrom )
+                if index_species is None or spec in index_species:
+                    forward_strand_start = c.forward_strand_start
+                    forward_strand_end = c.forward_strand_end
+                    try:
+                        forward_strand_start = int( forward_strand_start )
+                        forward_strand_end = int( forward_strand_end )
+                    except ValueError:
+                        continue #start and end are not integers, can't add component to index, goto next component
+                        #this likely only occurs when parse_e_rows is True?
+                        #could a species exist as only e rows? should the
+                    if forward_strand_end > forward_strand_start:
+                        #require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
+                        indexes.add( c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size )
+    except Exception, e:
+        #most likely a bad MAF
+        log.debug( 'Building MAF index on %s failed: %s' % ( filename, e ) )
+        return ( None, [], {} )
+    return ( indexes, species, species_chromosomes )
 
 class Maf( Alignment ):
     """Class describing a Maf alignment"""
@@ -333,38 +405,8 @@
         Parses and sets species, chromosomes, index from MAF file.
         """
         #these metadata values are not accessable by users, always overwrite
+        indexes, species, species_chromosomes = COPIED_build_maf_index_species_chromosomes( dataset.file_name )
         
-        try:
-            maf_reader = bx.align.maf.Reader( open( dataset.file_name ) )
-        except:
-            return #not a maf file
-        species = []
-        species_chromosomes = {}
-        indexes = bx.interval_index_file.Indexes()
-        while True:
-            pos = maf_reader.file.tell()
-            block = maf_reader.next()
-            if block is None: break
-            for c in block.components:
-                spec = c.src
-                chrom = None
-                if "." in spec:
-                    spec, chrom = spec.split( ".", 1 )
-                if spec not in species: 
-                    species.append(spec)
-                    species_chromosomes[spec] = []
-                if chrom and chrom not in species_chromosomes[spec]:
-                    species_chromosomes[spec].append( chrom )
-                forward_strand_start = c.forward_strand_start
-                forward_strand_end = c.forward_strand_end
-                try:
-                    forward_strand_start = int( forward_strand_start )
-                    forward_strand_end = int( forward_strand_end )
-                except ValueError:
-                    continue #start and end are not integers, can't add component to index, goto next component
-                if forward_strand_end > forward_strand_start:
-                    #require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
-                    indexes.add( c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size )
         dataset.metadata.species = species
         #only overwrite the contents if our newly determined chromosomes don't match stored
         chrom_file = dataset.metadata.species_chromosomes
diff -r 99dcba7af5b6 -r e929a2d803e4 lib/galaxy/tools/util/maf_utilities.py
--- a/lib/galaxy/tools/util/maf_utilities.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/lib/galaxy/tools/util/maf_utilities.py	Fri Sep 04 10:40:16 2009 -0400
@@ -7,10 +7,41 @@
 import bx.align.maf
 import bx.intervals
 import bx.interval_index_file
-import sys, os, string, tempfile
+import sys, os, string, tempfile
+import logging
+from copy import deepcopy
 
 assert sys.version_info[:2] >= ( 2, 4 )
-
+
+log = logging.getLogger(__name__)
+
+
+GAP_CHARS = [ '-' ]
+SRC_SPLIT_CHAR = '.'
+
+def src_split( src ):
+    spec, chrom = bx.align.maf.src_split( src )
+    if None in [ spec, chrom ]:
+        spec = chrom = src
+    return spec, chrom
+
+def src_merge( spec, chrom, contig = None ):
+    if None in [ spec, chrom ]:
+        spec = chrom = spec or chrom
+    return bx.align.maf.src_merge( spec, chrom, contig )
+
+def get_species_in_block( block ):
+    species = []
+    for c in block.components:
+        spec, chrom = src_split( c.src )
+        if spec not in species:
+            species.append( spec )
+    return species
+
+def tool_fail( msg = "Unknown Error" ):
+    print >> sys.stderr, "Fatal Error: %s" % msg
+    sys.exit()
+
 #an object corresponding to a reference layered alignment
 class RegionAlignment( object ):
     
@@ -153,69 +184,187 @@
     except:
         return build_maf_index( maf_file, species = species )
     
+#*** ANYCHANGE TO THIS METHOD HERE OR IN galaxy.datatypes.sequences MUST BE PROPAGATED ***
+def build_maf_index_species_chromosomes( filename, index_species = None ):
+    species = []
+    species_chromosomes = {}
+    indexes = bx.interval_index_file.Indexes()
+    try:
+        maf_reader = bx.align.maf.Reader( open( filename ) )
+        while True:
+            pos = maf_reader.file.tell()
+            block = maf_reader.next()
+            if block is None: break
+            for c in block.components:
+                spec = c.src
+                chrom = None
+                if "." in spec:
+                    spec, chrom = spec.split( ".", 1 )
+                if spec not in species: 
+                    species.append( spec )
+                    species_chromosomes[spec] = []
+                if chrom and chrom not in species_chromosomes[spec]:
+                    species_chromosomes[spec].append( chrom )
+                if index_species is None or spec in index_species:
+                    forward_strand_start = c.forward_strand_start
+                    forward_strand_end = c.forward_strand_end
+                    try:
+                        forward_strand_start = int( forward_strand_start )
+                        forward_strand_end = int( forward_strand_end )
+                    except ValueError:
+                        continue #start and end are not integers, can't add component to index, goto next component
+                        #this likely only occurs when parse_e_rows is True?
+                        #could a species exist as only e rows? should the
+                    if forward_strand_end > forward_strand_start:
+                        #require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
+                        indexes.add( c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size )
+    except Exception, e:
+        #most likely a bad MAF
+        log.debug( 'Building MAF index on %s failed: %s' % ( filename, e ) )
+        return ( None, [], {} )
+    return ( indexes, species, species_chromosomes )
 
 #builds and returns ( index, index_filename ) for specified maf_file
 def build_maf_index( maf_file, species = None ):
-    indexes = bx.interval_index_file.Indexes()
-    try:
-        maf_reader = bx.align.maf.Reader( open( maf_file ) )
-        # Need to be a bit tricky in our iteration here to get the 'tells' right
-        while True:
-            pos = maf_reader.file.tell()
-            block = maf_reader.next()
-            if block is None: break
-            for c in block.components:
-                if species is not None and c.src.split( "." )[0] not in species:
-                    continue
-                indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos )
+    indexes, found_species, species_chromosomes = build_maf_index_species_chromosomes( maf_file, species )
+    if indexes is not None:
         fd, index_filename = tempfile.mkstemp()
         out = os.fdopen( fd, 'w' )
         indexes.write( out )
         out.close()
-        return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), index_filename )
-    except:
-        return ( None, None )
-
-def chop_block_by_region( block, src, region, species = None, mincols = 0, force_strand = None ):
-    ref = block.get_component_by_src( src )
-    #We want our block coordinates to be from positive strand
-    if ref.strand == "-":
-        block = block.reverse_complement()
-        ref = block.get_component_by_src( src )
+        return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), index_filename )
+    return ( None, None )
+
+def component_overlaps_region( c, region ):
+    if c is None: return False
+    start, end = c.get_forward_strand_start(), c.get_forward_strand_end()
+    if region.start >= end or region.end <= start:
+        return False
+    return True
+
+def chop_block_by_region( block, src, region, species = None, mincols = 0 ):
+    # This chopping method was designed to maintain consistency with how start/end padding gaps have been working in Galaxy thus far:
+    #   behavior as seen when forcing blocks to be '+' relative to src sequence (ref) and using block.slice_by_component( ref, slice_start, slice_end )
+    #   whether-or-not this is the 'correct' behavior is questionable, but this will at least maintain consistency
+    # comments welcome
+    slice_start = block.text_size #max for the min()
+    slice_end = 0 #min for the max()
+    old_score = block.score #save old score for later use
+    # We no longer assume only one occurance of src per block, so we need to check them all
+    for c in iter_components_by_src( block, src ):
+        if component_overlaps_region( c, region ):
+            if c.text is not None:
+                rev_strand = False
+                if c.strand == "-":
+                    #We want our coord_to_col coordinates to be returned from positive stranded component
+                    rev_strand = True
+                    c = c.reverse_complement()
+                start = max( region.start, c.start )
+                end = min( region.end, c.end )
+                start = c.coord_to_col( start )
+                end = c.coord_to_col( end )
+                if rev_strand:
+                    #need to orient slice coordinates to the original block direction
+                    slice_len = end - start
+                    end = len( c.text ) - start
+                    start = end - slice_len
+                slice_start = min( start, slice_start )
+                slice_end = max( end, slice_end )
+    
+    if slice_start < slice_end:
+        block = block.slice( slice_start, slice_end )
+        if block.text_size > mincols:
+            # restore old score, may not be accurate, but it is better than 0 for everything?
+            block.score = old_score
+            if species is not None:
+                block = block.limit_to_species( species )
+                block.remove_all_gap_columns()
+            return block
+    return None
     
-    #save old score here for later use
-    old_score =  block.score
-    slice_start = max( region.start, ref.start )
-    slice_end = min( region.end, ref.end )
-    
-    #slice block by reference species at determined limits
-    block = block.slice_by_component( ref, slice_start, slice_end )
-    
-    if block.text_size > mincols:
-        if ( force_strand is None and region.strand != ref.strand ) or ( force_strand is not None and force_strand != ref.strand ):
-            block = block.reverse_complement()
-        # restore old score, may not be accurate, but it is better than 0 for everything
-        block.score = old_score
-        if species is not None:
-            block = block.limit_to_species( species )
-            block.remove_all_gap_columns()
-        return block
-    return None
+def orient_block_by_region( block, src, region, force_strand = None ):
+    #loop through components matching src,
+    #make sure each of these components overlap region
+    #cache strand for each of overlaping regions
+    #if force_strand / region.strand not in strand cache, reverse complement
+    ### we could have 2 sequences with same src, overlapping region, on different strands, this would cause no reverse_complementing
+    strands = [ c.strand for c in iter_components_by_src( block, src ) if component_overlaps_region( c, region ) ]
+    if strands and ( force_strand is None and region.strand not in strands ) or ( force_strand is not None and force_strand not in strands ):
+        block = block.reverse_complement()
+    return block
+
+def get_oriented_chopped_blocks_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
+    for block, idx, offset in get_oriented_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols, force_strand ):
+        yield block
+def get_oriented_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
+    for block, idx, offset in get_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols ):
+        yield orient_block_by_region( block, src, region, force_strand ), idx, offset
+
+#split a block with multiple occurances of src into one block per src
+def iter_blocks_split_by_src( block, src ):
+    for src_c in iter_components_by_src( block, src ):
+        new_block = bx.align.Alignment( score=block.score, attributes=deepcopy( block.attributes ) )
+        new_block.text_size = block.text_size
+        for c in block.components:
+            if c == src_c or c.src != src:
+                new_block.add_component( deepcopy( c ) ) #components have reference to alignment, dont want to loose reference to original alignment block in original components
+        yield new_block
+
+#split a block into multiple blocks with all combinations of a species appearing only once per block
+def iter_blocks_split_by_species( block, species = None ):
+    def __split_components_by_species( components_by_species, new_block ):
+        if components_by_species:
+            #more species with components to add to this block
+            components_by_species = deepcopy( components_by_species )
+            spec_comps = components_by_species.pop( 0 )
+            for c in spec_comps:
+                newer_block = deepcopy( new_block )
+                newer_block.add_component( deepcopy( c ) )
+                for value in __split_components_by_species( components_by_species, newer_block ):
+                    yield value
+        else:
+            #no more components to add, yield this block
+            yield new_block
+    
+    #divide components by species    
+    spec_dict = {}
+    if not species:
+        species = []
+        for c in block.components:
+            spec, chrom = src_split( c.src )
+            if spec not in spec_dict:
+                spec_dict[ spec ] = []
+                species.append( spec )
+            spec_dict[ spec ].append( c )
+    else:
+        for spec in species:
+            spec_dict[ spec ] = []
+            for c in iter_components_by_src_start( block, spec ):
+                spec_dict[ spec ].append( c )
+    
+    empty_block = bx.align.Alignment( score=block.score, attributes=deepcopy( block.attributes ) ) #should we copy attributes?
+    empty_block.text_size = block.text_size
+    #call recursive function to split into each combo of spec/blocks
+    for value in __split_components_by_species( spec_dict.values(), empty_block ):
+        sort_block_components_by_block( value, block ) #restore original component order
+        yield value
+
+
 #generator yielding only chopped and valid blocks for a specified region
-def get_chopped_blocks_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
-    for block, idx, offset in get_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols, force_strand ):
+def get_chopped_blocks_for_region( index, src, region, species = None, mincols = 0 ):
+    for block, idx, offset in get_chopped_blocks_with_index_offset_for_region( index, src, region, species, mincols ):
         yield block
-def get_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
+def get_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0 ):
     for block, idx, offset in index.get_as_iterator_with_index_and_offset( src, region.start, region.end ):
-        block = chop_block_by_region( block, src, region, species, mincols, force_strand )
+        block = chop_block_by_region( block, src, region, species, mincols )
         if block is not None:
             yield block, idx, offset
 
 #returns a filled region alignment for specified regions
-def get_region_alignment( index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0 ):
+def get_region_alignment( index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0, overwrite_with_gaps = True ):
     if species is not None: alignment = RegionAlignment( end - start, species )
     else: alignment = RegionAlignment( end - start, primary_species )
-    return fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand, species, mincols )
+    return fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand, species, mincols, overwrite_with_gaps )
 
 #reduces a block to only positions exisiting in the src provided
 def reduce_block_by_primary_genome( block, species, chromosome, region_start ):
@@ -237,13 +386,11 @@
     return ( start_offset, species_texts )
 
 #fills a region alignment 
-def fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0 ):
+def fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0, overwrite_with_gaps = True ):
     region = bx.intervals.Interval( start, end )
     region.chrom = chrom
     region.strand = strand
     primary_src = "%s.%s" % ( primary_species, chrom )
-    
-
     
     #Order blocks overlaping this position by score, lowest first
     blocks = []
@@ -255,28 +402,40 @@
                 break
         else:
             blocks.append( ( score, idx, offset ) )
-    
+    
+    gap_chars_tuple = tuple( GAP_CHARS )
+    gap_chars_str = ''.join( GAP_CHARS )
     #Loop through ordered blocks and layer by increasing score
-    for block_dict in blocks:
-        block = chop_block_by_region( block_dict[1].get_at_offset( block_dict[2] ), primary_src, region, species, mincols, strand )
-        if block is None: continue
-        start_offset, species_texts = reduce_block_by_primary_genome( block, primary_species, chrom, start )
-        for spec, text in species_texts.items():
-            try:
-                alignment.set_range( start_offset, spec, text )
-            except:
-                #species/sequence for species does not exist
-                pass
-    
+    for block_dict in blocks:
        for block in iter_blocks_split_by_species( block_dict[1].get_at_offset( block_dict[2] ) ): #need to handle each occurance of sequence in block seperately
+            if component_overlaps_region( block.get_component_by_src( primary_src ), region ):
+                block = chop_block_by_region( block, primary_src, region, species, mincols ) #chop block
+                block = orient_block_by_region( block, primary_src, region ) #orient block
+                start_offset, species_texts = reduce_block_by_primary_genome( block, primary_species, chrom, start )
+                for spec, text in species_texts.items():
+                    #we should trim gaps from both sides, since these are not positions in this species genome (sequence)
+                    text = text.rstrip( gap_chars_str )
+                    gap_offset = 0
+                    while text.startswith( gap_chars_tuple ):
+                        gap_offset += 1
+                        text = text[1:]
+                        if not text:
+                            break
+                    if text:
+                        if overwrite_with_gaps:
+                            alignment.set_range( start_offset + gap_offset, spec, text )
+                        else:
+                            for i, char in enumerate( text ):
+                                if char not in GAP_CHARS:
+                                    alignment.set_position( start_offset + gap_offset + i, spec, char )
     return alignment
 
 #returns a filled spliced region alignment for specified region with start and end lists
-def get_spliced_region_alignment( index, primary_species, chrom, starts, ends, strand = '+', species = None, mincols = 0 ):
+def get_spliced_region_alignment( index, primary_species, chrom, starts, ends, strand = '+', species = None, mincols = 0, overwrite_with_gaps = True ):
     #create spliced alignment object
     if species is not None: alignment = SplicedAlignment( starts, ends, species )
     else: alignment = SplicedAlignment( starts, ends, [primary_species] )
     for exon in alignment.exons:
-        fill_region_alignment( exon, index, primary_species, chrom, exon.start, exon.end, strand, species, mincols)
+        fill_region_alignment( exon, index, primary_species, chrom, exon.start, exon.end, strand, species, mincols, overwrite_with_gaps )
     return alignment
 
 #loop through string array, only return non-commented lines
@@ -319,29 +478,36 @@
             starts.append( start )
             ends.append( end )
     return ( starts, ends, fields )
-    
-def get_species_in_maf( maf_filename ):
-        try:
-            species={}
-            
-            file_in = open( maf_filename, 'r' )
-            maf_reader = maf.Reader( file_in )
-            
-            for i, m in enumerate( maf_reader ):
-                l = m.components
-                for c in l:
-                    spec, chrom = maf.src_split( c.src )
-                    if not spec or not chrom:
-                        spec = chrom = c.src
-                    species[spec] = spec
-            
-            file_in.close()
-            
-            species = species.keys()
-            species.sort()
-            return species
-        except:
-            return []
+
+def iter_components_by_src( block, src ):
+    for c in block.components:
+        if c.src == src:
+            yield c
+
+def get_components_by_src( block, src ):
+    return [ value for value in iter_components_by_src( block, src ) ]
+
+def iter_components_by_src_start( block, src ):
+    for c in block.components:
+        if c.src.startswith( src ):
+            yield c
+
+def get_components_by_src_start( block, src ):
+    return [ value for value in iter_components_by_src_start( block, src ) ]
+
+def sort_block_components_by_block( block1, block2 ):
+    #orders the components in block1 by the index of the component in block2
+    #block1 must be a subset of block2
+    #occurs in-place
+    return block1.components.sort( cmp = lambda x, y: block2.components.index( x ) - block2.components.index( y ) )
+
+def get_species_in_maf( maf_filename ):
+    species = []
+    for block in maf.Reader( open( maf_filename ) ):
+        for spec in get_species_in_block( block ):
+            if spec not in species:
+                species.append( spec )
+    return species
 
 def parse_species_option( species ):
     if species:
diff -r 99dcba7af5b6 -r e929a2d803e4 test-data/cf_maf2fasta_new.dat
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cf_maf2fasta_new.dat	Fri Sep 04 10:40:16 2009 -0400
@@ -0,0 +1,134 @@
+>hg17.chr7(+):127471195-127471526|sequence_index=0|block_index=0|species=hg17|hg17_0_0
+gtttgccatcttttgctgctctagggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTAAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAA
+>panTro1.chr6(+):129885076-129885407|sequence_index=0|block_index=0|species=panTro1|panTro1_0_0
+gtttgccatcttttgctgctcttgggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTGAAACTTCCCAAATACTGCCACTGATGTCCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAA
+>rheMac2.chr3(+):165787989-165788319|sequence_index=0|block_index=0|species=rheMac2|rheMac2_0_0
+gcttgccatcttttgatgctcttgggaatccagcagctgtcaccat-taaacaagcccaggctagaccaGTTACCCTCATC---ATCTTAGCTGATAGCCAGCCAGCCACCATAGGCAtgagtcaggccatagtgctggacccacagaattatgagctaaataagtagtgttgggttaagtcactaagttttaggcatagtgtgttatgtagcTCACAAACATATAAGACTGTGTGTTTTTTGACTGGAGGAAGAGATGCCATAAAGACCACCTTTTGAAACTTCTCAAATACTGCCATTGATGTGCTGATGGAGG-------------------------------------------------------TATGAA---------AACATCCACTAA
+>rn3.chr4(+):56178191-56178473|sequence_index=0|block_index=0|species=rn3|rn3_0_0
+CTTCACTCTCATTTGCTGTT----------------CTGTCACTATGGAGACAAACACAGGCTAGCCCAGTTACTATCTTGATCACAGCAGCTGT----CAGCTAGCTGCCACTCACAGGAATAAGGCCATACCATT-GATCCACTGAACCTTGATCTAGGAATTTGGC----------------------TGGGGCCAGTTTGCGGTGTCACTCATGA--CTCTAAGATTGTGTGTTTG----CTCCAGGAAGAGACGGCAAGAGGATTACCTTTAAAAGGTTCGG-AGTCTAGCTGTAGACAGCCCAATGGG---------------------------------------------------------TATAAC---------AATACTCACTAA
+>mm7.chr6(+):28984529-28984886|sequence_index=0|block_index=0|species=mm7|mm7_0_0
+CTCCACTCTCGTTTGCTGTT----------------CTGTCACCATGGAAACAAACG-AGGGTGGTCCAGTTACTATCTTG---ACTGCAGCTGG----CAGTCAGTTGCCACT--CAGGAATAAGGCTATGCCATT-GATCCACTGAACCGTGATCTGGAAACCTGGCTGTTGTTT-------CAAGCCTTGGGGCCAGTTTGCGGTGTTACTCATGA--CTCTAAGATCGTGTGCTTG----CTGCAGGAAGAGACAGCAAGGGGGTTACATTTAAAAAGCCCCC-AGTTTAGCTATAGGCAGGCCAACAGGTGTAAAAATACTCACTAGTAATGGGCTGAACTCATGGAGGTAGCATTAGTGAGACACTGTAACTGTTTTTTTAAAAATCACTAA
+
+>hg17.chr7(+):127471526-127471584|sequence_index=0|block_index=1|species=hg17|hg17_1_0
+AATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
+>mm7.chr6(+):28984886-28984940|sequence_index=0|block_index=1|species=mm7|mm7_1_0
+----AACGTTTCATTGATTGCTCATCATTTAAAAAAAGAAATTCCTCAGTGGAAGAGG
+>rheMac2.chr3(+):165788319-165788377|sequence_index=0|block_index=1|species=rheMac2|rheMac2_1_0
+AATTTGTGGTTTATTTATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
+>panTro1.chr6(+):129885407-129885465|sequence_index=0|block_index=1|species=panTro1|panTro1_1_0
+AATTTGTGGTTTATTCGTTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGG
+
+>hg17.chr7(+):127471584-127471688|sequence_index=0|block_index=2|species=hg17|hg17_2_0
+GAGATATTT-GGggaaatttt-gtatagactagctt--tcacgatgttagggaattattattgtgtgataatggtcttgcagttac-acagaaattcttcctta-ttttt
+>panTro1.chr6(+):129885465-129885569|sequence_index=0|block_index=2|species=panTro1|panTro1_2_0
+GAGACATTT-GGggaaatttt-gtatagactagctt--tcacgatgttagggagttattattgtgtgataatggtcttgcagttac-acagaaattcttcctta-ttttt
+>rheMac2.chr3(+):165788377-165788482|sequence_index=0|block_index=2|species=rheMac2|rheMac2_2_0
+GAGATATTT-GGggaaatttg-gtatagactagctt--tcatgatgtaagggagttatttttgtgtgataatggccctacagttac-acagaaattcttccttatttttt
+>canFam2.chr14(-):11090703-11090811|sequence_index=0|block_index=2|species=canFam2|canFam2_2_0
+gagatattt-gggggaatttgaatgtagtgttgctcttttgtgatgctaagaaattataattgtctgatgatagtctcgtggttatgggggaaatgcttcctta-ttttt
+>bosTau2.chr4(-):50243931-50244034|sequence_index=0|block_index=2|species=bosTau2|bosTau2_2_0
+-agacattg-ggtaaaattcaaatgcagactagctc----atgatgttaaagaattactcttgtgtggtaatggtcttgtgatagagatagaaatgcttcctta-ttttt
+>rn3.chr4(+):56182200-56182295|sequence_index=0|block_index=2|species=rn3|rn3_2_0
+----TATTTGGGGGAAATATG-ATGTGCA----CTT--CCATGATCTTAAAGAATTGCTACTGTTTGATAGTGATCTTATGGTTAA-ATAAAAAAAAT--CTTA-GTTGT
+>dasNov1.scaffold_256527(+):298-392|sequence_index=0|block_index=2|species=dasNov1|dasNov1_2_0
+GAGACATTT-GGAGAAATTTG-----------Aatt--tcatgatgttaaggaattacttttgtatgatgatggtcttgtggctat-gtagaatttcttccgtg-tttta
+
+>hg17.chr7(+):127471688-127471871|sequence_index=0|block_index=3|species=hg17|hg17_3_0
+tgggaagcaccaaagta-------gggataaaatgtcatgatgtgtgcaatacactttaaaatgtttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataattattaaatctaggt-----gatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaatttttcattta--------------------------aaaa-
+>panTro1.chr6(+):129885569-129885752|sequence_index=0|block_index=3|species=panTro1|panTro1_3_0
+tgggaaacaccaaagta-------gggataaaatgtcatgatgtgtgcaatacgctttaaaatatttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataattattaaatctaggt-----gatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaaattttcattta--------------------------aaaa-
+>rheMac2.chr3(+):165788482-165788684|sequence_index=0|block_index=3|species=rheMac2|rheMac2_3_0
+tgggaagcacaaaagta-------gggataaaatgtcatgatgtgtacaatatgctttaaaatatttttgccaaaa----------taattaa-------------------------tgaagc--aaatatg---gaaaataataactgttaaatctaggt-----gttgggtatattgcagttcattatgttattgcacacttttctgtgtgtttaaaattttcatttaaaaatatgttttaaaaatg-------aaaa-
+>rn3.chr4(+):56182295-56182489|sequence_index=0|block_index=3|species=rn3|rn3_3_0
+TAGAAAATACTCAAATATTTAGGGGCGTGACAATGTCACAGTGTCTGCAATTTGCTTTAAAGATTTTT-----AAA----------TATTTAAAAAAGTTTTAATAATTTTGAAAAACTGAAGCTACACTATG---GGAAGTGGTAATTGTTACATATGGGT-----AATAAGTAT-----AATTCGTTATATTAT-------TTTTC------TTAGAATTTTTCATTTG--------------------------AAAA-
+>bosTau2.chr4(-):50243792-50243930|sequence_index=0|block_index=3|species=bosTau2|bosTau2_3_0
+agataaacacttaagtattta---aggatgaaacgccctgatgtttgtaatttgctttagaatattttagccaaaa----------gaattaa-------------------------tgatgc--aaatatg--caaaaagagta--cgttaaacctaa-----------------------------------------------------atttgCGATTttcattta--------------------------aaaa-
+>canFam2.chr14(-):11090345-11090505|sequence_index=0|block_index=3|species=canFam2|canFam2_3_0
+agacacaaactgaagtattta---aggatgaaatgtcatgatgtttgcaattggctttaaaatattttagccaaaa-----------agtaaa-------------------------tgaagc--AAATATG--GGAAGACAATAATCATTAAATCTAGGT-----GATGCATAC---------------------------TTTTCCATATGTTTGAAATTTTCATTTA--------------------------AAAA-
+>dasNov1.scaffold_256527(+):393-625|sequence_index=0|block_index=3|species=dasNov1|dasNov1_3_0
+agacgcatgctgaagcatgta---aggataaaatgtcgtggtgtttgtaatttattctaaaacattttagccaaaaacaaataaataaataaa-------------------------tgaagc--aaatatgggggaaatgtttaattgttaaatctagatttaacacggtatataccgtgcttcattatactagtctctacttttccatgtgtttgaaattttCATTAAAATGTTTGTTTGTTGTCTGTTTTAATGAAAT
+
+>hg17.chr7(+):127471871-127471910|sequence_index=0|block_index=4|species=hg17|hg17_4_0
+actttgagctagacaccaggctatgagcta-ggagcatag
+>rheMac2.chr3(+):165788684-165788723|sequence_index=0|block_index=4|species=rheMac2|rheMac2_4_0
+actttgagctagataccaggttatgagcta-ggagcatag
+>panTro1.chr6(+):129885752-129885791|sequence_index=0|block_index=4|species=panTro1|panTro1_4_0
+actttgagctagacaccaggctatgagcta-ggagcatag
+>bosTau2.chr4(-):50243734-50243773|sequence_index=0|block_index=4|species=bosTau2|bosTau2_4_0
+tcttcgtgcaacgcacggggctatcaatgt-gggatacag
+>canFam2.chr14(-):11090081-11090120|sequence_index=0|block_index=4|species=canFam2|canFam2_4_0
+ACATCAtgctagatcctggactatgagctg-ggtatatag
+>dasNov1.scaffold_256527(+):625-665|sequence_index=0|block_index=4|species=dasNov1|dasNov1_4_0
+CCTTTGTGCTAGCCACTGGGATGAAAGCTAGGGAACACAG
+
+>hg17.chr7(+):127471910-127472074|sequence_index=0|block_index=5|species=hg17|hg17_5_0
+caatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa-ctc-aaagaatgcacattctCTG-GGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACCTG-TGTGATCCTGGCTTTTCCTGTTCCCTCCG---CATCATCACTGCAGGTGTGTTTTCCCAAG
+>panTro1.chr6(+):129885791-129885955|sequence_index=0|block_index=5|species=panTro1|panTro1_5_0
+caatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa-ctc-aaagaatgcacattctCTG-GGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACATG-TGTGATCCTGGCTTTTCCTGTTCCCTCTG---CATCATCACTGCAGGTGTATTTTCCCAAG
+>rheMac2.chr3(+):165788723-165788885|sequence_index=0|block_index=5|species=rheMac2|rheMac2_5_0
+caatgaccaa----------------------------------------------------------------------------------------------atagacccctaccga-ctc-aaagaatgtacattctTTG-GGAAACATGTTTCCATCAGAAAATCTCAAATGCAATGTGACTGGGGTCTCCAGGACCTG-TGTGAGCCTGGCTTTTCCTGTTCCCTCCA---CATCATCACTGCAGGTGTATTTTCCC--G
+>mm7.chr6(+):28990714-28990875|sequence_index=0|block_index=5|species=mm7|mm7_5_0
+caaaaaccaa------------------------------------------------------------------------------------------------aaaaACCTATAGC-CTC-ACAGGGTGGGTTGTCTTTG-AGGAACATGCATCCGCTAGAAAGTCCCAAGTACACTATGACAGTTG--CCCAGGCCCCGCCTTAAACCTGGTTTTCCTGGTTTCTTTCA---CATCATTACCACGAATATATTTCCTCAAG
+>rn3.chr4(+):56183448-56183705|sequence_index=0|block_index=5|species=rn3|rn3_5_0
+--ATGACCAATATACACTGTTTACATGTATAGCATTGTGAATGGAGACATAAAAAGATAATCTAGCTTTGTGCTAGGTAGGTGCTGAGCTCTTAACAGTGCTGGGCAGAAACCTATAAC-CTC-ACAGGGTGGGTTGTCTTTG-AGGAGCGTGCTAACCCTAGGAAGTCTCAAATACAATGTGATGGTTGCCCCCAGGCACCACCTTGAACCTGGTCTTCCTGGTTTCTTTCA---CACCATTACCACAAATACATTTTCTCAGG
+>bosTau2.chr4(-):50243566-50243734|sequence_index=0|block_index=5|species=bosTau2|bosTau2_5_0
+atgtgaacaa---------------------------------------------------------------------------------------------aacggacccgtgtgggactcggcggagcacacagattttgcgggagCACGTTCCCGTTAGGAAGTCTCTGATGCAATACGACCGGTGCCTTCAGGACCTG-TG--AGGCTGACTTTCCTTA-CCCCTCCACACCATCATCAAGGCAGGTGTGATTTTCCAGG
+>canFam2.chr14(-):11089913-11090081|sequence_index=0|block_index=5|species=canFam2|canFam2_5_0
+cagtgaacaa---------------------------------------------------------------------------------------------aacagagccctgcagt-cttgatggagcacacaacctttg-gggaaCATGTTTCCATAAGAAAGTCTCCAATGTGATCTGA-TGGTGCCGCCAGGACCTA-TGTCAGCCTACCGTTCCATGTCCCCTCCACACCATCATCACTGCAGGTGTGTTTTCCCACA
+>dasNov1.scaffold_256527(+):665-786|sequence_index=0|block_index=5|species=dasNov1|dasNov1_5_0
+CAGTGAGCAA-----------------------------------------------------------------------------------------------CAGCCTGGCTCCGT-CC--GGGGGCCGCTCAGCAGCTC-GGGAGCGTGGAGACG---GGAAGTCTGTCACGCGATGCG-----------CTGGGCCCG------------CTGTTCCCGCCCCCCTCC---CCCC----------------TTTCCCAAG
+
+>hg17.chr7(+):127472074-127472258|sequence_index=0|block_index=6|species=hg17|hg17_6_0
+TTTTAAA------CATTTACCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-ccc-tgggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagtt
+>panTro1.chr6(+):129885955-129886139|sequence_index=0|block_index=6|species=panTro1|panTro1_6_0
+TTTTAAA------CATTTACCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-tcc-tgggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagtt
+>rheMac2.chr3(+):165788885-165789069|sequence_index=0|block_index=6|species=rheMac2|rheMac2_6_0
+TTTTAAA------CATTTACTCTCCCAGTAGCCTTGCATCTCGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTGTACGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaatttt-tcc-tgggcacccctgtgttcttggcactggaaaagtaccaggacttaaatagta
+>mm7.chr6(+):28990875-28991025|sequence_index=0|block_index=6|species=mm7|mm7_6_0
+TTTAAAGAAAGTACCCCCTCCTTTCCAGT-GCCTCAAATCTAGAAGAATATTCATAGTGAAGT-GC------------------------ACAGCCGGGTGGTGCATGGTA-ATCTGGAAGTCACCTCTGCAAATCTT-TCC----------------TGTTGGTGCTGTGAAGGCACCAGGACTTCAAGAGTA
+>rn3.chr4(+):56183705-56183879|sequence_index=0|block_index=6|species=rn3|rn3_6_0
+TTTAAAAGAAGT-CCCACTCCTTTCCAGT-GCCCTAGATCTAGAAGCACATTCATAATGATGT-ACAC-----TAACCC----------GACAGCTGTGTGGTATATGGTA-TCCCGGAAGTCACCTCAGCAAACCTT-TCCCGGGGAACCTACATGGTGTTGGTGCTGTGAAGGTACCAGGTTGTCAAGGGTA
+>canFam2.chr14(-):11089743-11089913|sequence_index=0|block_index=6|species=canFam2|canFam2_6_0
+TTTTAAA------TATCTGC-TTCCCGGTGGCCTTGAGTCTAGAGGAGTCCCCCCACTATGGTGGCACTAATACTGAAGGTCAGAAATAATCAGTTCTGTGGTGCATGTTGCCCCTGAGGTTCTGTTCGGGAAACTTC-TTC-TGAGCAC----ATGCACCTGGCACTGCAAACGTACCAGGA-----------
+>dasNov1.scaffold_256527(+):786-964|sequence_index=0|block_index=6|species=dasNov1|dasNov1_6_0
+TTTTAAA------AATTTACCTTCCCAGTGGCGGTGAATCCGGAGGAATACGGAAACTGGGGC-GCACTACCATGACACGTGTCAAA-AATCAGTTCCGTGGTCCGTGGAGGGCCTGGGGTTC------GAAAATCTTGTCC-CGAGCACCCCCGTGCGCCTGGCACCGCGACAGTGACAGGACTGAAGCGTG-
+
+>hg17.chr7(+):127472258-127472280|sequence_index=0|block_index=7|species=hg17|hg17_7_0
+gatggccca-atccctgtcctct-
+>panTro1.chr6(+):129886139-129886161|sequence_index=0|block_index=7|species=panTro1|panTro1_7_0
+gatggccca-atccctgtcctct-
+>rheMac2.chr3(+):165789069-165789091|sequence_index=0|block_index=7|species=rheMac2|rheMac2_7_0
+gatggccca-atccctgtcctct-
+>mm7.chr6(+):28991025-28991048|sequence_index=0|block_index=7|species=mm7|mm7_7_0
+AATGGCAGAGGGCTCTGTTCTCT-
+>rn3.chr4(+):56183879-56183902|sequence_index=0|block_index=7|species=rn3|rn3_7_0
+AATGGCAGAGGCCCCTGTTCTCT-
+>canFam2.chr14(-):11089526-11089548|sequence_index=0|block_index=7|species=canFam2|canFam2_7_0
+GGAGACTTG-ATGCCTGCCTTCC-
+>dasNov1.scaffold_256527(+):964-987|sequence_index=0|block_index=7|species=dasNov1|dasNov1_7_0
+GACGGCCAG-ACCTCTGCCCTCGG
+
+>hg17.chr7(+):127472280-127472681|sequence_index=0|block_index=8|species=hg17|hg17_8_0
+taaaacctaagggaggagaTGGAAAG-GGGCACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATG--GAGAGGCCCTA-GTGGAATGGGGAGATTCTTCCGGGAGAAGCGATGGATGCACAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCA------ATGTGC-AATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta--aagggccaggtagaaaacattttaggctttctaagccaagg---caaaattgaggat-attacatgggtacttatacaacaagaataaacaatt---tacacaa-ttttttgttgacagaattcaaaa---ctttat----agacac---agaaatgcaaatttcctgt
+>panTro1.chr6(+):129886161-129886562|sequence_index=0|block_index=8|species=panTro1|panTro1_8_0
+taaaacctaagggaggagaTGGAAAG-GGGCACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATG--GAGAGACCCTA-GTGGAATGGGGAGATTCTTCCGGGAGAAGCGATGGATGCGCAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCA------ATGTGC-AATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta--aagggccaggtagaaaacattttaggctttctaagccaagg---caaaattgaggat-attacatgggtacttatacaacaagaataaacaatt---tacacaa-ttttttgttgacagaattcaaaa---ctttat----agacac---agaaatgtaaatttcctgt
+>rheMac2.chr3(+):165789091-165789492|sequence_index=0|block_index=8|species=rheMac2|rheMac2_8_0
+taaaacctaatggaggagatggaATG-GGTCACCCAACCCGGACTGAGAGACAGGAATTAGCTGCAAGGGTAACCAGGACAAGCTTCTCTA---ATGATG--GAGAGACCCTA-GTGGAATGGGGAGATTCTTCTGGGAGAAGCGATGGATTCGTAGTTGGGCATCCCCACAGAGGGACTGGAAAGAAAAAAGACCTGGAGGAACCA------ATGTGC-AATGTATGTGTGTTTCCTGGTTcaagggctggcaaactttctcta--aagggccagatagaaaacattttaggctttgtaagccaagg---caaaatcgaggag-attacatgggtacttatacaacaagaataaacaatt---tccacaa--tttttattcacagaattcaaaa---ctttat----agacac---agaaatgtaaatttcctgt
+>rn3.chr4(+):56183902-56184219|sequence_index=0|block_index=8|species=rn3|rn3_8_0
+------------------------------------GTCCATAGTCAAAG------------------------------AAGCCTCTCAG---ATGGAG--AGCAGGGCCTATGCAAAAGAGGGGGCTTCTGTAGGCAGAAGGGATGGACTAGCCTCCGGACATAGCCATAGAGAGGCTGGCAGGACTGAGACCCAGGAGAAGCCAGCGCAGGTGTGCGGGCGTGTGTATATTTCATAGTTTGCAGGTTGG----------------------------CAAACAATTCCTGCTTTGCAGGCCAAGA---GGAAACTGAAGGTGACCCCGTGAGTGCTTAC---ACAAGAGAAAACAAG-------ACAA-TTTTTGGTTGACCAAATTCAGAA---CTTTATTTGAGGATGC---TAAAGTTTAAATTTCTTTT
+>canFam2.chr14(-):11089143-11089523|sequence_index=0|block_index=8|species=canFam2|canFam2_8_0
+TACAGCCTGTGGGCAGAGGTGGGAAGAGGTCACGCAAGCCAGTTGGAATGAGGGGAGTTGGCTGGAAAGGTGACCAGGACAAGCTACTTCAACCAGGAAG--AAGAGACCCCG-GTG----------------CTTGGAGAAGGCCTGATTGAGCAGTCCTGCATGCCCGCCCAC-GACTGGCAGGAATAAAGACCCAGAAGAGCTA------ACGTGC-AATGTA------TTTTCTAGTTCCAgggttggcaaactttctctct-aagggtgggatgataaacattttaggcttttcagaccaaga---ggcgacatcagag-ggtatgtaggt---------acaagagggaaaagttgcccccggaa-ttttttg--gataaaattcaaaa---ctttacttagggatgc---caaaatgtaaacttcatat
+>dasNov1.scaffold_256527(+):987-1401|sequence_index=0|block_index=8|species=dasNov1|dasNov1_8_0
+CTAAATCTCGCGGAGAAGGTGGAACA-GGTTACCCAAACCCGACCGAG-GAGGCGAGTTG---GAAACGGCGACTGGGACAAGCTCCCTCA---GAGACGGAGAGAGACCCCA-GTGGAAGGGGGGAGAGGCTCTTAGGGAAACGATGGGGGGACCCGCCCGCACCCGCACAGAGGCGCTGGCAGGCACAGCGGCCCCGAGGAGCCC------AGGAGC-AGGGC-TGTGT-TCCCCTGCATcaggggttggcaaactttttctgcaaagggccagatagtaaatattttaggctttgcaaaccaagaagtagaaagggaggcc-attatgtacgtatttatatagcaagagagaacattt---cccacaatttttttattgacagaatttaaaacttctttattgatgaacaccaaagaaacttgaatttcatat
+
+>hg17.chr7(+):127472681-127472715|sequence_index=0|block_index=9|species=hg17|hg17_9_0
+aattttcccat---gagaactattcttcttttgtttt
+>rheMac2.chr3(+):165789492-165789526|sequence_index=0|block_index=9|species=rheMac2|rheMac2_9_0
+aattttcacat---aagaactattcttcttttgtttt
+>panTro1.chr6(+):129886562-129886596|sequence_index=0|block_index=9|species=panTro1|panTro1_9_0
+aattttcccgt---gagaactattcttcttttgtttt
+>canFam2.chr14(-):11089108-11089143|sequence_index=0|block_index=9|species=canFam2|canFam2_9_0
+aatggtcatgt--ccataactattcttcttttatttt
+>dasNov1.scaffold_256527(+):1401-1433|sequence_index=0|block_index=9|species=dasNov1|dasNov1_9_0
+aattttcacatatcacgaagtatttttttttt-----
+
diff -r 99dcba7af5b6 -r e929a2d803e4 test-data/maf_split_by_species_collapsed_out.maf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/maf_split_by_species_collapsed_out.maf	Fri Sep 04 10:40:16 2009 -0400
@@ -0,0 +1,61 @@
+##maf version=1
+a score=2047408.0
+s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT-GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC--GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC-GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTAG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGCAG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCAG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC---AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC---AG 
+
diff -r 99dcba7af5b6 -r e929a2d803e4 test-data/maf_split_by_species_in.maf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/maf_split_by_species_in.maf	Fri Sep 04 10:40:16 2009 -0400
@@ -0,0 +1,11 @@
+##maf version=1
+a score=2047408.0
+s species1.chr1          147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species1.chr1          147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species1.chr1          147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+s species1.chr1          147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+s species2.chr1          129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species2.chr1          129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species2.chr1          129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3           68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
diff -r 99dcba7af5b6 -r e929a2d803e4 test-data/maf_split_by_species_not_collapsed_out.maf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/maf_split_by_species_not_collapsed_out.maf	Fri Sep 04 10:40:16 2009 -0400
@@ -0,0 +1,61 @@
+##maf version=1
+a score=2047408.0
+s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+a score=2047408.0
+s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
diff -r 99dcba7af5b6 -r e929a2d803e4 tool_conf.xml.sample
--- a/tool_conf.xml.sample	Fri Sep 04 10:31:23 2009 -0400
+++ b/tool_conf.xml.sample	Fri Sep 04 10:40:16 2009 -0400
@@ -90,6 +90,7 @@
   <section name="Fetch Alignments" id="fetchAlign">
     <tool file="maf/interval2maf_pairwise.xml" />
     <tool file="maf/interval2maf.xml" />
+    <tool file="maf/maf_split_by_species.xml"/>
     <tool file="maf/interval_maf_to_merged_fasta.xml" />
     <tool file="maf/genebed_maf_to_fasta.xml"/>
     <tool file="maf/maf_stats.xml"/>
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/annotation_profiler/annotation_profiler_for_interval.py
--- a/tools/annotation_profiler/annotation_profiler_for_interval.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/annotation_profiler/annotation_profiler_for_interval.py	Fri Sep 04 10:40:16 2009 -0400
@@ -22,10 +22,12 @@
     fmt_size = struct.calcsize( fmt )
     def __init__( self, filename ):
         self.file_size = os.stat( filename ).st_size
-        self.file = open( filename, 'rb' )
+        self.file = open( filename, 'rb' )
+        self.filename = filename
         self.length = int( self.file_size / self.fmt_size / 2 )
         self._cached_ranges = [ None for i in xrange( self.length ) ]
     def __getitem__( self, i ):
+        old_i = i
         if self._cached_ranges[i] is not None:
             return self._cached_ranges[i]
         if i < 0: i = self.length + i
@@ -35,7 +37,13 @@
             start = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
             end = struct.unpack( self.fmt, self.file.read( self.fmt_size ) )[0]
         except Exception, e:
-            raise IndexError, e
+            print 'filename', self.filename
+            print 'len', len( self )
+            print 'fmtsize', self.fmt_size
+            print 'index', i
+            print 'old i', old_i
+            print 'offset', offset
+            raise IndexError( str( e ) )
         self._cached_ranges[i] = ( start, end )
         return start, end
     def __len__( self ):
@@ -141,20 +149,24 @@
             self.chromosome_coverage[chrom] = bx.bitset.BitSet( chrom_length )
         
         self.chromosome_coverage[chrom].set_range( region_start, region_length )
-        for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, region_start, region_end ):
-            if table_name not in self.table_coverage:
-                self.table_coverage[table_name] = 0
-                self.table_chromosome_size[table_name] = {}
-                self.table_regions_overlaped_count[table_name] = 0
-                self.interval_table_overlap_count[table_name] = 0
-                self.table_chromosome_count[table_name] = {}
-            if chrom not in self.table_chromosome_size[table_name]:
-                self.table_chromosome_size[table_name][chrom] = self.coverage_reader._coverage[table_name][chrom]._total_coverage
-                self.table_chromosome_count[table_name][chrom] = len( self.coverage_reader._coverage[table_name][chrom]._coverage )
-            self.table_coverage[table_name] += coverage
-            if coverage:
-                self.interval_table_overlap_count[table_name] += 1
-            self.table_regions_overlaped_count[table_name] += regions
+        try:
+            for table_name, coverage, regions in self.coverage_reader.iter_table_coverage_regions_by_region( chrom, region_start, region_end ):
+                if table_name not in self.table_coverage:
+                    self.table_coverage[table_name] = 0
+                    self.table_chromosome_size[table_name] = {}
+                    self.table_regions_overlaped_count[table_name] = 0
+                    self.interval_table_overlap_count[table_name] = 0
+                    self.table_chromosome_count[table_name] = {}
+                if chrom not in self.table_chromosome_size[table_name]:
+                    self.table_chromosome_size[table_name][chrom] = self.coverage_reader._coverage[table_name][chrom]._total_coverage
+                    self.table_chromosome_count[table_name][chrom] = len( self.coverage_reader._coverage[table_name][chrom]._coverage )
+                self.table_coverage[table_name] += coverage
+                if coverage:
+                    self.interval_table_overlap_count[table_name] += 1
+                self.table_regions_overlaped_count[table_name] += regions
+        except Exception, e:
+            print "chrom:%s, start:%s, end%s:." % ( chrom, start, end )
+            raise e
     def iter_table_coverage( self ):
         def get_nr_coverage():
             #returns non-redundant coverage, where user's input intervals have been collapse to resolve overlaps
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/genebed_maf_to_fasta.xml
--- a/tools/maf/genebed_maf_to_fasta.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/genebed_maf_to_fasta.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,8 +1,8 @@
-<tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks">
+<tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks" version="1.0.1">
   <description>given a set of coding exon intervals</description>
   <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
 #else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source  --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
-#end if
+#end if# --overwrite_with_gaps=$overwrite_with_gaps
   </command>
   <inputs>
     <param name="input1" type="data" format="bed" label="Gene BED File">
@@ -29,42 +29,48 @@
       <when value="cached">
         <param name="maf_identifier" type="select" label="MAF Type" >
           <options from_file="maf_index.loc">
-	        <column name="name" index="0"/>
-	        <column name="value" index="1"/>
-	        <column name="dbkey" index="2"/>
-	        <column name="species" index="3"/>
-	        <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
-	        <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
+            <column name="name" index="0"/>
+            <column name="value" index="1"/>
+            <column name="dbkey" index="2"/>
+            <column name="species" index="3"/>
+            <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
+            <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
           </options>
         </param> 
         <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
           <options from_file="maf_index.loc">
-	        <column name="uid" index="1"/>
-	        <column name="value" index="3"/>
-	        <column name="name" index="3"/>
-	        <filter type="param_value" ref="maf_identifier" name="uid" column="1"/>
-	        <filter type="multiple_splitter" column="3" separator=","/>
+            <column name="uid" index="1"/>
+            <column name="value" index="3"/>
+            <column name="name" index="3"/>
+            <filter type="param_value" ref="maf_identifier" name="uid" column="1"/>
+            <filter type="multiple_splitter" column="3" separator=","/>
           </options>
         </param>
       </when>
     </conditional>
-   </inputs>
+    <param name="overwrite_with_gaps" type="select" label="Split into Gapless MAF blocks" help="When set to Yes, blocks are divided around gaps appearing in any species. This will prevent gaps occuring in the interior of the sequence for an aligning species from overwriting a nucleotide found for the same position in a lower-scoring block.">
+      <option value="True" selected="true">No</option>
+      <option value="False">Yes</option>
+    </param>
+  </inputs>
   <outputs>
     <data format="fasta" name="out_file1" />
   </outputs>
   <tests>
     <test>
       <param name="input1" value="8.bed"/>
-      <param name="maf_source" value="cached"/>
+      <param name="maf_source" value="cached"/>in aligning species
       <param name="maf_identifier" value="8_WAY_MULTIZ_hg17"/>
-      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
+      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
+      <param name="overwrite_with_gaps" value="True"/>
       <output name="out_file1" file="gene_bed_maf_to_fasta_out.fasta" />
     </test>
     <test>
       <param name="input1" value="8.bed"/>
       <param name="maf_source" value="user"/>
       <param name="maf_file" value="4.maf"/>
-      <param name="species" value="hg17,panTro1"/>
+      <param name="species" value="hg17,panTro1"/>
+      <param name="overwrite_with_gaps" value="True"/>
       <output name="out_file1" file="gene_bed_maf_to_fasta_user_out.fasta" />
     </test>
   </tests> 
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/interval2maf.py
--- a/tools/maf/interval2maf.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/interval2maf.py	Fri Sep 04 10:40:16 2009 -0400
@@ -20,6 +20,8 @@
    -i, --interval_file=i:       Input interval file
    -o, --output_file=o:      Output MAF file
    -p, --species=p: Species to include in output
+   -P, --split_blocks_by_species=P: Split blocks by species
+   -r, --remove_all_gap_columns=r: Remove all Gap columns
    -l, --indexLocation=l: Override default maf_index.loc file
    -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc )
 """
@@ -45,25 +47,21 @@
     if options.dbkey: dbkey = options.dbkey
     else: dbkey = None
     if dbkey in [None, "?"]:
-        print >>sys.stderr, "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file."
-        sys.exit()
+        maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." )
     
     species = maf_utilities.parse_species_option( options.species )
     
     if options.chromCol: chromCol = int( options.chromCol ) - 1
     else: 
-        print >>sys.stderr, "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes."
-        sys.exit()
+        maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." )
     
     if options.startCol: startCol = int( options.startCol ) - 1
     else: 
-        print >>sys.stderr, "Start column not set, click the pencil icon in the history item to set the metadata attributes."
-        sys.exit()
+        maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." )
     
     if options.endCol: endCol = int( options.endCol ) - 1
     else: 
-        print >>sys.stderr, "End column not set, click the pencil icon in the history item to set the metadata attributes."
-        sys.exit()
+        maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." )
     
     if options.strandCol: strandCol = int( options.strandCol ) - 1
     else: 
@@ -71,13 +69,17 @@
     
     if options.interval_file: interval_file = options.interval_file
     else: 
-        print >>sys.stderr, "Input interval file has not been specified."
-        sys.exit()
+        maf_utilities.tool_fail( "Input interval file has not been specified." )
     
     if options.output_file: output_file = options.output_file
     else: 
-        print >>sys.stderr, "Output file has not been specified."
-        sys.exit()
+        maf_utilities.tool_fail( "Output file has not been specified." )
+    
+    split_blocks_by_species = remove_all_gap_columns = False
+    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
+        split_blocks_by_species = True
+        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
+            remove_all_gap_columns = True
     #Finish parsing command line
     
     #Open indexed access to MAFs
@@ -87,16 +89,13 @@
         else:
             index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile )
         if index is None:
-            print >> sys.stderr, "The MAF source specified (%s) appears to be invalid." % ( options.mafType )
-            sys.exit()
+            maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) )
     elif options.mafFile:
         index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] )
         if index is None:
-            print >> sys.stderr, "Your MAF file appears to be malformed."
-            sys.exit()
+            maf_utilities.tool_fail( "Your MAF file appears to be malformed." )
     else:
-        print >>sys.stderr, "Desired source MAF type has not been specified."
-        sys.exit()
+        maf_utilities.tool_fail( "Desired source MAF type has not been specified." )
     
     #Create MAF writter
     out = bx.align.maf.Writer( open(output_file, "w") )
@@ -105,10 +104,20 @@
     num_blocks = 0
     num_regions = None
     for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ):
-        src = "%s.%s" % ( dbkey, region.chrom )
-        for block in maf_utilities.get_chopped_blocks_for_region( index, src, region, species, mincols ):
-            out.write( block )
-            num_blocks += 1
+        src = maf_utilities.src_merge( dbkey, region.chrom )
+        for block in index.get_as_iterator( src, region.start, region.end ):
+            if split_blocks_by_species:
+                blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ]
+            else:
+                blocks = [ block ]
+            for block in blocks:
+                block = maf_utilities.chop_block_by_region( block, src, region )
+                if block is not None:
+                    block = maf_utilities.orient_block_by_region( block, src, region )
+                    if remove_all_gap_columns:
+                        block.remove_all_gap_columns()
+                    out.write( block )
+                    num_blocks += 1
     
     #Close output MAF
     out.close()
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/interval2maf.xml
--- a/tools/maf/interval2maf.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/interval2maf.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -3,6 +3,9 @@
   <command interpreter="python">
     #if $maf_source_type.maf_source == "user":#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafFile=$maf_source_type.mafFile --mafIndex=$maf_source_type.mafFile.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc --species=$maf_source_type.species
     #else:#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafType=$maf_source_type.mafType --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc --species=$maf_source_type.species
+    #end if
+    --split_blocks_by_species=$split_blocks_by_species_selector.split_blocks_by_species
+    #if $split_blocks_by_species_selector.split_blocks_by_species == "split_blocks_by_species":# --remove_all_gap_columns=$split_blocks_by_species_selector.remove_all_gap_columns
     #end if
   </command>
   <inputs>
@@ -48,7 +51,22 @@
           </options>
         </param>
       </when>
-    </conditional>
+    </conditional>
+    <conditional name="split_blocks_by_species_selector">
+      <param name="split_blocks_by_species" type="select" label="Split blocks by species" help="See the Split MAF blocks by Species tool for more information.">
+        <option value="split_blocks_by_species">Split by species</option>
+        <option value="dont_split_blocks_by_species" selected="true">Do not split</option>
+      </param>
+      <when value="dont_split_blocks_by_species">
+        <!-- do nothing here -->
+      </when>
+      <when value="split_blocks_by_species">
+        <param name="remove_all_gap_columns" type="select" label="Collapse empty alignment columns">
+          <option value="remove_all_gap_columns" selected="true">Collapse empty columns</option>
+          <option value="do_not_remove_all_gap_columns">Do not collapse</option>
+        </param>
+      </when>
+    </conditional>
    </inputs>
    <outputs>
      <data format="maf" name="out_file1"/>
@@ -59,6 +77,7 @@
        <param name="maf_source" value="cached"/>
        <param name="mafType" value="ENCODE_TBA_hg17"/>
        <param name="species" value="hg17,panTro1,baboon,marmoset,galago,rn3,mm6,rabbit,cow,canFam1,rfbat,shrew,armadillo,tenrec,monDom1,tetNig1,fr1,rheMac1,galGal2,xenTro1,danRer2,elephant,platypus,hedgehog,colobus_monkey,dusky_titi,owl_monkey,mouse_lemur"/>
+       <param name="split_blocks_by_species" value="dont_split_blocks_by_species"/>
        <output name="out_file1" file="fsa_interval2maf.dat" />
      </test>
      <test>
@@ -66,6 +85,7 @@
        <param name="maf_source" value="user"/>
        <param name="mafFile" value="fsa_interval2maf.dat"/>
        <param name="species" value="hg17,panTro1,baboon,marmoset,galago,rn3,mm6,rabbit,cow,canFam1,rfbat,shrew,armadillo,tenrec,monDom1,tetNig1,fr1,rheMac1,galGal2,xenTro1,danRer2,elephant,platypus,hedgehog,colobus_monkey,dusky_titi,owl_monkey,mouse_lemur"/>
+       <param name="split_blocks_by_species" value="dont_split_blocks_by_species"/>
        <output name="out_file1" file="fsa_interval2maf.dat" />
      </test>
    </tests>
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/interval2maf_pairwise.xml
--- a/tools/maf/interval2maf_pairwise.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/interval2maf_pairwise.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,4 +1,4 @@
-<tool id="Interval2Maf_pairwise1" name="Extract Pairwise MAF blocks">
+<tool id="Interval2Maf_pairwise1" name="Extract Pairwise MAF blocks" version="1.0.1">
   <description>given a set of genomic intervals</description>
   <command interpreter="python">interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafType=$mafType --interval_file=$input1 --output_file=$out_file1 --indexLocation=${GALAXY_DATA_INDEX_DIR}/maf_pairwise.loc</command>
   <inputs>
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/interval_maf_to_merged_fasta.py
--- a/tools/maf/interval_maf_to_merged_fasta.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/interval_maf_to_merged_fasta.py	Fri Sep 04 10:40:16 2009 -0400
@@ -19,6 +19,7 @@
    -i, --interval_file=i:       Input interval file
    -o, --output_file=o:      Output MAF file
    -p, --species=p: Species to include in output
+   -O, --overwrite_with_gaps=O: Overwrite bases found in a lower-scoring block with gaps interior to the sequence for a species.
    -z, --mafIndexFileDir=z: Directory of local maf_index.loc file
 
 usage: %prog dbkey_of_BED comma_separated_list_of_additional_dbkeys_to_extract comma_separated_list_of_indexed_maf_files input_gene_bed_file output_fasta_file cached|user GALAXY_DATA_INDEX_DIR
@@ -93,6 +94,11 @@
             strand_col = int( options.strandCol ) - 1
     
     mafIndexFile = "%s/maf_index.loc" % options.mafIndexFileDir
+    
+    overwrite_with_gaps = True
+    if options.overwrite_with_gaps and options.overwrite_with_gaps.lower() == 'false':
+        overwrite_with_gaps = False
+    
     #Finish parsing command line
         
     #get index for mafs based on type 
@@ -127,7 +133,7 @@
                 try:
                     starts, ends, fields = maf_utilities.get_starts_ends_fields_from_gene_bed( line )
                     #create spliced alignment object
-                    alignment = maf_utilities.get_spliced_region_alignment( index, primary_species, fields[0], starts, ends, strand = '+', species = species, mincols = mincols )
+                    alignment = maf_utilities.get_spliced_region_alignment( index, primary_species, fields[0], starts, ends, strand = '+', species = species, mincols = mincols, overwrite_with_gaps = overwrite_with_gaps )
                     primary_name = secondary_name = fields[3]
                     alignment_strand = fields[5]
                 except Exception, e:
@@ -136,7 +142,7 @@
             else: #Process as standard intervals
                 try:
                     #create spliced alignment object
-                    alignment = maf_utilities.get_region_alignment( index, primary_species, line.chrom, line.start, line.end, strand = '+', species = species, mincols = mincols )
+                    alignment = maf_utilities.get_region_alignment( index, primary_species, line.chrom, line.start, line.end, strand = '+', species = species, mincols = mincols, overwrite_with_gaps = overwrite_with_gaps )
                     primary_name = "%s(%s):%s-%s" % ( line.chrom, line.strand, line.start, line.end )
                     secondary_name = ""
                     alignment_strand = line.strand
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/interval_maf_to_merged_fasta.xml
--- a/tools/maf/interval_maf_to_merged_fasta.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/interval_maf_to_merged_fasta.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,8 +1,8 @@
-<tool id="Interval_Maf_Merged_Fasta2" name="Stitch MAF blocks">
+<tool id="Interval_Maf_Merged_Fasta2" name="Stitch MAF blocks" version="1.0.1">
   <description>given a set of genomic intervals</description>
   <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
 #else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
-#end if
+#end if# --overwrite_with_gaps=$overwrite_with_gaps
   </command>
   <inputs>
     <page>
@@ -30,25 +30,29 @@
             <when value="cached">
               <param name="maf_identifier" type="select" label="MAF Type" >
                 <options from_file="maf_index.loc">
-		          <column name="name" index="0"/>
-		          <column name="value" index="1"/>
-		          <column name="dbkey" index="2"/>
-		          <column name="species" index="3"/>
-		          <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
-		          <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
+                  <column name="name" index="0"/>
+                  <column name="value" index="1"/>
+                  <column name="dbkey" index="2"/>
+                  <column name="species" index="3"/>
+                  <filter type="data_meta" ref="input1" key="dbkey" column="2" multiple="True" separator=","/>
+                  <validator type="no_options" message="No alignments are available for the build associated with the selected interval file"/>
                 </options>
               </param> 
               <param name="species" type="select" display="checkboxes" multiple="true" label="Choose species" help="Select species to be included in the final alignment">
                 <options from_file="maf_index.loc">
-		          <column name="uid" index="1"/>
-		          <column name="value" index="3"/>
-		          <column name="name" index="3"/>
-		          <filter type="param_value" ref="maf_identifier" name="uid" column="1"/>
-		          <filter type="multiple_splitter" column="3" separator=","/>
+                  <column name="uid" index="1"/>
+                  <column name="value" index="3"/>
+                  <column name="name" index="3"/>
+                  <filter type="param_value" ref="maf_identifier" name="uid" column="1"/>
+                  <filter type="multiple_splitter" column="3" separator=","/>
                 </options>
               </param>
             </when>
         </conditional>
+        <param name="overwrite_with_gaps" type="select" label="Split into Gapless MAF blocks" help="When set to Yes, blocks are divided around gaps appearing in any species. This will prevent gaps occuring in the interior of the sequence for an aligning species from overwriting a nucleotide found for the same position in a lower-scoring block.">
+          <option value="True" selected="true">No</option>
+          <option value="False">Yes</option>
+        </param>
     </page>
    </inputs>
   <outputs>
@@ -59,21 +63,24 @@
       <param name="input1" value="13.bed" dbkey="hg18" ftype="bed"/>
       <param name="maf_source" value="cached"/>
       <param name="maf_identifier" value="17_WAY_MULTIZ_hg18"/>
-      <param name="species" value="hg18,mm8"/>
+      <param name="species" value="hg18,mm8"/>
+      <param name="overwrite_with_gaps" value="True"/>
       <output name="out_file1" file="interval_maf_to_merged_fasta_out3.fasta" />
     </test>
     <test>
       <param name="input1" value="1.bed" dbkey="hg17" ftype="bed"/>
       <param name="maf_source" value="cached"/>
       <param name="maf_identifier" value="8_WAY_MULTIZ_hg17"/>
-      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
+      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
+      <param name="overwrite_with_gaps" value="True"/>
       <output name="out_file1" file="interval_maf_to_merged_fasta_out.dat" />
     </test>
     <test>
       <param name="input1" value="1.bed" dbkey="hg17" ftype="bed"/>
       <param name="maf_source" value="user"/>
       <param name="maf_file" value="5.maf"/>
-      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
+      <param name="species" value="canFam1,hg17,mm5,panTro1,rn3"/>
+      <param name="overwrite_with_gaps" value="True"/>
       <output name="out_file1" file="interval_maf_to_merged_fasta_user_out.dat" />
     </test>
   </tests>
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_filter.py
--- a/tools/maf/maf_filter.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_filter.py	Fri Sep 04 10:40:16 2009 -0400
@@ -46,7 +46,7 @@
     i = 0
     blocks_kept = 0
     for i, maf_block in enumerate( maf_reader ):
-        if min_size <= maf_block.components[0].size <= max_size:
+        if min_size <= maf_block.text_size <= max_size:
             local = {'maf_block':maf_block, 'ret_val':False}
             execfile( script_file, {}, local )
             if local['ret_val']:
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_limit_size.py
--- a/tools/maf/maf_limit_size.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_limit_size.py	Fri Sep 04 10:40:16 2009 -0400
@@ -28,7 +28,7 @@
     blocks_kept = 0
     i = 0
     for i, m in enumerate( maf_reader ):
-        if min_size <= m.components[0].size <= max_size:
+        if min_size <= m.text_size <= max_size:
             maf_writer.write( m )
             blocks_kept += 1
     print 'Kept %s of %s blocks (%.2f%%).' % ( blocks_kept, i + 1, float( blocks_kept ) / float( i + 1 ) * 100.0 )
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_limit_size.xml
--- a/tools/maf/maf_limit_size.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_limit_size.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,4 +1,4 @@
-<tool id="maf_limit_size1" name="Filter MAF blocks">
+<tool id="maf_limit_size1" name="Filter MAF blocks" version="1.0.1">
   <description>by Size</description>
   <command interpreter="python">maf_limit_size.py $input1 $out_file1 $min_size $max_size</command>
   <inputs>
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_limit_to_species.py
--- a/tools/maf/maf_limit_to_species.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_limit_to_species.py	Fri Sep 04 10:40:16 2009 -0400
@@ -11,13 +11,18 @@
 from galaxy import eggs
 import pkg_resources; pkg_resources.require( "bx-python" )
 import bx.align.maf
+from galaxy.tools.util import maf_utilities
 import sys
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
 def main():
 
-    species = sys.argv[1].split( ',' )
+    species = maf_utilities.parse_species_option( sys.argv[1] )
+    if species:
+        spec_len = len( species )
+    else:
+        spec_len = 0
     try:
         maf_reader = bx.align.maf.Reader( open( sys.argv[2],'r' ) )
         maf_writer = bx.align.maf.Writer( open( sys.argv[3],'w' ) )
@@ -30,10 +35,11 @@
     
     maf_blocks_kept = 0
     for m in maf_reader:
-        if species != ['None']:
+        if species:
             m = m.limit_to_species( species )
         m.remove_all_gap_columns()
-        if ( species == ['None'] or allow_partial or len( m.components ) == len( species ) ) and len( m.components ) > min_species_per_block:
+        spec_in_block_len = len( maf_utilities.get_species_in_block( m ) )
+        if ( not species or allow_partial or spec_in_block_len == spec_len ) and spec_in_block_len > min_species_per_block:
             maf_writer.write( m )
             maf_blocks_kept += 1
     
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_split_by_species.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/maf/maf_split_by_species.py	Fri Sep 04 10:40:16 2009 -0400
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+"""
+Read a maf and split blocks by unique species combinations 
+"""
+import sys
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.align import maf
+from galaxy.tools.util import maf_utilities
+from galaxy.util import string_as_bool
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def __main__():    
+    try:
+        maf_reader = maf.Reader( open( sys.argv[1] ) )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error opening MAF: %s" % e )
+    try:
+        out = maf.Writer( open( sys.argv[2], "w") )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
+    try:
+        collapse_columns = string_as_bool( sys.argv[3] )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error determining collapse columns value: %s" % e )
+    
+    start_count = 0
+    end_count = 0
+    for start_count, start_block in enumerate( maf_reader ):
+        for block in maf_utilities.iter_blocks_split_by_species( start_block ):
+            if collapse_columns:
+                block.remove_all_gap_columns()
+            out.write( block )
+            end_count += 1
+    out.close()
+    
+    if end_count:
+        print "%i alignment blocks created from %i original blocks." % ( end_count, start_count + 1 )
+    else:
+        print "No alignment blocks were created."
+
+if __name__ == "__main__": __main__()
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_split_by_species.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/maf/maf_split_by_species.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -0,0 +1,216 @@
+<tool id="MAF_split_blocks_by_species1" name="Split MAF blocks" version="1.0.0">
+  <description>by Species</description>
+  <command interpreter="python">maf_split_by_species.py $input1 $out_file1 $collapse_columns</command>
+  <inputs>
+    <param format="maf" name="input1" type="data" label="MAF file to split"/>
+    <param name="collapse_columns" type="select" label="Collapse empty alignment columns" help="Removes columns that are gaps in all sequences">
+      <option value="True" selected="true">Yes</option>
+      <option value="False">No</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="maf" name="out_file1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="maf_split_by_species_in.maf"/>
+      <param name="collapse_columns" value="True"/>
+      <output name="out_file1" file="maf_split_by_species_collapsed_out.maf"/>
+    </test>
+    <test>
+      <param name="input1" value="maf_split_by_species_in.maf"/>
+      <param name="collapse_columns" value="False"/>
+      <output name="out_file1" file="maf_split_by_species_not_collapsed_out.maf"/>
+    </test>
+  </tests>
+  <help>
+
+**What it does**
+
+This tool examines each MAF block for multiple occurrences of a species in a single block. When this occurs, a block is split into multiple blocks where every combination of one sequence per species per block is represented.
+
+The interface for this tool has two inputs: 
+
+ * **MAF file to split**. Choose multiple alignments from history to be split by species.
+ * **Collapse empty alignment columns**. Should alignment columns containing only gaps in the new blocks be removed.
+
+-----
+
+**Example 1**: **Collapse empty alignment columns is Yes**:
+
+For the following alignment::
+
+  ##maf version=1
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+the tool will create **a single** history item containing 12 alignment blocks (notice that no columns contain only gaps)::
+
+  ##maf version=1
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT-GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC--GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+ 
+  a score=2047408.0
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC-GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC-GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGCAG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC---AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC---AG 
+
+-----
+
+**Example 1**: **Collapse empty alignment columns is Yes**:
+
+For the following alignment::
+
+  ##maf version=1
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+the tool will create **a single** history item containing 12 alignment blocks (notice that some columns contain only gaps)::
+
+  ##maf version=1
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723125 85 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723125 83 - 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCT--GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTCGTCCTCAG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 85 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984545 83 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTT--GTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 + 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTT------AG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+  
+  a score=2047408.0
+  s species1.chr1 147984645 79 - 245522847 ATGGCGTCGGCCTCCTCCGGGCCGTCGTC---GGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTTGTC---AG 
+  s species2.chr1 129723925 79 + 229575298 ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTTTGATCCCGCGGTCCCTTCCTGTACCTC------AG 
+  s species3.chr3  68255714 76 - 258222147 ATGGCGTCCGCCTCCTCAGGGCCAGCGGC---GGCGGGGTTTTCACCCCTTGATTCCGGGGTCCCTGCCGGTACCGC------AG 
+
+-------
+
+.. class:: infomark
+
+**About formats**
+
+**MAF format** multiple alignment format file. This format stores multiple alignments at the DNA level between entire genomes. 
+
+ - The .maf format is line-oriented. Each multiple alignment ends with a blank line.
+ - Each sequence in an alignment is on a single line.
+ - Lines starting with # are considered to be comments.
+ - Each multiple alignment is in a separate paragraph that begins with an "a" line and contains an "s" line for each sequence in the multiple alignment.
+ - Some MAF files may contain two optional line types: 
+
+   - An "i" line containing information about what is in the aligned species DNA before and after the immediately preceding "s" line; 
+   - An "e" line containing information about the size of the gap between the alignments that span the current block.
+
+    </help>
+</tool>
+
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_stats.py
--- a/tools/maf/maf_stats.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_stats.py	Fri Sep 04 10:40:16 2009 -0400
@@ -64,16 +64,19 @@
         total_length += region_length
         coverage = { dbkey: BitSet( region_length ) }
         
-        for block in maf_utilities.get_chopped_blocks_for_region( index, src, region, force_strand='+' ):
-            #make sure all species are known
-            for c in block.components:
-                spec = c.src.split( '.' )[0]
+        
+        for block in index.get_as_iterator( src, region.start, region.end ):
+            for spec in maf_utilities.get_species_in_block( block ):
                 if spec not in coverage: coverage[spec] = BitSet( region_length )
-            start_offset, alignment = maf_utilities.reduce_block_by_primary_genome( block, dbkey, region.chrom, region.start )
-            for i in range( len( alignment[dbkey] ) ):
-                for spec, text in alignment.items():
-                    if text[i] != '-':
-                        coverage[spec].set( start_offset + i )
+            for block in maf_utilities.iter_blocks_split_by_species( block ):
+                if maf_utilities.component_overlaps_region( block.get_component_by_src( src ), region ):
+                    #need to chop and orient the block
+                    block = maf_utilities.orient_block_by_region( maf_utilities.chop_block_by_region( block, src, region ), src, region, force_strand = '+' )
+                    start_offset, alignment = maf_utilities.reduce_block_by_primary_genome( block, dbkey, region.chrom, region.start )
+                    for i in range( len( alignment[dbkey] ) ):
+                        for spec, text in alignment.items():
+                            if text[i] != '-':
+                                coverage[spec].set( start_offset + i )
         if summary:
             #record summary
             for key in coverage.keys():
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_stats.xml
--- a/tools/maf/maf_stats.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_stats.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,4 +1,4 @@
-<tool id="maf_stats1" name="MAF Coverage Stats">
+<tool id="maf_stats1" name="MAF Coverage Stats" version="1.0.1">
   <description>Alignment coverage information</description>
   <command interpreter="python">
     maf_stats.py
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_to_fasta.xml
--- a/tools/maf/maf_to_fasta.xml	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_to_fasta.xml	Fri Sep 04 10:40:16 2009 -0400
@@ -1,4 +1,4 @@
-<tool id="MAF_To_Fasta1" name="Maf to FASTA">
+<tool id="MAF_To_Fasta1" name="MAF to FASTA" version="1.0.1">
   <description>Converts a MAF formated file to FASTA format</description>
   <command interpreter="python">
     #if $fasta_target_type.fasta_type == "multiple":#maf_to_fasta_multiple_sets.py $input1 $out_file1 $fasta_target_type.species $fasta_target_type.complete_blocks
@@ -47,7 +47,7 @@
       <param name="species" value="hg17,panTro1,rheMac2,rn3,mm7,canFam2,bosTau2,dasNov1"/>
       <param name="complete_blocks" value="partial_allowed"/>
       <param name="fasta_type" value="multiple"/>
-      <output name="out_file1" file="cf_maf2fasta.dat" ftype="fasta"/>
+      <output name="out_file1" file="cf_maf2fasta_new.dat" ftype="fasta"/>
     </test>
   </tests>
   <help>
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_to_fasta_concat.py
--- a/tools/maf/maf_to_fasta_concat.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_to_fasta_concat.py	Fri Sep 04 10:40:16 2009 -0400
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-Read a maf and print the text as a fasta file, concatenating blocks
+Read a maf and output a single block fasta file, concatenating blocks
 
 usage %prog species1,species2 maf_file out_file
 """
@@ -15,28 +15,43 @@
 assert sys.version_info[:2] >= ( 2, 4 )
 
 def __main__():
-    print "Restricted to species:", sys.argv[1]
+    try:
+        species = maf_utilities.parse_species_option( sys.argv[1] )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error determining species value: %s" % e )
+    try:
+        input_filename = sys.argv[2]
+    except Exception, e:
+        maf_utilities.tool_fail( "Error reading MAF filename: %s" % e )
+    try:
+        file_out = open( sys.argv[3], 'w' )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
     
-    texts = {}
+    if species:
+        print "Restricted to species: %s" % ', '.join( species )
+    else:
+        print "Not restricted to species."
     
-    input_filename = sys.argv[2]
-    output_filename = sys.argv[3]
-    species = sys.argv[1].split( ',' )
+    if not species:
+        try:
+            species = maf_utilities.get_species_in_maf( input_filename )
+        except Exception, e:
+            maf_utilities.tool_fail( "Error determining species in input MAF: %s" % e )
     
-    if "None" in species:
-        species = maf_utilities.get_species_in_maf( input_filename )
-    
-    file_out = open( output_filename, 'w' )
     for spec in species:
         file_out.write( ">" + spec + "\n" )
         try:
-            for block in maf.Reader( open( input_filename, 'r' ) ):
-                component = block.get_component_by_src_start( spec )
-                if component: file_out.write( component.text )
-                else: file_out.write( "-" * block.text_size )
-        except:
-            print >>sys.stderr, "Your MAF file appears to be malformed."
-            sys.exit()
+            for start_block in maf.Reader( open( input_filename, 'r' ) ):
+                for block in maf_utilities.iter_blocks_split_by_species( start_block ):
+                    block.remove_all_gap_columns() #remove extra gaps
+                    component = block.get_component_by_src_start( spec ) #blocks only have one occurrence of a particular species, so this is safe
+                    if component:
+                        file_out.write( component.text )
+                    else:
+                        file_out.write( "-" * block.text_size )
+        except Exception, e:
+            maf_utilities.tool_fail( "Your MAF file appears to be malformed: %s" % e )
         file_out.write( "\n" )
     file_out.close()
 
diff -r 99dcba7af5b6 -r e929a2d803e4 tools/maf/maf_to_fasta_multiple_sets.py
--- a/tools/maf/maf_to_fasta_multiple_sets.py	Fri Sep 04 10:31:23 2009 -0400
+++ b/tools/maf/maf_to_fasta_multiple_sets.py	Fri Sep 04 10:40:16 2009 -0400
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 """
-Read a maf and print the text as a fasta file.
+Read a maf and output a multiple block fasta file.
 """
 #Dan Blankenberg
 import sys
@@ -13,35 +13,46 @@
 assert sys.version_info[:2] >= ( 2, 4 )
 
 def __main__():
-    print "Restricted to species:", sys.argv[3]
+    try:
+        maf_reader = maf.Reader( open( sys.argv[1] ) )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error opening input MAF: %s" % e )
+    try:
+        file_out = open( sys.argv[2], 'w' )
+    except Exception, e:
+        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
+    try:
+        species = maf_utilities.parse_species_option( sys.argv[3] )
+        if species:
+            num_species = len( species )
+        else:
+            num_species = 0
+    except Exception, e:
+        maf_utilities.tool_fail( "Error determining species value: %s" % e )
+    try:
+        partial = sys.argv[4]
+    except Exception, e:
+        maf_utilities.tool_fail( "Error determining keep partial value: %s" % e )
     
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    species = sys.argv[3].split( ',' )
-    partial = sys.argv[4]
-    num_species = len( species )
+    if species:
+        print "Restricted to species: %s" % ', '.join( species )
+    else:
+        print "Not restricted to species."
     
-    file_in = open( input_filename, 'r' )
-    try:
-        maf_reader = maf.Reader( file_in )
-        
-        file_out = open( output_filename, 'w' )
-        
-        for block_num, block in enumerate( maf_reader ):
-            if "None" not in species:
-                block = block.limit_to_species( species )
-            if len( block.components ) < num_species and partial == "partial_disallowed": continue
-            for component in block.components:
-                spec, chrom = maf.src_split( component.src )
-                if not spec or not chrom:
-                        spec = chrom = component.src
-                file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, suffix = "%s_%i" % ( spec, block_num ) ) )
-                file_out.write( "%s\n" % component.text )
-            file_out.write( "\n" )
-        file_in.close()
-    except Exception, e:
-        print >>sys.stderr, "Your MAF file appears to be malformed:", e
-        sys.exit()
+    for block_num, block in enumerate( maf_reader ):
+        if species:
+            block = block.limit_to_species( species )
+            if len( maf_utilities.get_species_in_block( block ) ) < num_species and partial == "partial_disallowed": continue
+        spec_counts = {}
+        for component in block.components:
+            spec, chrom = maf_utilities.src_split( component.src )
+            if spec not in spec_counts:
+                spec_counts[ spec ] = 0
+            else:
+                spec_counts[ spec ] += 1
+            file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, { 'block_index' : block_num, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, block_num, spec_counts[ spec ] ) ) )
+            file_out.write( "%s\n" % component.text )
+        file_out.write( "\n" )
     file_out.close()
 
 if __name__ == "__main__": __main__()

    

Greg Von Kuster

tags

participants (1)