commit/galaxy-central: 8 new changesets
8 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/d969c2604171/ Changeset: d969c2604171 Branch: search User: kellrott Date: 2013-06-07 18:56:47 Summary: Central Merge Affected #: 559 files diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 .hgignore --- a/.hgignore +++ b/.hgignore @@ -20,6 +20,7 @@ database/pbs database/tmp database/*.sqlite +database/openid_consumer_cache # Python bytecode *.pyc @@ -35,6 +36,11 @@ tool_shed_webapp.pid hgweb.config* +# Reports Runtime Files +reports_webapp.lock +reports_webapp.log +reports_webapp.pid + # Config files universe_wsgi.ini reports_wsgi.ini @@ -54,7 +60,7 @@ job_conf.xml data_manager_conf.xml shed_data_manager_conf.xml - +visualizations_conf.xml static/welcome.html.* static/welcome.html @@ -75,6 +81,7 @@ # Test output run_functional_tests.html +test/tool_shed/tmp/* # Project files *.kpf diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 .hgtags --- a/.hgtags +++ b/.hgtags @@ -1,3 +1,5 @@ a4113cc1cb5eaa68091c9a73375f00555b66dd11 release_2013.01.13 1c717491139269651bb59687563da9410b84c65d release_2013.02.08 75f09617abaadbc8cc732bb8ee519decaeb56ea7 release_2013.04.01 +2cc8d10988e03257dc7b97f8bb332c7df745d1dd security_2013.04.08 +524f246ca85395082719ae7a6ff72260d7ad5612 release_2013.06.03 diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -93,26 +93,49 @@ JARS="/galaxy/software/jars" -for link in $LINKS; do - echo "Linking $link" - rm -f tool-data/`basename $link` - ln -sf $link tool-data -done - -if [ -d "$HYPHY" ]; then - echo "Linking $HYPHY" - rm -f tool-data/HYPHY - ln -sf $HYPHY tool-data/HYPHY +if [ ! $1 ]; then + type="standard" +elif [ $1 == "-ec2" ]; then + type="external-ec2" +else + type="unknown" fi -if [ -d "$JARS" ]; then - echo "Linking $JARS" - rm -f tool-data/shared/jars - ln -sf $JARS tool-data/shared/jars -fi +case $type in + external*) + echo "Running standalone buildbot setup..." + for sample in tool-data/*.sample; do + basename=${sample%.sample} + if [ ! -f $basename ]; then + echo "Copying $sample to $basename" + cp "$sample" "$basename" + fi + done + ;; + *) + echo "Running standard buildbot setup..." 
+ for link in $LINKS; do + echo "Linking $link" + rm -f tool-data/`basename $link` + ln -sf $link tool-data + done + + if [ -d "$HYPHY" ]; then + echo "Linking $HYPHY" + rm -f tool-data/HYPHY + ln -sf $HYPHY tool-data/HYPHY + fi + + if [ -d "$JARS" ]; then + echo "Linking $JARS" + rm -f tool-data/shared/jars + ln -sf $JARS tool-data/shared/jars + fi + ;; +esac for sample in $SAMPLES; do - file=`echo $sample | sed -e 's/\.sample$//'` + file=${sample%.sample} echo "Copying $sample to $file" cp $sample $file done diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -9,12 +9,7 @@ <datatype extension="fli" type="galaxy.datatypes.tabular:FeatureLocationIndex" display_in_upload="false"/><datatype extension="bam" type="galaxy.datatypes.binary:Bam" mimetype="application/octet-stream" display_in_upload="true"><converter file="bam_to_bai.xml" target_datatype="bai"/> - <converter file="bam_to_summary_tree_converter.xml" target_datatype="summary_tree"/> - <!-- - Caution: (a) this converter requires bedtools to be installed and (b) it is very memory intensive and - is not recommended for most laptops/desktops. - <converter file="bam_to_bigwig_converter.xml" target_datatype="bigwig"/> - --> + <converter file="bam_to_bigwig_converter.xml" target_datatype="bigwig"/><display file="ucsc/bam.xml" /><display file="ensembl/ensembl_bam.xml" /><display file="igv/bam.xml" /> @@ -22,10 +17,9 @@ </datatype><datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true"><converter file="bed_to_gff_converter.xml" target_datatype="gff"/> - <converter file="interval_to_coverage.xml" target_datatype="coverage"/><converter file="bed_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="bed_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - <converter file="bed_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/><converter file="bed_to_fli_converter.xml" target_datatype="fli"/><!-- <display file="ucsc/interval_as_bed.xml" /> --><display file="igb/bed.xml" /> @@ -51,7 +45,7 @@ <datatype extension="chrint" type="galaxy.datatypes.interval:ChromatinInteractions" display_in_upload="True"><converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - <converter file="interval_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><!-- MSI added Datatypes --><datatype extension="csv" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="true" /><!-- FIXME: csv is 'tabular'ized data, but not 'tab-delimited'; the class used here is intended for 'tab-delimited' --> @@ -93,7 +87,7 @@ <datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"><converter file="gff_to_bed_converter.xml" target_datatype="bed"/><converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/> - <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/><converter file="gff_to_fli_converter.xml" target_datatype="fli"/><display file="ensembl/ensembl_gff.xml" inherit="True"/><!-- 
<display file="gbrowse/gbrowse_gff.xml" inherit="True" /> --> @@ -103,7 +97,7 @@ <datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/><datatype extension="gtf" type="galaxy.datatypes.interval:Gtf" display_in_upload="true"><converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/> - <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><datatype extension="toolshed.gz" type="galaxy.datatypes.binary:Binary" mimetype="multipart/x-gzip" subclass="True" /><datatype extension="h5" type="galaxy.datatypes.binary:Binary" mimetype="application/octet-stream" subclass="True" /> @@ -115,7 +109,7 @@ <converter file="interval_to_bed12_converter.xml" target_datatype="bed12"/><converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - <converter file="interval_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="interval_to_bigwig_converter.xml" target_datatype="bigwig"/><!-- <display file="ucsc/interval_as_bed.xml" inherit="True" /> --><display file="ensembl/ensembl_interval_as_bed.xml" inherit="True"/><display file="gbrowse/gbrowse_interval_as_bed.xml" inherit="True"/> @@ -156,10 +150,13 @@ <datatype extension="encodepeak" type="galaxy.datatypes.interval:ENCODEPeak" display_in_upload="True"><converter file="encodepeak_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/><converter file="encodepeak_to_bgzip_converter.xml" target_datatype="bgzip"/> - <converter file="encodepeak_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><datatype extension="pdf" type="galaxy.datatypes.images:Pdf" mimetype="application/pdf"/> - <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true" /> + <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true"> + <converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/> + <converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> + </datatype><datatype extension="png" type="galaxy.datatypes.images:Png" mimetype="image/png"/><datatype extension="qual" type="galaxy.datatypes.qualityscore:QualityScore" /><datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/> @@ -169,7 +166,7 @@ <datatype extension="Roadmaps" type="galaxy.datatypes.assembly:Roadmaps" display_in_upload="false"/><datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"><converter file="sam_to_bam.xml" target_datatype="bam"/> - <converter file="sam_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="sam_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><datatype extension="scf" type="galaxy.datatypes.binary:Scf" mimetype="application/octet-stream" display_in_upload="true"/><datatype extension="Sequences" type="galaxy.datatypes.assembly:Sequences" display_in_upload="false"/> @@ -187,7 +184,7 @@ <converter file="vcf_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="vcf_to_vcf_bgzip_converter.xml" target_datatype="vcf_bgzip"/><converter file="vcf_to_tabix_converter.xml" target_datatype="tabix" 
depends_on="bgzip"/> - <converter file="vcf_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/><display file="ucsc/vcf.xml" /><display file="igv/vcf.xml" /><display file="rviewer/vcf.xml" inherit="True"/> @@ -200,7 +197,6 @@ <!-- <display file="gbrowse/gbrowse_wig.xml" /> --><display file="igb/wig.xml" /></datatype> - <datatype extension="summary_tree" type="galaxy.datatypes.binary:Binary" subclass="True" /><datatype extension="interval_index" type="galaxy.datatypes.binary:Binary" subclass="True" /><datatype extension="tabix" type="galaxy.datatypes.binary:Binary" subclass="True" /><datatype extension="bgzip" type="galaxy.datatypes.binary:Binary" subclass="True" /> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 eggs.ini --- a/eggs.ini +++ b/eggs.ini @@ -14,7 +14,6 @@ [eggs:platform] bx_python = 0.7.1 Cheetah = 2.2.2 -ctypes = 1.0.2 DRMAA_python = 0.2 MarkupSafe = 0.12 mercurial = 2.2.3 @@ -29,6 +28,7 @@ simplejson = 2.1.1 threadframe = 0.2 guppy = 0.1.8 +SQLAlchemy = 0.7.9 ; msgpack_python = 0.2.4 [eggs:noplatform] @@ -46,17 +46,17 @@ nose = 0.11.1 NoseHTML = 0.4.1 NoseTestDiff = 0.1 +Parsley = 1.1 Paste = 1.7.5.1 PasteDeploy = 1.5.0 pexpect = 2.4 python_openid = 2.2.5 python_daemon = 1.5.5 Routes = 1.12.3 -SQLAlchemy = 0.5.6 -sqlalchemy_migrate = 0.5.4 +sqlalchemy_migrate = 0.7.2 ssh = 1.7.14 SVGFig = 1.1.6 -Tempita = 0.1 +Tempita = 0.5.1 twill = 0.9 WebError = 0.8a WebHelpers = 0.2 @@ -75,7 +75,6 @@ MySQL_python = _5.1.41_static bx_python = _7b95ff194725 GeneTrack = _dev_48da9e998f0caf01c5be731e926f4b0481f658f0 -SQLAlchemy = _dev_r6498 pysam = _kanwei_b10f6e722e9a ; dependency source urls, necessary for scrambling. for an explanation, see diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 job_conf.xml.sample_advanced --- a/job_conf.xml.sample_advanced +++ b/job_conf.xml.sample_advanced @@ -7,7 +7,7 @@ <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner"/><plugin id="pbs" type="runner" load="galaxy.jobs.runners.pbs:PBSJobRunner" workers="2"/><plugin id="drmaa" type="runner" load="galaxy.jobs.runners.drmaa:DRMAAJobRunner"/> - <plugin id="lwr" type="runner" load="galaxy.jobs.runners.lwr.LwrJobRunner" /><!-- https://lwr.readthedocs.org --> + <plugin id="lwr" type="runner" load="galaxy.jobs.runners.lwr:LwrJobRunner" /><!-- https://lwr.readthedocs.org --><plugin id="cli" type="runner" load="galaxy.jobs.runners.cli:ShellJobRunner" /><plugin id="condor" type="runner" load="galaxy.jobs.runners.condor:CondorJobRunner" /></plugins> @@ -40,7 +40,6 @@ </destination><destination id="dynamic" runner="dynamic"><!-- A destination that represents a method in the dynamic runner. --> - <param id="type">python</param><param id="function">foo</param></destination><destination id="secure_lwr" runner="lwr"> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/fpconst.py --- a/lib/fpconst.py +++ /dev/null @@ -1,163 +0,0 @@ -"""Utilities for handling IEEE 754 floating point special values - -This python module implements constants and functions for working with -IEEE754 double-precision special values. It provides constants for -Not-a-Number (NaN), Positive Infinity (PosInf), and Negative Infinity -(NegInf), as well as functions to test for these values. - -The code is implemented in pure python by taking advantage of the -'struct' standard module. 
Care has been taken to generate proper -results on both big-endian and little-endian machines. Some efficiency -could be gained by translating the core routines into C. - -See <http://babbage.cs.qc.edu/courses/cs341/IEEE-754references.html> -for reference material on the IEEE 754 floating point standard. - -Further information on this package is available at -<http://www.analytics.washington.edu/statcomp/projects/rzope/fpconst/>. - -Author: Gregory R. Warnes <gregory_r_warnes@groton.pfizer.com> -Date:: 2003-04-08 -Copyright: (c) 2003, Pfizer, Inc. -""" - -__version__ = "0.7.0" -ident = "$Id: fpconst.py,v 1.12 2004/05/22 04:38:17 warnes Exp $" - -import struct, operator - -# check endianess -_big_endian = struct.pack('i',1)[0] != '\x01' - -# and define appropriate constants -if(_big_endian): - NaN = struct.unpack('d', '\x7F\xF8\x00\x00\x00\x00\x00\x00')[0] - PosInf = struct.unpack('d', '\x7F\xF0\x00\x00\x00\x00\x00\x00')[0] - NegInf = -PosInf -else: - NaN = struct.unpack('d', '\x00\x00\x00\x00\x00\x00\xf8\xff')[0] - PosInf = struct.unpack('d', '\x00\x00\x00\x00\x00\x00\xf0\x7f')[0] - NegInf = -PosInf - -def _double_as_bytes(dval): - "Use struct.unpack to decode a double precision float into eight bytes" - tmp = list(struct.unpack('8B',struct.pack('d', dval))) - if not _big_endian: - tmp.reverse() - return tmp - -## -## Functions to extract components of the IEEE 754 floating point format -## - -def _sign(dval): - "Extract the sign bit from a double-precision floating point value" - bb = _double_as_bytes(dval) - return bb[0] >> 7 & 0x01 - -def _exponent(dval): - """Extract the exponentent bits from a double-precision floating - point value. - - Note that for normalized values, the exponent bits have an offset - of 1023. As a consequence, the actual exponentent is obtained - by subtracting 1023 from the value returned by this function - """ - bb = _double_as_bytes(dval) - return (bb[0] << 4 | bb[1] >> 4) & 0x7ff - -def _mantissa(dval): - """Extract the _mantissa bits from a double-precision floating - point value.""" - - bb = _double_as_bytes(dval) - mantissa = bb[1] & 0x0f << 48 - mantissa += bb[2] << 40 - mantissa += bb[3] << 32 - mantissa += bb[4] - return mantissa - -def _zero_mantissa(dval): - """Determine whether the mantissa bits of the given double are all - zero.""" - bb = _double_as_bytes(dval) - return ((bb[1] & 0x0f) | reduce(operator.or_, bb[2:])) == 0 - -## -## Functions to test for IEEE 754 special values -## - -def isNaN(value): - "Determine if the argument is a IEEE 754 NaN (Not a Number) value." - return (_exponent(value)==0x7ff and not _zero_mantissa(value)) - -def isInf(value): - """Determine if the argument is an infinite IEEE 754 value (positive - or negative inifinity)""" - return (_exponent(value)==0x7ff and _zero_mantissa(value)) - -def isFinite(value): - """Determine if the argument is an finite IEEE 754 value (i.e., is - not NaN, positive or negative inifinity)""" - return (_exponent(value)!=0x7ff) - -def isPosInf(value): - "Determine if the argument is a IEEE 754 positive infinity value" - return (_sign(value)==0 and _exponent(value)==0x7ff and \ - _zero_mantissa(value)) - -def isNegInf(value): - "Determine if the argument is a IEEE 754 negative infinity value" - return (_sign(value)==1 and _exponent(value)==0x7ff and \ - _zero_mantissa(value)) - -## -## Functions to test public functions. 
-## - -def test_isNaN(): - assert( not isNaN(PosInf) ) - assert( not isNaN(NegInf) ) - assert( isNaN(NaN ) ) - assert( not isNaN( 1.0) ) - assert( not isNaN( -1.0) ) - -def test_isInf(): - assert( isInf(PosInf) ) - assert( isInf(NegInf) ) - assert( not isInf(NaN ) ) - assert( not isInf( 1.0) ) - assert( not isInf( -1.0) ) - -def test_isFinite(): - assert( not isFinite(PosInf) ) - assert( not isFinite(NegInf) ) - assert( not isFinite(NaN ) ) - assert( isFinite( 1.0) ) - assert( isFinite( -1.0) ) - -def test_isPosInf(): - assert( isPosInf(PosInf) ) - assert( not isPosInf(NegInf) ) - assert( not isPosInf(NaN ) ) - assert( not isPosInf( 1.0) ) - assert( not isPosInf( -1.0) ) - -def test_isNegInf(): - assert( not isNegInf(PosInf) ) - assert( isNegInf(NegInf) ) - assert( not isNegInf(NaN ) ) - assert( not isNegInf( 1.0) ) - assert( not isNegInf( -1.0) ) - -# overall test -def test(): - test_isNaN() - test_isInf() - test_isFinite() - test_isPosInf() - test_isNegInf() - -if __name__ == "__main__": - test() - diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/__init__.py --- a/lib/galaxy/__init__.py +++ b/lib/galaxy/__init__.py @@ -95,10 +95,15 @@ pkg_resources.Distribution._insert_on = pkg_resources.Distribution.insert_on pkg_resources.Distribution.insert_on = _insert_on -# patch to add the NullHandler class to logging -if sys.version_info[:2] < ( 2, 7 ): - import logging +# compat: BadZipFile introduced in Python 2.7 +import zipfile +if not hasattr( zipfile, 'BadZipFile' ): + zipfile.BadZipFile = zipfile.error + +# compat: patch to add the NullHandler class to logging +import logging +if not hasattr( logging, 'NullHandler' ): class NullHandler( logging.Handler ): def emit( self, record ): pass - logging.NullHandler = NullHandler + logging.NullHandler = NullHandler \ No newline at end of file diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -15,6 +15,7 @@ from galaxy.tags.tag_handler import GalaxyTagHandler from galaxy.visualization.genomes import Genomes from galaxy.visualization.data_providers.registry import DataProviderRegistry +from galaxy.visualization.registry import VisualizationsRegistry from galaxy.tools.imp_exp import load_history_imp_exp_tools from galaxy.tools.genome_index import load_genome_index_tools from galaxy.sample_tracking import external_service_types @@ -61,7 +62,8 @@ self.config.database_engine_options, database_query_profiling_proxy = self.config.database_query_profiling_proxy, object_store = self.object_store, - trace_logger=self.trace_logger ) + trace_logger=self.trace_logger, + use_pbkdf2=self.config.get_bool( 'use_pbkdf2', True ) ) # Manage installed tool shed repositories. self.installed_repository_manager = tool_shed.galaxy_install.InstalledRepositoryManager( self ) # Create an empty datatypes registry. @@ -90,7 +92,7 @@ # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables. self.tool_data_tables.load_from_config_file( config_filename=self.config.shed_tool_data_table_config, tool_data_path=self.tool_data_tables.tool_data_path, - from_shed_config=True ) + from_shed_config=False ) # Initialize the job management configuration self.job_config = jobs.JobConfiguration(self) # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file. 
@@ -120,6 +122,9 @@ load_history_imp_exp_tools( self.toolbox ) # Load genome indexer tool. load_genome_index_tools( self.toolbox ) + # visualizations registry: associates resources with visualizations, controls how to render + self.visualizations_registry = ( VisualizationsRegistry( self.config.root, self.config.visualizations_conf_path ) + if self.config.visualizations_conf_path else None ) # Load security policy. self.security_agent = self.model.security_agent self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions ) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -65,6 +65,11 @@ else: tcf = 'tool_conf.xml' self.tool_configs = [ resolve_path( p, self.root ) for p in listify( tcf ) ] + self.shed_tool_data_path = kwargs.get( "shed_tool_data_path", None ) + if self.shed_tool_data_path: + self.shed_tool_data_path = resolve_path( self.shed_tool_data_path, self.root ) + else: + self.shed_tool_data_path = self.tool_data_path self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root ) self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root ) self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) ) @@ -86,7 +91,6 @@ self.galaxy_data_manager_data_path = kwargs.get( 'galaxy_data_manager_data_path', self.tool_data_path ) self.tool_secret = kwargs.get( "tool_secret", "" ) self.id_secret = kwargs.get( "id_secret", "USING THE DEFAULT IS NOT SECURE!" ) - self.set_metadata_externally = string_as_bool( kwargs.get( "set_metadata_externally", "False" ) ) self.retry_metadata_internally = string_as_bool( kwargs.get( "retry_metadata_internally", "True" ) ) self.use_remote_user = string_as_bool( kwargs.get( "use_remote_user", "False" ) ) self.remote_user_maildomain = kwargs.get( "remote_user_maildomain", None ) @@ -155,6 +159,10 @@ self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea,ucla" ).lower().split(",") self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "modencode,sgd_yeast,tair,wormbase,wormbase_ws120,wormbase_ws140,wormbase_ws170,wormbase_ws180,wormbase_ws190,wormbase_ws200,wormbase_ws204,wormbase_ws210,wormbase_ws220,wormbase_ws225" ).lower().split(",") self.brand = kwargs.get( 'brand', None ) + # Configuration for the message box directly below the masthead. + self.message_box_visible = kwargs.get( 'message_box_visible', False ) + self.message_box_content = kwargs.get( 'message_box_content', None ) + self.message_box_class = kwargs.get( 'message_box_class', 'info' ) self.support_url = kwargs.get( 'support_url', 'http://wiki.g2.bx.psu.edu/Support' ) self.wiki_url = kwargs.get( 'wiki_url', 'http://g2.trac.bx.psu.edu/' ) self.blog_url = kwargs.get( 'blog_url', None ) @@ -166,6 +174,7 @@ self.enable_whoosh_library_search = string_as_bool( kwargs.get( 'enable_whoosh_library_search', False ) ) self.whoosh_index_dir = resolve_path( kwargs.get( "whoosh_index_dir", "database/whoosh_indexes" ), self.root ) self.ftp_upload_dir = kwargs.get( 'ftp_upload_dir', None ) + self.ftp_upload_dir_identifier = kwargs.get( 'ftp_upload_dir_identifier', 'email' ) # attribute on user - email, username, id, etc... 
self.ftp_upload_site = kwargs.get( 'ftp_upload_site', None ) self.allow_library_path_paste = kwargs.get( 'allow_library_path_paste', False ) self.disable_library_comptypes = kwargs.get( 'disable_library_comptypes', '' ).lower().split( ',' ) @@ -271,6 +280,8 @@ self.fluent_log = string_as_bool( kwargs.get( 'fluent_log', False ) ) self.fluent_host = kwargs.get( 'fluent_host', 'localhost' ) self.fluent_port = int( kwargs.get( 'fluent_port', 24224 ) ) + # visualizations registry config path + self.visualizations_conf_path = kwargs.get( 'visualizations_conf_path', None ) @property def sentry_dsn_public( self ): diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/assembly.py --- a/lib/galaxy/datatypes/assembly.py +++ b/lib/galaxy/datatypes/assembly.py @@ -5,14 +5,14 @@ """ import data +import logging +import os +import re +import sys from galaxy.datatypes import sequence -import logging, os, sys, time, tempfile, shutil, string, glob, re -import galaxy.model -from galaxy.datatypes import metadata +from galaxy.datatypes.images import Html from galaxy.datatypes.metadata import MetadataElement -from galaxy import util -from galaxy.datatypes.images import Html -from sniff import * + log = logging.getLogger(__name__) @@ -174,7 +174,6 @@ gen_msg = '' try: efp = dataset.extra_files_path - flist = os.listdir(efp) log_path = os.path.join(efp,'Log') f = open(log_path,'r') log_content = f.read(1000) @@ -223,5 +222,5 @@ self.regenerate_primary_file(dataset) if __name__ == '__main__': - import doctest, sys + import doctest doctest.testmod(sys.modules[__name__]) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -2,18 +2,26 @@ Binary classes """ -import data, logging, binascii +import binascii +import data +import gzip +import logging +import os +import shutil +import struct +import subprocess +import tempfile +import zipfile + +from urllib import urlencode, quote_plus +from galaxy import eggs +eggs.require( "bx-python" ) + +from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE + from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * -from galaxy import eggs -import pkg_resources -pkg_resources.require( "bx-python" ) -from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE -from urllib import urlencode, quote_plus -import zipfile, gzip -import os, subprocess, tempfile -import struct log = logging.getLogger(__name__) @@ -94,6 +102,9 @@ class Bam( Binary ): """Class describing a BAM binary file""" file_ext = "bam" + track_type = "ReadTrack" + data_sources = { "data": "bai", "index": "bigwig" } + MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, file_ext="bai", readonly=True, no_value=None, visible=False, optional=True ) def _get_samtools_version( self ): @@ -244,9 +255,7 @@ return dataset.peek except: return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) - def get_track_type( self ): - return "ReadTrack", { "data": "bai", "index": [ "bigwig", "summary_tree" ] } - + Binary.register_sniffable_binary_format("bam", "bam", Bam) class H5( Binary ): @@ -324,6 +333,9 @@ The supplemental info in the paper has the binary details: http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btq351v1 """ 
+ track_type = "LineTrack" + data_sources = { "data_standalone": "bigwig" } + def __init__( self, **kwd ): Binary.__init__( self, **kwd ) self._magic = 0x888FFC26 @@ -348,19 +360,18 @@ return dataset.peek except: return "Binary UCSC %s file (%s)" % ( self._name, data.nice_size( dataset.get_size() ) ) - def get_track_type( self ): - return "LineTrack", {"data_standalone": "bigwig"} - + Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig) class BigBed(BigWig): """BigBed support from UCSC.""" + + data_sources = { "data_standalone": "bigbed" } + def __init__( self, **kwd ): Binary.__init__( self, **kwd ) self._magic = 0x8789F2EB self._name = "BigBed" - def get_track_type( self ): - return "LineTrack", {"data_standalone": "bigbed"} Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/checkers.py --- a/lib/galaxy/datatypes/checkers.py +++ b/lib/galaxy/datatypes/checkers.py @@ -58,7 +58,7 @@ for chars in temp: for char in chars: chars_read += 1 - if ord( char ) > 128: + if util.is_binary( char ): is_binary = True break if chars_read > 100: diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/chrominfo.py --- a/lib/galaxy/datatypes/chrominfo.py +++ b/lib/galaxy/datatypes/chrominfo.py @@ -1,7 +1,3 @@ -import data -from galaxy import util -from galaxy.datatypes.sniff import * -from galaxy.web import url_for from tabular import Tabular from galaxy.datatypes import metadata from galaxy.datatypes.metadata import MetadataElement diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml --- a/lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml +++ b/lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml @@ -1,7 +1,14 @@ <tool id="CONVERTER_bam_to_bigwig_0" name="Convert BAM to BigWig" version="1.0.0" hidden="true"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --><command> - bedtools genomecov -bg -split -ibam $input -g $chromInfo | wigToBigWig stdin $chromInfo $output + bedtools genomecov -bg -split -ibam $input -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. 
+ > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output </command><inputs><param format="bam" name="input" type="data" label="Choose BAM file"/> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bam_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/bam_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_bam_to_summary_tree_0" name="Convert BAM to Summary Tree" version="1.0.0" hidden="true"> - <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python"> - sam_or_bam_to_summary_tree_converter.py --bam $input1 $input1.metadata.bam_index $output1 - </command> - <inputs> - <param format="bam" name="input1" type="data" label="Choose BAM file"/> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bed_gff_or_vcf_to_bigwig_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/bed_gff_or_vcf_to_bigwig_converter.xml @@ -0,0 +1,25 @@ +<tool id="CONVERTER_bed_gff_or_vcf_to_bigwig_0" name="Convert BED, GFF, or VCF to BigWig" version="1.0.0" hidden="true"> + <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command> + ## Remove comments and sort by chromosome. + grep -v '^#' $input | sort -k1,1 | + + ## Generate coverage bedgraph. + bedtools genomecov -bg -split -i stdin -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. 
+ > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output + </command> + <inputs> + <param format="bed,gff,vcf" name="input" type="data" label="Choose input file"/> + </inputs> + <outputs> + <data format="bigwig" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_bed_to_summary_tree_0" name="Convert BED to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1</command> - <inputs> - <page> - <param format="bed" name="input1" type="data" label="Choose BED file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/encodepeak_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/encodepeak_to_summary_tree_converter.xml +++ /dev/null @@ -1,20 +0,0 @@ -<tool id="CONVERTER_encodepeak_to_summary_tree_0" name="Convert ENCODEPeak to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py - -c ${input1.metadata.chromCol} - -s ${input1.metadata.startCol} - -e ${input1.metadata.endCol} - $input1 $output1 - </command> - - <inputs> - <page> - <param format="ENCODEPeak" name="input1" type="data" label="Choose ENCODEPeak file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/gff_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/gff_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_gff_to_summary_tree_0" name="Convert GFF to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1 --gff</command> - <inputs> - <page> - <param format="gff" name="input1" type="data" label="Choose GFF file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/interval_to_bigwig_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_bigwig_converter.xml @@ -0,0 +1,33 @@ +<tool id="CONVERTER_interval_to_bigwig_0" name="Convert Genomic Intervals To Coverage"> + <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <!-- Used on the metadata edit page. --> + <command> + + ## Remove comments and sort by chromosome. + grep -v '^#' $input1 | sort -k${input1.metadata.chromCol},${input1.metadata.chromCol} | + + ## Create simple BED by cutting chrom, start, and end columns. + awk -v OFS=' ' '{print $${input1.metadata.chromCol},$${input1.metadata.startCol},$${input1.metadata.endCol} }' | + + ## Generate coverage bedgraph. 
+ bedtools genomecov -bg -split -i stdin -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. + > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output + + </command> + <inputs> + <page> + <param format="interval" name="input1" type="data" label="Choose intervals"/> + </page> + </inputs> + <outputs> + <data format="bigwig" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -""" -Convert from interval file to summary tree file. Default input file format is BED (0-based, half-open intervals). - -usage: %prog <options> in_file out_file - -c, --chr-col: chromosome column, default=1 - -s, --start-col: start column, default=2 - -e, --end-col: end column, default=3 - -t, --strand-col: strand column, default=6 - -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval -""" -from __future__ import division - -import sys, fileinput, optparse -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from galaxy.visualization.tracks.summary import * -from bx.intervals.io import * -from galaxy.datatypes.util.gff_util import * - -def main(): - # Read options, args. - parser = optparse.OptionParser() - parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col', default=1 ) - parser.add_option( '-s', '--start-col', type='int', dest='start_col', default=2 ) - parser.add_option( '-e', '--end-col', type='int', dest='end_col', default=3 ) - parser.add_option( '-t', '--strand-col', type='int', dest='strand_col', default=6 ) - parser.add_option( '-G', '--gff', dest="gff_format", action="store_true" ) - (options, args) = parser.parse_args() - input_fname, output_fname = args - - # Convert column indices to 0-based. - options.chrom_col -= 1 - options.start_col -= 1 - options.end_col -= 1 - options.strand_col -= 1 - - # Do conversion. - if options.gff_format: - reader_wrapper_class = GFFReaderWrapper - chr_col, start_col, end_col, strand_col = ( 0, 3, 4, 6 ) - else: - reader_wrapper_class = NiceReaderWrapper - chr_col, start_col, end_col, strand_col = ( options.chrom_col, options.start_col, options.end_col, options.strand_col ) - reader_wrapper = reader_wrapper_class( fileinput.FileInput( input_fname ), - chrom_col=chr_col, - start_col=start_col, - end_col=end_col, - strand_col=strand_col, - fix_strand=True ) - st = SummaryTree() - for feature in list( reader_wrapper ): - if isinstance( feature, GenomicInterval ): - # Tree expects BED coordinates. 
- if type( feature ) is GFFFeature: - convert_gff_coords_to_bed( feature ) - st.insert_range( feature.chrom, long( feature.start ), long( feature.end ) ) - - st.write( output_fname ) - -if __name__ == "__main__": - main() diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.xml +++ /dev/null @@ -1,20 +0,0 @@ -<tool id="CONVERTER_interval_to_summary_tree_0" name="Convert Interval to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py - -c ${input1.metadata.chromCol} - -s ${input1.metadata.startCol} - -e ${input1.metadata.endCol} - $input1 $output1 - </command> - - <inputs> - <page> - <param format="interval" name="input1" type="data" label="Choose Interval file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py --- /dev/null +++ b/lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +""" +Convert from pileup file to interval index file. + +usage: %prog <options> in_file out_file +""" + +from __future__ import division + +import sys, fileinput, optparse +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from galaxy.visualization.tracks.summary import * +from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed +from bx.interval_index_file import Indexes + +def main(): + + # Read options, args. + parser = optparse.OptionParser() + (options, args) = parser.parse_args() + input_fname, output_fname = args + + # Do conversion. + index = Indexes() + offset = 0 + for line in open( input_fname, "r" ): + chrom, start = line.split()[ 0:2 ] + # Pileup format is 1-based. 
+ start = int( start ) - 1 + index.add( chrom, start, start + 1, offset ) + offset += len( line ) + + index.write( open(output_fname, "w") ) + +if __name__ == "__main__": + main() + \ No newline at end of file diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.xml @@ -0,0 +1,15 @@ +<tool id="CONVERTER_pileup_to_interval_index_0" name="Convert Pileup to Interval Index" version="1.0.0" hidden="true"> +<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command interpreter="python">pileup_to_interval_index_converter.py $input $output + </command> + <inputs> + <page> + <param format="pileup" name="input" type="data" label="Choose Pileup file"/> + </page> + </inputs> + <outputs> + <data format="interval_index" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division - -import sys, os, optparse -sys.stderr = open(os.devnull, 'w') # suppress stderr as cython produces warning on some systems: - # csamtools.so:6: RuntimeWarning: __builtin__.file size changed - -from galaxy import eggs -import pkg_resources - -if sys.version_info[:2] == (2, 4): - pkg_resources.require( "ctypes" ) -pkg_resources.require( "pysam" ) - -from pysam import csamtools -from galaxy.visualization.tracks.summary import * - -def main(): - parser = optparse.OptionParser() - parser.add_option( '-S', '--sam', action="store_true", dest="is_sam" ) - parser.add_option( '-B', '--bam', action="store_true", dest="is_bam" ) - options, args = parser.parse_args() - - if options.is_bam: - input_fname = args[0] - index_fname = args[1] - out_fname = args[2] - samfile = csamtools.Samfile( filename=input_fname, mode='rb', index_filename=index_fname ) - elif options.is_sam: - input_fname = args[0] - out_fname = args[1] - samfile = csamtools.Samfile( filename=input_fname, mode='r' ) - - st = SummaryTree() - for read in samfile.fetch(): - st.insert_range( samfile.getrname( read.rname ), read.pos, read.pos + read.rlen ) - - st.write(out_fname) - -if __name__ == "__main__": - main() diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/sam_to_bigwig_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/sam_to_bigwig_converter.xml @@ -0,0 +1,20 @@ +<tool id="CONVERTER_sam_to_bigwig_0" name="Convert SAM to BigWig" version="1.0.0" hidden="true"> + <command> + samtools view -bh $input | bedtools genomecov -bg -split -ibam stdin -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. 
+ > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output + </command> + <inputs> + <param format="bam" name="input" type="data" label="Choose BAM file"/> + </inputs> + <outputs> + <data format="bigwig" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/sam_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/sam_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_sam_to_summary_tree_0" name="Convert SAM to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">sam_or_bam_to_summary_tree_converter.py --sam $input1 $output1</command> - <inputs> - <page> - <param format="sam" name="input1" type="data" label="Choose sam file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -""" -Convert from VCF file to summary tree file. - -usage: %prog in_file out_file -""" -from __future__ import division - -import optparse -import galaxy_utils.sequence.vcf -from galaxy.visualization.tracks.summary import SummaryTree - -def main(): - # Read options, args. - parser = optparse.OptionParser() - (options, args) = parser.parse_args() - in_file, out_file = args - - # Do conversion. - st = SummaryTree() - for line in list( galaxy_utils.sequence.vcf.Reader( open( in_file ) ) ): - # VCF format provides a chrom and 1-based position for each variant. - # SummaryTree expects 0-based coordinates. 
- st.insert_range( line.chrom, long( line.pos-1 ), long( line.pos ) ) - - st.write(out_file) - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_vcf_to_summary_tree_0" name="Convert VCF to Summary Tree" version="1.0.0" hidden="true"> - <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">vcf_to_summary_tree_converter.py $input1 $output1</command> - <inputs> - <page> - <param format="vcf" name="input1" type="data" label="Choose VCF file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/coverage.py --- a/lib/galaxy/datatypes/coverage.py +++ b/lib/galaxy/datatypes/coverage.py @@ -2,21 +2,14 @@ Coverage datatypes """ -import pkg_resources -pkg_resources.require( "bx-python" ) -import logging, os, sys, time, tempfile, shutil -import data -from galaxy import util -from galaxy.datatypes.sniff import * -from galaxy.web import url_for -from cgi import escape -import urllib -from bx.intervals.io import * +import logging +import math + +from galaxy import eggs from galaxy.datatypes import metadata from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes.tabular import Tabular -import math log = logging.getLogger(__name__) @@ -34,7 +27,7 @@ Assumes we have a numpy file. """ # Maybe if we import here people will still be able to use Galaxy when numpy kills it - pkg_resources.require("numpy>=1.2.1") + eggs.require("numpy>=1.2.1") #from numpy.lib import format import numpy diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -2,6 +2,7 @@ import metadata import mimetypes import os +import shutil import sys import tempfile import zipfile @@ -17,12 +18,6 @@ eggs.require( "Paste" ) import paste - -if sys.version_info[:2] < ( 2, 6 ): - zipfile.BadZipFile = zipfile.error -if sys.version_info[:2] < ( 2, 5 ): - zipfile.LargeZipFile = zipfile.error - log = logging.getLogger(__name__) tmpd = tempfile.mkdtemp() @@ -103,6 +98,12 @@ #A per datatype setting (inherited): max file size (in bytes) for setting optional metadata _max_optional_metadata_filesize = None + # Trackster track type. + track_type = None + + # Data sources. + data_sources = {} + def __init__(self, **kwd): """Initialize the datatype""" object.__init__(self, **kwd) @@ -545,21 +546,21 @@ return False - def merge( split_files, output_file): """ - TODO: Do we need to merge gzip files using gzjoin? cat seems to work, - but might be brittle. Need to revisit this. + Merge files with copy.copyfileobj() will not hit the + max argument limitation of cat. gz and bz2 files are also working. 
""" if not split_files: raise ValueError('Asked to merge zero files as %s' % output_file) elif len(split_files) == 1: - cmd = 'mv -f %s %s' % ( split_files[0], output_file ) + shutil.copyfileobj(open(split_files[0], 'rb'), open(output_file, 'wb')) else: - cmd = 'cat %s > %s' % ( ' '.join(split_files), output_file ) - result = os.system(cmd) - if result != 0: - raise Exception('Result %s from %s' % (result, cmd)) + fdst = open(output_file, 'wb') + for fsrc in split_files: + shutil.copyfileobj(open(fsrc, 'rb'), fdst) + fdst.close() + merge = staticmethod(merge) def get_visualizations( self, dataset ): @@ -567,7 +568,7 @@ Returns a list of visualizations for datatype. """ - if hasattr( self, 'get_track_type' ): + if self.track_type: return [ 'trackster', 'circster' ] return [] @@ -864,7 +865,7 @@ text = "%s file" % file_type else: try: - text = unicode( '\n'.join( lines ), 'utf-8' ) + text = util.unicodify( '\n'.join( lines ) ) except UnicodeDecodeError: text = "binary/unknown file" return text diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/display_applications/application.py --- a/lib/galaxy/datatypes/display_applications/application.py +++ b/lib/galaxy/datatypes/display_applications/application.py @@ -12,7 +12,7 @@ log = logging.getLogger( __name__ ) #Any basic functions that we want to provide as a basic part of parameter dict should be added to this dict -BASE_PARAMS = { 'qp': quote_plus, 'url_for':url_for } #url_for has route memory... +BASE_PARAMS = { 'qp': quote_plus, 'url_for':url_for } class DisplayApplicationLink( object ): @classmethod @@ -40,7 +40,7 @@ self.name = None def get_display_url( self, data, trans ): dataset_hash, user_hash = encode_dataset_user( trans, data, None ) - return url_for( controller='/dataset', + return url_for( controller='dataset', action="display_application", dataset_id=dataset_hash, user_id=user_hash, diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/display_applications/link_generator.py --- a/lib/galaxy/datatypes/display_applications/link_generator.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Classes to generate links for old-style display applications. - -Separating Transaction based elements of display applications from datatypes. -""" - -#FIXME: The code contained within this file is for old-style display applications, but -#this module namespace is intended to only handle the new-style display applications. - -import urllib - -# for the url_for hack -import pkg_resources -pkg_resources.require( "Routes" ) -import routes - -from galaxy import util -from galaxy.web import url_for -from galaxy.datatypes.interval import Interval, Gff, Wiggle, CustomTrack - -#TODO: Ideally, these classes would be instantiated in the trans (or some other semi-persistant fixture) -# Currently, these are instantiated per HDA which is not the best solution - -#TODO: these could be extended to handle file_function and parse/contain the builds.txt files - -#HACK: these duplicate functionality from the individual datatype classes themselves - -def get_display_app_link_generator( display_app_name ): - """Returns an instance of the proper link generator class - based on the display_app_name or DisplayAppLinkGenerator - if the display_app_name is unrecognized. 
- """ - if display_app_name == 'ucsc': - return UCSCDisplayAppLinkGenerator() - - elif display_app_name == 'gbrowse': - return GBrowseDisplayAppLinkGenerator() - - return DisplayAppLinkGenerator() - - -class DisplayAppLinkGenerator( object ): - """Base class for display application link generators. - - This class returns an empty list of links for all datatypes. - """ - def __init__( self ): - self.display_app_name = '' - - def no_links_available( self, dataset, app, base_url, url_for=url_for ): - """Called when no display application links are available - for this display app name and datatype combination. - """ - return [] - - def _link_function_from_datatype( self, datatype ): - """Dispatch to proper link generating function on datatype. - """ - return self.no_links_available - - def generate_links( self, trans, dataset ): - # here's the hack - which is expensive (time) - web_url_for = routes.URLGenerator( trans.webapp.mapper, trans.environ ) - - link_function = self._link_function_from_datatype( dataset.datatype ) - display_links = link_function( dataset, trans.app, trans.request.base, url_for=web_url_for ) - - return display_links - - -class UCSCDisplayAppLinkGenerator( DisplayAppLinkGenerator ): - """Class for generating links to display data in the - UCSC genome browser. - - This class returns links for the following datatypes and their subclasses: - Interval, Wiggle, Gff, CustomTrack - """ - def __init__( self ): - self.display_app_name = 'ucsc' - - def _link_function_from_datatype( self, datatype ): - """Dispatch to proper link generating function based on datatype. - """ - if( ( isinstance( datatype, Interval ) ) - or ( isinstance( datatype, Wiggle ) ) - or ( isinstance( datatype, Gff ) ) - or ( isinstance( datatype, CustomTrack ) ) ): - return self.ucsc_links - else: - return super( UCSCDisplayAppLinkGenerator, self )._link_function_from_datatype( datatype ) - - def ucsc_links( self, dataset, app, base_url, url_for=url_for ): - """Generate links to UCSC genome browser sites based on the dbkey - and content of dataset. - """ - # this is a refactor of Interval.ucsc_links, GFF.ucsc_links, Wiggle.ucsc_links, and CustomTrack.ucsc_links - #TODO: app vars can be moved into init (and base_url as well) - chrom, start, stop = dataset.datatype.get_estimated_display_viewport( dataset ) - if chrom is None: - return [] - ret_val = [] - for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey): - if site_name in app.config.ucsc_display_sites: - internal_url = url_for( controller='dataset', dataset_id=dataset.id, - action='display_at', filename='%s_%s' % ( self.display_app_name, site_name ) ) - base_url = app.config.get( "display_at_callback", base_url ) - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" - % (base_url, url_for( controller='root' ), dataset.id, self.display_app_name) ) - redirect_url = urllib.quote_plus( "%sdb=%s&position=%s:%s-%s&hgt.customText=%%s" - % (site_url, dataset.dbkey, chrom, start, stop ) ) - - link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) - ret_val.append( ( site_name, link ) ) - - return ret_val - - -class GBrowseDisplayAppLinkGenerator( DisplayAppLinkGenerator ): - """Class for generating links to display data in the - GBrowse genome browser. 
- - This class returns links for the following datatypes and their subclasses: - Gff, Wiggle - """ - def __init__( self ): - self.display_app_name = 'gbrowse' - - def _link_function_from_datatype( self, datatype ): - """Dispatch to proper link generating function based on datatype. - """ - if( ( isinstance( datatype, Gff ) ) - or ( isinstance( datatype, Wiggle ) ) ): - return self.gbrowse_links - else: - return super( GBrowseDisplayAppLinkGenerator, self )._link_function_from_datatype( datatype ) - - def gbrowse_links( self, dataset, app, base_url, url_for=url_for ): - """Generate links to GBrowse genome browser sites based on the dbkey - and content of dataset. - """ - # when normalized for var names, Gff.gbrowse_links and Wiggle.gbrowse_links are the same - # also: almost identical to ucsc_links except for the 'chr' stripping, sites_by_build, config key - # could be refactored even more - chrom, start, stop = dataset.datatype.get_estimated_display_viewport( dataset ) - if chrom is None: - return [] - ret_val = [] - for site_name, site_url in util.get_gbrowse_sites_by_build( dataset.dbkey ): - if site_name in app.config.gbrowse_display_sites: - # strip chr from seqid - if chrom.startswith( 'chr' ) and len ( chrom ) > 3: - chrom = chrom[3:] - internal_url = url_for( controller='dataset', dataset_id=dataset.id, - action='display_at', filename='%s_%s' % ( self.display_app_name, site_name ) ) - redirect_url = urllib.quote_plus( "%s/?q=%s:%s..%s&eurl=%%s" % ( site_url, chrom, start, stop ) ) - base_url = app.config.get( "display_at_callback", base_url ) - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" - % ( base_url, url_for( controller='root' ), dataset.id, self.display_app_name ) ) - link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) - ret_val.append( ( site_name, link ) ) - - return ret_val diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/display_applications/parameters.py --- a/lib/galaxy/datatypes/display_applications/parameters.py +++ b/lib/galaxy/datatypes/display_applications/parameters.py @@ -163,7 +163,7 @@ if self.parameter.strip_https and base_url[ : 5].lower() == 'https': base_url = "http%s" % base_url[ 5: ] return "%s%s" % ( base_url, - url_for( controller='/dataset', + url_for( controller='dataset', action="display_application", dataset_id=self._dataset_hash, user_id=self._user_hash, diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py +++ b/lib/galaxy/datatypes/interval.py @@ -46,6 +46,8 @@ """Tab delimited data containing interval information""" file_ext = "interval" line_class = "region" + track_type = "FeatureTrack" + data_sources = { "data": "tabix", "index": "bigwig" } """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) @@ -242,7 +244,7 @@ # Accumulate links for valid sites ret_val = [] for site_name, site_url in valid_sites: - internal_url = url_for( controller='/dataset', dataset_id=dataset.id, + internal_url = url_for( controller='dataset', dataset_id=dataset.id, action='display_at', filename='ucsc_' + site_name ) display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" % (base_url, url_for( controller='root' ), dataset.id, type) ) @@ -328,17 +330,13 @@ def get_track_resolution( self, dataset, 
start, end): return None - - def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree"} class BedGraph( Interval ): """Tab delimited chrom/start/end/datavalue dataset""" file_ext = "bedgraph" - - def get_track_type( self ): - return "LineTrack", { "data": "bigwig", "index": "bigwig" } + track_type = "LineTrack" + data_sources = { "data": "bigwig", "index": "bigwig" } def as_ucsc_display_file( self, dataset, **kwd ): """ @@ -356,6 +354,8 @@ class Bed( Interval ): """Tab delimited data in BED format""" file_ext = "bed" + data_sources = { "data": "tabix", "index": "bigwig", "feature_search": "fli" } + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) @@ -510,9 +510,6 @@ else: return False return True except: return False - - def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree", "feature_search": "fli"} class BedStrict( Bed ): """Tab delimited data in strict BED format - no non-standard columns allowed""" @@ -572,6 +569,8 @@ """Tab delimited data in Gff format""" file_ext = "gff" column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Group' ] + data_sources = { "data": "interval_index", "index": "bigwig", "feature_search": "fli" } + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="columns", default=9, desc="Number of columns", readonly=True, visible=False ) @@ -783,10 +782,6 @@ return True except: return False - - def get_track_type( self ): - return "FeatureTrack", {"data": "interval_index", "index": "summary_tree", "feature_search": "fli"} - class Gff3( Gff ): """Tab delimited data in Gff3 format""" @@ -794,6 +789,7 @@ valid_gff3_strand = ['+', '-', '.', '?'] valid_gff3_phase = ['.', '0', '1', '2'] column_names = [ 'Seqid', 'Source', 'Type', 'Start', 'End', 'Score', 'Strand', 'Phase', 'Attributes' ] + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="column_types", default=['str','str','str','int','int','float','str','int','list'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False ) @@ -898,6 +894,7 @@ """Tab delimited data in Gtf format""" file_ext = "gtf" column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Attributes' ] + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="columns", default=9, desc="Number of columns", readonly=True, visible=False ) @@ -966,6 +963,8 @@ class Wiggle( Tabular, _RemoteCallMixin ): """Tab delimited data in wiggle format""" file_ext = "wig" + track_type = "LineTrack" + data_sources = { "data": "bigwig", "index": "bigwig" } MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) @@ -1146,9 +1145,6 @@ resolution = min( resolution, 100000 ) resolution = max( resolution, 1 ) return resolution - - def get_track_type( self ): - return "LineTrack", { "data": "bigwig", "index": "bigwig" } class CustomTrack ( Tabular ): """UCSC CustomTrack""" @@ -1292,6 +1288,7 @@ file_ext = "encodepeak" column_names = [ 'Chrom', 'Start', 'End', 'Name', 'Score', 'Strand', 'SignalValue', 'pValue', 'qValue', 'Peak' ] + data_sources = { "data": "tabix", "index": "bigwig" } """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) @@ -1303,15 +1300,14 @@ def sniff( self, 
filename ): return False - def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree"} - class ChromatinInteractions( Interval ): ''' Chromatin interactions obtained from 3C/5C/Hi-C experiments. ''' file_ext = "chrint" + track_type = "DiagonalHeatmapTrack" + data_sources = { "data": "tabix", "index": "bigwig" } column_names = [ 'Chrom1', 'Start1', 'End1', 'Chrom2', 'Start2', 'End2', 'Value' ] @@ -1328,11 +1324,6 @@ def sniff( self, filename ): return False - - def get_track_type( self ): - return "DiagonalHeatmapTrack", {"data": "tabix", "index": "summary_tree"} - - if __name__ == '__main__': import doctest, sys diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -1,22 +1,33 @@ -import sys, logging, copy, shutil, weakref, cPickle, tempfile, os +""" +Galaxy Metadata + +""" +from galaxy import eggs +eggs.require("simplejson") + + +import copy +import cPickle +import logging +import os +import shutil +import simplejson +import sys +import tempfile +import weakref + from os.path import abspath -from galaxy.util import string_as_bool, stringify_dictionary_keys, listify +import galaxy.model +from galaxy.util import listify, stringify_dictionary_keys, string_as_bool from galaxy.util.odict import odict from galaxy.web import form_builder -import galaxy.model from sqlalchemy.orm import object_session -import pkg_resources -pkg_resources.require("simplejson") -import simplejson - -log = logging.getLogger( __name__ ) +log = logging.getLogger(__name__) STATEMENTS = "__galaxy_statements__" #this is the name of the property in a Datatype class where new metadata spec element Statements are stored -DATABASE_CONNECTION_AVAILABLE = True #When False, certain metadata parameter types (see FileParameter) will behave differently - class Statement( object ): """ This class inserts its target into a list in the surrounding @@ -74,8 +85,8 @@ def __getattr__( self, name ): if name in self.spec: if name in self.parent._metadata: - return self.spec[name].wrap( self.parent._metadata[name] ) - return self.spec[name].wrap( self.spec[name].default ) + return self.spec[name].wrap( self.parent._metadata[name], object_session( self.parent ) ) + return self.spec[name].wrap( self.spec[name].default, object_session( self.parent ) ) if name in self.parent._metadata: return self.parent._metadata[name] def __setattr__( self, name, value ): @@ -202,7 +213,7 @@ self.validate( value ) return value - def wrap( self, value ): + def wrap( self, value, session ): """ Turns a value into its usable form. """ @@ -245,11 +256,11 @@ def get( self, name, default=None ): return self.__dict__.get(name, default) - def wrap( self, value ): + def wrap( self, value, session ): """ Turns a stored value into its usable form. """ - return self.param.wrap( value ) + return self.param.wrap( value, session ) def unwrap( self, value ): """ @@ -312,7 +323,7 @@ return ", ".join( map( str, value ) ) return MetadataParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd ) - def wrap( self, value ): + def wrap( self, value, session ): value = self.marshal( value ) #do we really need this (wasteful)? - yes because we are not sure that all existing selects have been stored previously as lists. Also this will handle the case where defaults/no_values are specified and are single non-list values. 
if self.multiple: return value @@ -424,26 +435,16 @@ def get_html( self, value=None, context={}, other_values={}, **kwd ): return "<div>No display available for Metadata Files</div>" - def wrap( self, value ): + def wrap( self, value, session ): if value is None: return None if isinstance( value, galaxy.model.MetadataFile ) or isinstance( value, MetadataTempFile ): return value - if DATABASE_CONNECTION_AVAILABLE: - try: - # FIXME: this query requires a monkey patch in assignmapper.py since - # MetadataParameters do not have a handle to the sqlalchemy session - return galaxy.model.MetadataFile.get( value ) - except: - #value was not a valid id - return None - else: - mf = galaxy.model.MetadataFile() - mf.id = value #we assume this is a valid id, since we cannot check it - return mf + mf = session.query( galaxy.model.MetadataFile ).get( value ) + return mf def make_copy( self, value, target_context, source_context ): - value = self.wrap( value ) + value = self.wrap( value, object_session( target_context.parent ) ) if value: new_value = galaxy.model.MetadataFile( dataset = target_context.parent, name = self.spec.name ) object_session( target_context.parent ).add( new_value ) @@ -485,13 +486,13 @@ return value def new_file( self, dataset = None, **kwds ): - if DATABASE_CONNECTION_AVAILABLE: + if object_session( dataset ): mf = galaxy.model.MetadataFile( name = self.spec.name, dataset = dataset, **kwds ) object_session( dataset ).add( mf ) object_session( dataset ).flush() #flush to assign id return mf else: - #we need to make a tmp file that is accessable to the head node, + #we need to make a tmp file that is accessable to the head node, #we will be copying its contents into the MetadataFile objects filename after restoring from JSON #we do not include 'dataset' in the kwds passed, as from_JSON_value() will handle this for us return MetadataTempFile( **kwds ) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -163,7 +163,7 @@ # Use default mime type as per datatype spec mimetype = self.datatypes_by_extension[ extension ].get_mime() self.mimetypes_by_extension[ extension ] = mimetype - if hasattr( datatype_class, "get_track_type" ): + if datatype_class.track_type: self.available_tracks.append( extension ) if display_in_upload: self.upload_file_formats.append( extension ) @@ -379,6 +379,36 @@ if not included: self.sniff_order.append(datatype) append_to_sniff_order() + + def get_datatype_class_by_name( self, name ): + """ + Return the datatype class where the datatype's `type` attribute + (as defined in the datatype_conf.xml file) contains `name`. 
+ """ + #TODO: too roundabout - would be better to generate this once as a map and store in this object + found_class = None + for ext, datatype_obj in self.datatypes_by_extension.items(): + datatype_obj_class = datatype_obj.__class__ + datatype_obj_class_str = str( datatype_obj_class ) + #print datatype_obj_class_str + if name in datatype_obj_class_str: + return datatype_obj_class + return None + # these seem to be connected to the dynamic classes being generated in this file, lines 157-158 + # they appear when a one of the three are used in inheritance with subclass="True" + #TODO: a possible solution is to def a fn in datatypes __init__ for creating the dynamic classes + + #remap = { + # 'galaxy.datatypes.registry.Tabular' : galaxy.datatypes.tabular.Tabular, + # 'galaxy.datatypes.registry.Text' : galaxy.datatypes.data.Text, + # 'galaxy.datatypes.registry.Binary' : galaxy.datatypes.binary.Binary + #} + #datatype_str = str( datatype ) + #if datatype_str in remap: + # datatype = remap[ datatype_str ] + # + #return datatype + def get_available_tracks(self): return self.available_tracks def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ): @@ -397,7 +427,7 @@ except KeyError: builder = data.Text() return builder - def change_datatype(self, data, ext, set_meta = True ): + def change_datatype(self, data, ext): data.extension = ext # call init_meta and copy metadata from itself. The datatype # being converted *to* will handle any metadata copying and @@ -405,10 +435,6 @@ if data.has_data(): data.set_size() data.init_meta( copy_from=data ) - if set_meta: - #metadata is being set internally - data.set_meta( overwrite = False ) - data.set_peek() return data def old_change_datatype(self, data, ext): """Creates and returns a new datatype based on an existing data and an extension""" diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py +++ b/lib/galaxy/datatypes/tabular.py @@ -265,7 +265,7 @@ while cursor and ck_data[-1] != '\n': ck_data += cursor cursor = f.read(1) - return to_json_string({'ck_data': ck_data, 'ck_index': ck_index+1}) + return to_json_string( { 'ck_data': util.unicodify( ck_data ), 'ck_index': ck_index + 1 } ) def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, chunk=None): preview = util.string_as_bool( preview ) @@ -328,7 +328,6 @@ """ Returns a list of visualizations for datatype. """ - # Can visualize tabular data as scatterplot if there are 2+ numerical # columns. 
num_numerical_cols = 0 @@ -358,6 +357,9 @@ class Sam( Tabular ): file_ext = 'sam' + track_type = "ReadTrack" + data_sources = { "data": "bam", "index": "bigwig" } + def __init__(self, **kwd): """Initialize taxonomy datatype""" Tabular.__init__( self, **kwd ) @@ -467,17 +469,16 @@ raise Exception('Result %s from %s' % (result, cmd)) merge = staticmethod(merge) - def get_track_type( self ): - return "ReadTrack", {"data": "bam", "index": "summary_tree"} - class Pileup( Tabular ): """Tab delimited data in pileup (6- or 10-column) format""" file_ext = "pileup" line_class = "genomic coordinate" + data_sources = { "data": "tabix" } """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) MetadataElement( name="startCol", default=2, desc="Start column", param=metadata.ColumnParameter ) + MetadataElement( name="endCol", default=2, desc="End column", param=metadata.ColumnParameter ) MetadataElement( name="baseCol", default=3, desc="Reference base column", param=metadata.ColumnParameter ) def init_meta( self, dataset, copy_from=None ): @@ -525,8 +526,7 @@ return True except: return False - - + class ElandMulti( Tabular ): file_ext = 'elandmulti' @@ -535,23 +535,39 @@ class Vcf( Tabular ): """ Variant Call Format for describing SNPs and other simple genome variations. """ + track_type = "VariantTrack" + data_sources = { "data": "tabix", "index": "bigwig" } file_ext = 'vcf' column_names = [ 'Chrom', 'Pos', 'ID', 'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'data' ] MetadataElement( name="columns", default=10, desc="Number of columns", readonly=True, visible=False ) MetadataElement( name="column_types", default=['str','int','str','str','str','int','str','list','str','str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False ) - MetadataElement( name="viz_filter_cols", desc="Score column for visualization", default=[5], param=metadata.ColumnParameter, multiple=True ) + MetadataElement( name="viz_filter_cols", desc="Score column for visualization", default=[5], param=metadata.ColumnParameter, multiple=True, visible=False ) + MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=False, optional=True, no_value=[] ) def sniff( self, filename ): headers = get_headers( filename, '\n', count=1 ) return headers[0][0].startswith("##fileformat=VCF") + def display_peek( self, dataset ): """Returns formated html of peek""" return Tabular.make_html_table( self, dataset, column_names=self.column_names ) - def get_track_type( self ): - return "VcfTrack", {"data": "tabix", "index": "summary_tree"} + def set_meta( self, dataset, **kwd ): + Tabular.set_meta( self, dataset, **kwd ) + source = open( dataset.file_name ) + + # Skip comments. + line = None + for line in source: + if not line.startswith( '##' ): + break + + if line and line.startswith( '#' ): + # Found header line, get sample names. 
+ dataset.metadata.sample_names = line.split()[ 9: ] + class Eland( Tabular ): """Support for the export.txt.gz file used by Illumina's ELANDv2e aligner""" diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/eggs/__init__.py --- a/lib/galaxy/eggs/__init__.py +++ b/lib/galaxy/eggs/__init__.py @@ -387,7 +387,6 @@ "guppy": lambda: self.config.get( "app:main", "use_memdump" ), "python_openid": lambda: self.config.get( "app:main", "enable_openid" ), "python_daemon": lambda: sys.version_info[:2] >= ( 2, 5 ), - "ctypes": lambda: ( "drmaa" in self.config.get( "app:main", "start_job_runners" ).split(",") ) and sys.version_info[:2] == ( 2, 4 ), "pysam": lambda: check_pysam() }.get( egg_name, lambda: True )() except: This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/575dedd2d0ba/ Changeset: 575dedd2d0ba Branch: search User: kellrott Date: 2013-06-07 19:04:42 Summary: Fixing the tag searching for histories and history datasets Affected #: 2 files diff -r d969c260417179cf48dc0bc7a4ea233224064264 -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -32,7 +32,9 @@ import parsley from galaxy.model import HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset -from galaxy.model import StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation +from galaxy.model import (StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, +HistoryDatasetAssociationTagAssociation, +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation) from galaxy.model import ToolVersion from sqlalchemy import and_ @@ -269,12 +271,30 @@ #History Dataset Searching ################## +def history_dataset_handle_tag(view, left, operator, right): + if operator == "=": + view.do_query = True + #aliasing the tag association table, so multiple links to different tags can be formed during a single query + tag_table = aliased(HistoryDatasetAssociationTagAssociation) + + view.query = view.query.filter( + HistoryDatasetAssociation.id == tag_table.history_dataset_association_id + ) + tmp = right.split(":") + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) + if len(tmp) > 1: + view.query = view.query.filter( tag_table.user_value == tmp[1] ) + else: + raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) + class HistoryDatasetView(ViewQueryBaseClass): DOMAIN = "history_dataset" FIELDS = { 'name' : ViewField('name', sqlalchemy_field=HistoryDatasetAssociation.name), - 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True) + 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True), + 'tag' : ViewField("tag", handler=history_dataset_handle_tag) + } def search(self, trans): @@ -289,13 +309,14 @@ def history_handle_tag(view, left, operator, right): if operator == "=": view.do_query = True + tag_table = aliased(HistoryTagAssociation) view.query = view.query.filter( - History.id == HistoryTagAssociation.history_id + History.id == tag_table.history_id ) tmp = right.split(":") - view.query = view.query.filter( HistoryTagAssociation.user_tname == tmp[0] ) + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) if len(tmp) > 1: - view.query = view.query.filter( HistoryTagAssociation.user_value == tmp[1] 
) + view.query = view.query.filter( tag_table.user_value == tmp[1] ) else: raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) diff -r d969c260417179cf48dc0bc7a4ea233224064264 -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 lib/galaxy/webapps/galaxy/api/search.py --- a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -30,7 +30,6 @@ current_user_roles = trans.get_current_user_roles() try: results = query.process(trans) - print results except Exception, e: return {'error' : str(e)} for item in results: https://bitbucket.org/galaxy/galaxy-central/commits/a5b83353f9ee/ Changeset: a5b83353f9ee Branch: search User: Kyle Ellrott Date: 2013-06-17 19:59:32 Summary: galaxy-central merge Affected #: 66 files diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db job_conf.xml.sample_advanced --- a/job_conf.xml.sample_advanced +++ b/job_conf.xml.sample_advanced @@ -54,7 +54,15 @@ <param id="shell_hostname">foo.example.org</param><param id="Job_Execution_Time">24:00:00</param></destination> - <destination id="condor" runner="condor"/> + <destination id="condor" runner="condor"> + <!-- With no params, jobs are submitted to the 'vanilla' universe with: + notification = NEVER + getenv = true + Additional/override query ClassAd params can be specified with + <param> tags. + --> + <param id="request_cpus">8</param> + </destination></destinations><tools><!-- Tools can be configured to use specific destinations or handlers, diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -64,6 +64,9 @@ tcf = kwargs[ 'tool_config_files' ] else: tcf = 'tool_conf.xml' + self.tool_filters = listify( kwargs.get( "tool_filters", [] ) ) + self.tool_label_filters = listify( kwargs.get( "tool_label_filters", [] ) ) + self.tool_section_filters = listify( kwargs.get( "tool_section_filters", [] ) ) self.tool_configs = [ resolve_path( p, self.root ) for p in listify( tcf ) ] self.shed_tool_data_path = kwargs.get( "shed_tool_data_path", None ) if self.shed_tool_data_path: @@ -73,17 +76,21 @@ self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root ) self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root ) self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) ) + self.hours_between_check = kwargs.get( 'hours_between_check', 12 ) try: - self.hours_between_check = kwargs.get( 'hours_between_check', 12 ) - if isinstance( self.hours_between_check, float ): + hbc_test = int( self.hours_between_check ) + self.hours_between_check = hbc_test + if self.hours_between_check < 1 or self.hours_between_check > 24: + self.hours_between_check = 12 + except: + try: # Float values are supported for functional tests. 
+ hbc_test = float( self.hours_between_check ) + self.hours_between_check = hbc_test if self.hours_between_check < 0.001 or self.hours_between_check > 24.0: self.hours_between_check = 12.0 - else: - if self.hours_between_check < 1 or self.hours_between_check > 24: - self.hours_between_check = 12 - except: - self.hours_between_check = 12 + except: + self.hours_between_check = 12 self.update_integrated_tool_panel = kwargs.get( "update_integrated_tool_panel", True ) self.enable_data_manager_user_view = string_as_bool( kwargs.get( "enable_data_manager_user_view", "False" ) ) self.data_manager_config_file = resolve_path( kwargs.get('data_manager_config_file', 'data_manager_conf.xml' ), self.root ) @@ -159,6 +166,7 @@ self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea,ucla" ).lower().split(",") self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "modencode,sgd_yeast,tair,wormbase,wormbase_ws120,wormbase_ws140,wormbase_ws170,wormbase_ws180,wormbase_ws190,wormbase_ws200,wormbase_ws204,wormbase_ws210,wormbase_ws220,wormbase_ws225" ).lower().split(",") self.brand = kwargs.get( 'brand', None ) + self.welcome_url = kwargs.get( 'welcome_url', '/static/welcome.html' ) # Configuration for the message box directly below the masthead. self.message_box_visible = kwargs.get( 'message_box_visible', False ) self.message_box_content = kwargs.get( 'message_box_content', None ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -22,6 +22,7 @@ from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * +import dataproviders log = logging.getLogger(__name__) @@ -74,6 +75,7 @@ trans.response.headers["Content-Disposition"] = 'attachment; filename="Galaxy%s-[%s].%s"' % (dataset.hid, fname, to_ext) return open( dataset.file_name ) + class Ab1( Binary ): """Class describing an ab1 binary sequence file""" file_ext = "ab1" @@ -93,12 +95,15 @@ Binary.register_unsniffable_binary_ext("ab1") + class GenericAsn1Binary( Binary ): """Class for generic ASN.1 binary format""" file_ext = "asn1-binary" Binary.register_unsniffable_binary_ext("asn1-binary") + +@dataproviders.decorators.has_dataproviders class Bam( Binary ): """Class describing a BAM binary file""" file_ext = "bam" @@ -255,9 +260,92 @@ return dataset.peek except: return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) + + # ------------- Dataproviders + # pipe through samtools view + #ALSO: (as Sam) + # bam does not use '#' to indicate comments/headers - we need to strip out those headers from the std. providers + #TODO:?? seems like there should be an easier way to do/inherit this - metadata.comment_char? + #TODO: incorporate samtools options to control output: regions first, then flags, etc. 
+ @dataproviders.decorators.dataprovider_factory( 'line' ) + def line_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.line.FilteredLineDataProvider( samtools_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'regex-line' ) + def regex_line_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.line.RegexLineDataProvider( samtools_source, **settings ) + @dataproviders.decorators.dataprovider_factory( 'column' ) + def column_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.column.ColumnarDataProvider( samtools_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'map' ) + def map_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.column.MapDataProvider( samtools_source, **settings ) + + # these can't be used directly - may need BamColumn, BamMap (Bam metadata -> column/map) + # OR - see genomic_region_dataprovider + #@dataproviders.decorators.dataprovider_factory( 'dataset-column' ) + #def dataset_column_dataprovider( self, dataset, **settings ): + # settings[ 'comment_char' ] = '@' + # return super( Sam, self ).dataset_column_dataprovider( dataset, **settings ) + + #@dataproviders.decorators.dataprovider_factory( 'dataset-map' ) + #def dataset_map_dataprovider( self, dataset, **settings ): + # settings[ 'comment_char' ] = '@' + # return super( Sam, self ).dataset_map_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'header' ) + def header_dataprovider( self, dataset, **settings ): + # in this case we can use an option of samtools view to provide just what we need (w/o regex) + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset, '-H' ) + return dataproviders.line.RegexLineDataProvider( samtools_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'id-seq-qual' ) + def id_seq_qual_dataprovider( self, dataset, **settings ): + settings[ 'indeces' ] = [ 0, 9, 10 ] + settings[ 'column_types' ] = [ 'str', 'str', 'str' ] + settings[ 'column_names' ] = [ 'id', 'seq', 'qual' ] + return self.map_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'genomic-region' ) + def genomic_region_dataprovider( self, dataset, **settings ): + # GenomicRegionDataProvider currently requires a dataset as source - may not be necc. + #TODO:?? consider (at least) the possible use of a kwarg: metadata_source (def. to source.dataset), + # or remove altogether... 
+ #samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + #return dataproviders.dataset.GenomicRegionDataProvider( samtools_source, metadata_source=dataset, + # 2, 3, 3, **settings ) + + # instead, set manually and use in-class column gen + settings[ 'indeces' ] = [ 2, 3, 3 ] + settings[ 'column_types' ] = [ 'str', 'int', 'int' ] + return self.column_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'genomic-region-map' ) + def genomic_region_map_dataprovider( self, dataset, **settings ): + settings[ 'indeces' ] = [ 2, 3, 3 ] + settings[ 'column_types' ] = [ 'str', 'int', 'int' ] + settings[ 'column_names' ] = [ 'chrom', 'start', 'end' ] + return self.map_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'samtools' ) + def samtools_dataprovider( self, dataset, **settings ): + """Generic samtools interface - all options available through settings.""" + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.dataset.SamtoolsDataProvider( dataset_source, **settings ) + Binary.register_sniffable_binary_format("bam", "bam", Bam) + class H5( Binary ): """Class describing an HDF5 file""" file_ext = "h5" @@ -277,6 +365,7 @@ Binary.register_unsniffable_binary_ext("h5") + class Scf( Binary ): """Class describing an scf binary sequence file""" file_ext = "scf" @@ -296,6 +385,7 @@ Binary.register_unsniffable_binary_ext("scf") + class Sff( Binary ): """ Standard Flowgram Format (SFF) """ file_ext = "sff" @@ -327,6 +417,7 @@ Binary.register_sniffable_binary_format("sff", "sff", Sff) + class BigWig(Binary): """ Accessing binary BigWig files from UCSC. @@ -363,6 +454,7 @@ Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig) + class BigBed(BigWig): """BigBed support from UCSC.""" @@ -375,6 +467,7 @@ Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed) + class TwoBit (Binary): """Class describing a TwoBit format nucleotide file""" @@ -399,3 +492,5 @@ return dataset.peek except: return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size())) + +Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -14,6 +14,8 @@ from galaxy.util.odict import odict from galaxy.util.sanitize_html import sanitize_html +import dataproviders + from galaxy import eggs eggs.require( "Paste" ) import paste @@ -56,6 +58,7 @@ cls.metadata_spec.update( base.metadata_spec ) #add contents of metadata spec of base class to cls metadata.Statement.process( cls ) +@dataproviders.decorators.has_dataproviders class Data( object ): """ Base class for all datatypes. Implements basic interfaces as well @@ -545,7 +548,13 @@ def has_resolution(self): return False - + def matches_any( self, target_datatypes ): + """ + Check if this datatype is of any of the target_datatypes or is + a subtype thereof. + """ + datatype_classes = tuple( [ datatype.__class__ for datatype in target_datatypes ] ) + return isinstance( self, datatype_classes ) def merge( split_files, output_file): """ Merge files with copy.copyfileobj() will not hit the @@ -572,6 +581,40 @@ return [ 'trackster', 'circster' ] return [] + # ------------- Dataproviders + def has_dataprovider( self, data_format ): + """ + Returns True if `data_format` is available in `dataproviders`. 
+ """ + return ( data_format in self.dataproviders ) + + def dataprovider( self, dataset, data_format, **settings ): + """ + Base dataprovider factory for all datatypes that returns the proper provider + for the given `data_format` or raises a `NoProviderAvailable`. + """ + #TODO:?? is this handling super class providers? + if self.has_dataprovider( data_format ): + return self.dataproviders[ data_format ]( self, dataset, **settings ) + raise dataproviders.exceptions.NoProviderAvailable( self, data_format ) + + @dataproviders.decorators.dataprovider_factory( 'base' ) + def base_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.base.DataProvider( dataset_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'chunk' ) + def chunk_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.chunk.ChunkDataProvider( dataset_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'chunk64' ) + def chunk64_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.chunk.Base64ChunkDataProvider( dataset_source, **settings ) + + +@dataproviders.decorators.has_dataproviders class Text( Data ): file_ext = 'txt' line_class = 'line' @@ -741,10 +784,31 @@ f.close() split = classmethod(split) + # ------------- Dataproviders + @dataproviders.decorators.dataprovider_factory( 'line' ) + def line_dataprovider( self, dataset, **settings ): + """ + Returns an iterator over the dataset's lines (that have been `strip`ed) + optionally excluding blank lines and lines that start with a comment character. + """ + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.line.FilteredLineDataProvider( dataset_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'regex-line' ) + def regex_line_dataprovider( self, dataset, **settings ): + """ + Returns an iterator over the dataset's lines + optionally including/excluding lines that match one or more regex filters. + """ + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.line.RegexLineDataProvider( dataset_source, **settings ) + + class GenericAsn1( Text ): """Class for generic ASN.1 text format""" file_ext = 'asn1' + class LineCount( Text ): """ Dataset contains a single line with a single integer that denotes the @@ -752,6 +816,7 @@ """ pass + class Newick( Text ): """New Hampshire/Newick Format""" file_ext = "nhx" diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/__init__.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/__init__.py @@ -0,0 +1,28 @@ + +#TODO: ---- This is a work in progress ---- +""" +Dataproviders are iterators with context managers that provide data to some +consumer datum by datum. + +As well as subclassing and overriding to get the proper data, Dataproviders +can be piped from one to the other. +..example:: + +.. note:: be careful to NOT pipe providers into subclasses of those providers. + Subclasses provide all the functionality of their superclasses, + so there's generally no need. + +.. note:: be careful to when using piped providers that accept the same keywords + in their __init__ functions (such as limit or offset) to pass those + keywords to the proper (often final) provider. 
These errors that result + can be hard to diagnose. +""" +import decorators +import exceptions + +import base +import chunk +import line +import column +import external +import dataset diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/base.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/base.py @@ -0,0 +1,260 @@ +""" +Base class(es) for all DataProviders. +""" +# there's a blurry line between functionality here and functionality in datatypes module +# attempting to keep parsing to a minimum here and focus on chopping/pagination/reformat(/filtering-maybe?) +# and using as much pre-computed info/metadata from the datatypes module as possible +# also, this shouldn't be a replacement/re-implementation of the tool layer +# (which provides traceability/versioning/reproducibility) + +from collections import deque +import exceptions + +_TODO = """ +hooks into datatypes (define providers inside datatype modules) as factories +capture tell() when provider is done + def stop( self ): self.endpoint = source.tell(); raise StopIteration() +implement __len__ sensibly where it can be (would be good to have where we're giving some progress - '100 of 300') + seems like sniffed files would have this info +unit tests +add datum entry/exit point methods: possibly decode, encode + or create a class that pipes source through - how would decode work then? + +icorporate existing visualization/dataproviders +some of the sources (esp. in datasets) don't need to be re-created + +YAGNI: InterleavingMultiSourceDataProvider, CombiningMultiSourceDataProvider +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- base classes +class DataProvider( object ): + """ + Base class for all data providers. Data providers: + (a) have a source (which must be another file-like object) + (b) implement both the iterator and context manager interfaces + (c) do not allow write methods + (but otherwise implement the other file object interface methods) + """ + def __init__( self, source, **kwargs ): + """ + :param source: the source that this iterator will loop over. + (Should implement the iterable interface and ideally have the + context manager interface as well) + """ + self.source = self.validate_source( source ) + + def validate_source( self, source ): + """ + Is this a valid source for this provider? + + :raises InvalidDataProviderSource: if the source is considered invalid. + + Meant to be overridden in subclasses. + """ + if not source or not hasattr( source, '__iter__' ): + # that's by no means a thorough check + raise exceptions.InvalidDataProviderSource( source ) + return source + + #TODO: (this might cause problems later...) + #TODO: some providers (such as chunk's seek and read) rely on this... remove + def __getattr__( self, name ): + if name == 'source': + # if we're inside this fn, source hasn't been set - provide some safety just for this attr + return None + # otherwise, try to get the attr from the source - allows us to get things like provider.encoding, etc. 
+ if hasattr( self.source, name ): + return getattr( self.source, name ) + # raise the proper error + return self.__getattribute__( name ) + + # write methods should not be allowed + def truncate( self, size ): + raise NotImplementedError( 'Write methods are purposely disabled' ) + def write( self, string ): + raise NotImplementedError( 'Write methods are purposely disabled' ) + def writelines( self, sequence ): + raise NotImplementedError( 'Write methods are purposely disabled' ) + + #TODO: route read methods through next? + #def readline( self ): + # return self.next() + def readlines( self ): + return [ line for line in self ] + + # iterator interface + def __iter__( self ): + # it's generators all the way up, Timmy + with self as source: + for datum in self.source: + yield datum + def next( self ): + return self.source.next() + + # context manager interface + def __enter__( self ): + # make the source's context manager interface optional + if hasattr( self.source, '__enter__' ): + self.source.__enter__() + return self + def __exit__( self, *args ): + # make the source's context manager interface optional, call on source if there + if hasattr( self.source, '__exit__' ): + self.source.__exit__( *args ) + # alternately, call close() + elif hasattr( self.source, 'close' ): + self.source.close() + + def __str__( self ): + """ + String representation for easier debugging. + + Will call `__str__` on it's source so this will display piped dataproviders. + """ + # we need to protect against recursion (in __getattr__) if self.source hasn't been set + source_str = str( self.source ) if hasattr( self, 'source' ) else '' + return '%s(%s)' %( self.__class__.__name__, str( source_str ) ) + + +class FilteredDataProvider( DataProvider ): + """ + Passes each datum through a filter function and yields it if that function + returns a non-`None` value. + + Also maintains counters: + - `num_data_read`: how many data have been consumed from the source. + - `num_valid_data_read`: how many data have been returned from `filter`. + - `num_data_returned`: how many data has this provider yielded. + """ + def __init__( self, source, filter_fn=None, **kwargs ): + """ + :param filter_fn: a lambda or function that will be passed a datum and + return either the (optionally modified) datum or None. + """ + super( FilteredDataProvider, self ).__init__( source, **kwargs ) + self.filter_fn = filter_fn + # count how many data we got from the source + self.num_data_read = 0 + # how many valid data have we gotten from the source + # IOW, data that's passed the filter and been either provided OR have been skipped due to offset + self.num_valid_data_read = 0 + # how many lines have been provided/output + self.num_data_returned = 0 + + def __iter__( self ): + parent_gen = super( FilteredDataProvider, self ).__iter__() + for datum in parent_gen: + self.num_data_read += 1 + datum = self.filter( datum ) + if datum != None: + self.num_valid_data_read += 1 + self.num_data_returned += 1 + yield datum + + #TODO: may want to squash this into DataProvider + def filter( self, datum ): + """ + When given a datum from the provider's source, return None if the datum + 'does not pass' the filter or is invalid. Return the datum if it's valid. + + :param datum: the datum to check for validity. + :returns: the datum, a modified datum, or None + + Meant to be overridden. 
+ """ + if self.filter_fn: + return self.filter_fn( datum ) + # also can be overriden entirely + return datum + + +class LimitedOffsetDataProvider( FilteredDataProvider ): + """ + A provider that uses the counters from FilteredDataProvider to limit the + number of data and/or skip `offset` number of data before providing. + + Useful for grabbing sections from a source (e.g. pagination). + """ + #TODO: may want to squash this into DataProvider + def __init__( self, source, offset=0, limit=None, **kwargs ): + """ + :param offset: the number of data to skip before providing. + :param limit: the final number of data to provide. + """ + super( LimitedOffsetDataProvider, self ).__init__( source, **kwargs ) + + # how many valid data to skip before we start outputing data - must be positive + # (diff to support neg. indeces - must be pos.) + self.offset = max( offset, 0 ) + + # how many valid data to return - must be positive (None indicates no limit) + self.limit = limit + if self.limit != None: + self.limit = max( self.limit, 0 ) + + def __iter__( self ): + """ + Iterate over the source until `num_valid_data_read` is greater than + `offset`, begin providing datat, and stop when `num_data_returned` + is greater than `offset`. + """ + parent_gen = super( LimitedOffsetDataProvider, self ).__iter__() + for datum in parent_gen: + + if self.limit != None and self.num_data_returned > self.limit: + break + + if self.num_valid_data_read > self.offset: + yield datum + else: + # wot a cheezy way of doing this... + self.num_data_returned -= 1 + + #TODO: skipping lines is inefficient - somehow cache file position/line_num pair and allow provider + # to seek to a pos/line and then begin providing lines + # the important catch here is that we need to have accurate pos/line pairs + # in order to preserve the functionality of limit and offset + #if file_seek and len( file_seek ) == 2: + # seek_pos, new_line_num = file_seek + # self.seek_and_set_curr_line( seek_pos, new_line_num ) + + #def seek_and_set_curr_line( self, file_seek, new_curr_line_num ): + # self.seek( file_seek, os.SEEK_SET ) + # self.curr_line_num = new_curr_line_num + + +class MultiSourceDataProvider( DataProvider ): + """ + A provider that iterates over a list of given sources and provides data + from one after another. + + An iterator over iterators. + """ + def __init__( self, source_list, **kwargs ): + """ + :param source_list: an iterator of iterables + """ + self.source_list = deque( source_list ) + + def __iter__( self ): + """ + Iterate over the source_list, then iterate over the data in each source. + + Skip a given source in `source_list` if it is `None` or invalid. + """ + for source in self.source_list: + # just skip falsy sources + if not source: + continue + try: + self.source = self.validate_source( source ) + except exceptions.InvalidDataProviderSource, invalid_source: + continue + + parent_gen = super( MultiSourceDataProvider, self ).__iter__() + for datum in parent_gen: + yield datum diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/chunk.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/chunk.py @@ -0,0 +1,80 @@ +""" +Chunk (N number of bytes at M offset to a source's beginning) provider. + +Primarily for file sources but usable by any iterator that has both +seek and read( N ). 
+""" +import os +import base64 + +import base +import exceptions + +_TODO = """ +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- +class ChunkDataProvider( base.DataProvider ): + """ + Data provider that yields chunks of data from it's file. + + Note: this version does not account for lines and works with Binary datatypes. + """ + MAX_CHUNK_SIZE = 2**16 + DEFAULT_CHUNK_SIZE = MAX_CHUNK_SIZE + + #TODO: subclass from LimitedOffsetDataProvider? + # see web/framework/base.iterate_file, util/__init__.file_reader, and datatypes.tabular + def __init__( self, source, chunk_index=0, chunk_size=DEFAULT_CHUNK_SIZE, **kwargs ): + """ + :param chunk_index: if a source can be divided into N number of + `chunk_size` sections, this is the index of which section to + return. + :param chunk_size: how large are the desired chunks to return + (gen. in bytes). + """ + super( ChunkDataProvider, self ).__init__( source, **kwargs ) + self.chunk_size = chunk_size + self.chunk_pos = chunk_index * self.chunk_size + + def validate_source( self, source ): + """ + Does the given source have both the methods `seek` and `read`? + :raises InvalidDataProviderSource: if not. + """ + source = super( ChunkDataProvider, self ).validate_source( source ) + if( ( not hasattr( source, 'seek' ) ) + or ( not hasattr( source, 'read' ) ) ): + raise exceptions.InvalidDataProviderSource( source ) + return source + + def __iter__( self ): + # not reeeally an iterator per se + self.__enter__() + self.source.seek( self.chunk_pos, os.SEEK_SET ) + chunk = self.encode( self.source.read( self.chunk_size ) ) + yield chunk + self.__exit__() + + def encode( self, chunk ): + """ + Called on the chunk before returning. + + Overrride to modify, encode, or decode chunks. + """ + return chunk + + +class Base64ChunkDataProvider( ChunkDataProvider ): + """ + Data provider that yields chunks of base64 encoded data from it's file. + """ + def encode( self, chunk ): + """ + Return chunks encoded in base 64. + """ + return base64.b64encode( chunk ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/column.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/column.py @@ -0,0 +1,242 @@ +""" +Providers that provide lists of lists generally where each line of a source +is further subdivided into multiple data (e.g. columns from a line). +""" + +import line + +_TODO = """ +move ColumnarDataProvider parsers to more sensible location + +TransposedColumnarDataProvider: provides each column as a single array + - see existing visualizations/dataprovider/basic.ColumnDataProvider +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- base classes +class ColumnarDataProvider( line.RegexLineDataProvider ): + """ + Data provider that provide a list of columns from the lines of it's source. + + Columns are returned in the order given in indeces, so this provider can + re-arrange columns. + + If any desired index is outside the actual number of columns + in the source, this provider will None-pad the output and you are guaranteed + the same number of columns as the number of indeces asked for (even if they + are filled with None). 
+ """ + def __init__( self, source, indeces=None, + column_count=None, column_types=None, parsers=None, parse_columns=True, + deliminator='\t', **kwargs ): + """ + :param indeces: a list of indeces of columns to gather from each row + Optional: will default to `None`. + If `None`, this provider will return all rows (even when a + particular row contains more/less than others). + If a row/line does not contain an element at a given index, the + provider will-return/fill-with a `None` value as the element. + :type indeces: list or None + + :param column_count: an alternate means of defining indeces, use an int + here to effectively provide the first N columns. + Optional: will default to `None`. + :type column_count: int + + :param column_types: a list of string names of types that the + provider will use to look up an appropriate parser for the column. + (e.g. 'int', 'float', 'str', 'bool') + Optional: will default to parsing all columns as strings. + :type column_types: list of strings + + :param parsers: a dictionary keyed with column type strings + and with values that are functions to use when parsing those + types. + Optional: will default to using the function `_get_default_parsers`. + :type parsers: dictionary + + :param parse_columns: attempt to parse columns? + Optional: defaults to `True`. + :type parse_columns: bool + + :param deliminator: character(s) used to split each row/line of the source. + Optional: defaults to the tab character. + :type deliminator: str + + .. note: that the subclass constructors are passed kwargs - so they're + params (limit, offset, etc.) are also applicable here. + """ + #TODO: other columnar formats: csv, etc. + super( ColumnarDataProvider, self ).__init__( source, **kwargs ) + + #IMPLICIT: if no indeces, column_count, or column_types passed: return all columns + self.selected_column_indeces = indeces + self.column_count = column_count + self.column_types = column_types or [] + # if no column count given, try to infer from indeces or column_types + if not self.column_count: + if self.selected_column_indeces: + self.column_count = len( self.selected_column_indeces ) + elif self.column_types: + self.column_count = len( self.column_types ) + # if no indeces given, infer from column_count + if not self.selected_column_indeces and self.column_count: + self.selected_column_indeces = list( xrange( self.column_count ) ) + + self.deliminator = deliminator + + # how/whether to parse each column value + self.parsers = {} + if parse_columns: + self.parsers = self._get_default_parsers() + # overwrite with user desired parsers + self.parsers.update( parsers or {} ) + + def _get_default_parsers( self ): + """ + Return parser dictionary keyed for each columnar type + (as defined in datatypes). + + .. note: primitives only by default (str, int, float, boolean, None). + Other (more complex) types are retrieved as strings. + :returns: a dictionary of the form: + `{ <parser type name> : <function used to parse type> }` + """ + #TODO: move to module level (or datatypes, util) + return { + # str is default and not needed here + 'int' : int, + 'float' : float, + 'bool' : bool, + + # unfortunately, 'list' is used in dataset metadata both for + # query style maps (9th col gff) AND comma-sep strings. + # (disabled for now) + #'list' : lambda v: v.split( ',' ), + #'csv' : lambda v: v.split( ',' ), + ## i don't like how urlparses does sub-lists... 
+ #'querystr' : lambda v: dict([ ( p.split( '=', 1 ) if '=' in p else ( p, True ) ) + # for p in v.split( ';', 1 ) ]) + + #'scifloat': #floating point which may be in scientific notation + + # always with the 1 base, biologists? + #'int1' : ( lambda i: int( i ) - 1 ), + + #'gffval': string or '.' for None + #'gffint': # int or '.' for None + #'gffphase': # 0, 1, 2, or '.' for None + #'gffstrand': # -, +, ?, or '.' for None, etc. + } + + def _parse_value( self, val, type ): + """ + Attempt to parse and return the given value based on the given type. + + :param val: the column value to parse (often a string) + :param type: the string type 'name' used to find the appropriate parser + :returns: the parsed value + or `value` if no `type` found in `parsers` + or `None` if there was a parser error (ValueError) + """ + if type == 'str' or type == None: return val + try: + return self.parsers[ type ]( val ) + except KeyError, err: + # no parser - return as string + pass + except ValueError, err: + # bad value - return None + return None + return val + + def _get_column_type( self, index ): + """ + Get the column type for the parser from `self.column_types` or `None` + if the type is unavailable. + :param index: the column index + :returns: string name of type (e.g. 'float', 'int', etc.) + """ + try: + return self.column_types[ index ] + except IndexError, ind_err: + return None + + def _parse_column_at_index( self, columns, parser_index, index ): + """ + Get the column type for the parser from `self.column_types` or `None` + if the type is unavailable. + """ + try: + return self._parse_value( columns[ index ], self._get_column_type( parser_index ) ) + # if a selected index is not within columns, return None + except IndexError, index_err: + return None + + def _parse_columns_from_line( self, line ): + """ + Returns a list of the desired, parsed columns. + :param line: the line to parse + :type line: str + """ + #TODO: too much going on in this loop - the above should all be precomputed AMAP... + all_columns = line.split( self.deliminator ) + # if no indeces were passed to init, return all columns + selected_indeces = self.selected_column_indeces or list( xrange( len( all_columns ) ) ) + parsed_columns = [] + for parser_index, column_index in enumerate( selected_indeces ): + parsed_columns.append( self._parse_column_at_index( all_columns, parser_index, column_index ) ) + return parsed_columns + + def __iter__( self ): + parent_gen = super( ColumnarDataProvider, self ).__iter__() + for line in parent_gen: + columns = self._parse_columns_from_line( line ) + yield columns + + #TODO: implement column filters here and not below - flatten hierarchy + +class FilteredByColumnDataProvider( ColumnarDataProvider ): + """ + Data provider that provide a list of columns from the lines of it's source + _only_ if they pass a given filter function. + + e.g. column #3 is type int and > N + """ + # TODO: how to do this and still have limit and offset work? + def __init__( self, source, **kwargs ): + raise NotImplementedError() + super( FilteredByColumnDataProvider, self ).__init__( source, **kwargs ) + + +class MapDataProvider( ColumnarDataProvider ): + """ + Data provider that column_names and columns from the source's contents + into a dictionary. + + A combination use of both `column_names` and `indeces` allows 'picking' + key/value pairs from the source. + + .. note: that the subclass constructors are passed kwargs - so they're + params (limit, offset, etc.) are also applicable here. 
+ """ + def __init__( self, source, column_names=None, **kwargs ): + """ + :param column_names: an ordered list of strings that will be used as the keys + for each column in the returned dictionaries. + The number of key, value pairs each returned dictionary has will + be as short as the number of column names provided. + :type column_names: + """ + #TODO: allow passing in a map instead of name->index { 'name1': index1, ... } + super( MapDataProvider, self ).__init__( source, **kwargs ) + self.column_names = column_names or [] + + def __iter__( self ): + parent_gen = super( MapDataProvider, self ).__iter__() + for column_values in parent_gen: + map = dict( zip( self.column_names, column_values ) ) + yield map diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/dataset.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -0,0 +1,671 @@ +""" +Dataproviders that use either: + - the file contents and/or metadata from a Galaxy DatasetInstance as + their source. + - or provide data in some way relevant to bioinformatic data + (e.g. parsing genomic regions from their source) +""" + +import pkg_resources +pkg_resources.require( 'bx-python' ) +from bx import seq as bx_seq +from bx import wiggle as bx_wig + +import galaxy.model +import galaxy.datatypes +import galaxy.datatypes.data + +#TODO: can't import these due to circular ref in model/registry +#import galaxy.datatypes.binary +#import galaxy.datatypes.tabular + +import exceptions +import base +import line +import column +import external + +_TODO = """ +use bx as much as possible +the use of DatasetInstance seems to create some import problems +gff3 hierarchies +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- base for using a Glx dataset +class DatasetDataProvider( base.DataProvider ): + """ + Class that uses the file contents and/or metadata from a Galaxy DatasetInstance + as it's source. + + DatasetDataProvider can be seen as the intersection between a datatype's + metadata and a dataset's file contents. It (so far) mainly provides helper + and conv. methods for using dataset metadata to set up and control how + the data is provided. + """ + def __init__( self, dataset, **kwargs ): + """ + :param dataset: the Galaxy dataset whose file will be the source + :type dataset: model.DatasetInstance + + :raises exceptions.InvalidDataProviderSource: if not a DatsetInstance + """ + if not isinstance( dataset, galaxy.model.DatasetInstance ): + raise exceptions.InvalidDataProviderSource( "Data provider can only be used with a DatasetInstance" ) + self.dataset = dataset + # this dataset file is obviously the source + #TODO: this might be a good place to interface with the object_store... + super( DatasetDataProvider, self ).__init__( open( dataset.file_name, 'rb' ) ) + + #TODO: this is a bit of a mess + @classmethod + def get_column_metadata_from_dataset( cls, dataset ): + """ + Convenience class method to get column metadata from a dataset. + :returns: a dictionary of `column_count`, `column_types`, and `column_names` + if they're available, setting each to `None` if not. 
+ """ + # re-map keys to fit ColumnarProvider.__init__ kwargs + params = {} + params[ 'column_count' ] = dataset.metadata.columns + params[ 'column_types' ] = dataset.metadata.column_types + params[ 'column_names' ] = dataset.metadata.column_names or getattr( dataset.datatype, 'column_names', None ) + return params + + def get_metadata_column_types( self, indeces=None ): + """ + Return the list of `column_types` for this dataset or `None` if unavailable. + :param indeces: the indeces for the columns of which to return the types. + Optional: defaults to None (return all types) + :type indeces: list of ints + """ + metadata_column_types = ( self.dataset.metadata.column_types + or getattr( self.dataset.datatype, 'column_types', None ) + or None ) + if not metadata_column_types: + return metadata_column_types + if indeces: + column_types = [] + for index in indeces: + column_type = metadata_column_types[ index ] if index < len( metadata_column_types ) else None + column_types.append( column_type ) + return column_types + return metadata_column_types + + def get_metadata_column_names( self, indeces=None ): + """ + Return the list of `column_names` for this dataset or `None` if unavailable. + :param indeces: the indeces for the columns of which to return the names. + Optional: defaults to None (return all names) + :type indeces: list of ints + """ + metadata_column_names = ( self.dataset.metadata.column_names + or getattr( self.dataset.datatype, 'column_names', None ) + or None ) + if not metadata_column_names: + return metadata_column_names + if indeces: + column_names = [] + for index in indeces: + column_type = metadata_column_names[ index ] if index < len( metadata_column_names ) else None + column_names.append( column_type ) + return column_names + return metadata_column_names + + #TODO: merge the next two + def get_indeces_by_column_names( self, list_of_column_names ): + """ + Return the list of column indeces when given a list of column_names. + :param list_of_column_names: the names of the columns of which to get indeces. + :type list_of_column_names: list of strs + :raises KeyError: if column_names are not found + :raises ValueError: if an entry in list_of_column_names is not in column_names + """ + metadata_column_names = ( self.dataset.metadata.column_names + or getattr( self.dataset.datatype, 'column_names', None ) + or None ) + if not metadata_column_names: + raise KeyError( 'No column_names found for ' + + 'datatype: %s, dataset: %s' %( str( self.dataset.datatype ), str( self.dataset ) ) ) + indeces = [] + for column_name in list_of_column_names: + indeces.append( metadata_column_names.index( column_name ) ) + return indeces + + def get_metadata_column_index_by_name( self, name ): + """ + Return the 1-base index of a sources column with the given `name`. + """ + # metadata columns are 1-based indeces + column = getattr( self.dataset.metadata, name ) + return ( column - 1 ) if isinstance( column, int ) else None + + def get_genomic_region_indeces( self, check=False ): + """ + Return a list of column indeces for 'chromCol', 'startCol', 'endCol' from + a source representing a genomic region. + + :param check: if True will raise a ValueError if any were not found. + :type check: bool + :raises ValueError: if check is `True` and one or more indeces were not found. + :returns: list of column indeces for the named columns. 
+ """ + region_column_names = ( 'chromCol', 'startCol', 'endCol' ) + region_indeces = [ self.get_metadata_column_index_by_name( name ) for name in region_column_names ] + if check and not all( map( lambda i: i != None, indeces ) ): + raise ValueError( "Could not determine proper column indeces for chrom, start, end: %s" %( str( indeces ) ) ) + return region_indeces + + +class ConvertedDatasetDataProvider( DatasetDataProvider ): + """ + Class that uses the file contents of a dataset after conversion to a different + format. + """ + def __init__( self, dataset, **kwargs ): + raise NotImplementedError( 'Abstract class' ) + self.original_dataset = dataset + self.converted_dataset = self.convert_dataset( dataset, **kwargs ) + super( ConvertedDatasetDataProvider, self ).__init__( self.converted_dataset, **kwargs ) + #NOTE: now self.converted_dataset == self.dataset + + def convert_dataset( self, dataset, **kwargs ): + """ + Convert the given dataset in some way. + """ + return dataset + + +# ----------------------------------------------------------------------------- uses metadata for settings +class DatasetColumnarDataProvider( column.ColumnarDataProvider ): + """ + Data provider that uses a DatasetDataProvider as it's source and the + dataset's metadata to buuild settings for the ColumnarDataProvider it's + inherited from. + """ + def __init__( self, dataset, **kwargs ): + """ + All kwargs are inherited from ColumnarDataProvider. + .. seealso:: column.ColumnarDataProvider + + If no kwargs are given, this class will attempt to get those kwargs + from the dataset source's metadata. + If any kwarg is given, it will override and be used in place of + any metadata available. + """ + dataset_source = DatasetDataProvider( dataset ) + if not kwargs.get( 'column_types', None ): + indeces = kwargs.get( 'indeces', None ) + kwargs[ 'column_types' ] = dataset_source.get_metadata_column_types( indeces=indeces ) + super( DatasetColumnarDataProvider, self ).__init__( dataset_source, **kwargs ) + + +class DatasetMapDataProvider( column.MapDataProvider ): + """ + Data provider that uses a DatasetDataProvider as it's source and the + dataset's metadata to buuild settings for the MapDataProvider it's + inherited from. + """ + def __init__( self, dataset, **kwargs ): + """ + All kwargs are inherited from MapDataProvider. + .. seealso:: column.MapDataProvider + + If no kwargs are given, this class will attempt to get those kwargs + from the dataset source's metadata. + If any kwarg is given, it will override and be used in place of + any metadata available. 
+ + The relationship between column_names and indeces is more complex: + +-----------------+-------------------------------+-----------------------+ + | | Indeces given | Indeces NOT given | + +=================+===============================+=======================+ + | Names given | pull indeces, rename w/ names | pull by name | + +=================+-------------------------------+-----------------------+ + | Names NOT given | pull indeces, name w/ meta | pull all, name w/meta | + +=================+-------------------------------+-----------------------+ + """ + dataset_source = DatasetDataProvider( dataset ) + + #TODO: getting too complicated - simplify at some lvl, somehow + # if no column_types given, get column_types from indeces (or all if indeces == None) + indeces = kwargs.get( 'indeces', None ) + column_names = kwargs.get( 'column_names', None ) + + #if indeces and column_names: + # # pull using indeces and re-name with given names - no need to alter (does as super would) + # pass + + if not indeces and column_names: + # pull columns by name + indeces = kwargs[ 'indeces' ] = dataset_source.get_indeces_by_column_names( column_names ) + + elif indeces and not column_names: + # pull using indeces, name with meta + column_names = kwargs[ 'column_names' ] = dataset_source.get_metadata_column_names( indeces=indeces ) + + elif not indeces and not column_names: + # pull all indeces and name using metadata + column_names = kwargs[ 'column_names' ] = dataset_source.get_metadata_column_names( indeces=indeces ) + + # if no column_types given, use metadata column_types + if not kwargs.get( 'column_types', None ): + kwargs[ 'column_types' ] = dataset_source.get_metadata_column_types( indeces=indeces ) + + super( DatasetMapDataProvider, self ).__init__( dataset_source, **kwargs ) + + +# ----------------------------------------------------------------------------- provides a bio-relevant datum +class GenomicRegionDataProvider( column.ColumnarDataProvider ): + """ + Data provider that parses chromosome, start, and end data from a file + using the datasets metadata settings. + + Is a ColumnarDataProvider that uses a DatasetDataProvider as it's source. + + If `named_columns` is true, will return dictionaries with the keys + 'chrom', 'start', 'end'. + """ + # dictionary keys when named_columns=True + COLUMN_NAMES = [ 'chrom', 'start', 'end' ] + + def __init__( self, dataset, chrom_column=None, start_column=None, end_column=None, named_columns=False, **kwargs ): + """ + :param dataset: the Galaxy dataset whose file will be the source + :type dataset: model.DatasetInstance + + :param chrom_column: optionally specify the chrom column index + :type chrom_column: int + :param start_column: optionally specify the start column index + :type start_column: int + :param end_column: optionally specify the end column index + :type end_column: int + + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', or 'end'. + Optional: defaults to False + :type named_columns: bool + """ + #TODO: allow passing in a string format e.g. 
"{chrom}:{start}-{end}" + dataset_source = DatasetDataProvider( dataset ) + + if chrom_column == None: + chrom_column = dataset_source.get_metadata_column_index_by_name( 'chromCol' ) + if start_column == None: + start_column = dataset_source.get_metadata_column_index_by_name( 'startCol' ) + if end_column == None: + end_column = dataset_source.get_metadata_column_index_by_name( 'endCol' ) + indeces = [ chrom_column, start_column, end_column ] + if not all( map( lambda i: i != None, indeces ) ): + raise ValueError( "Could not determine proper column indeces for" + + " chrom, start, end: %s" %( str( indeces ) ) ) + kwargs.update({ 'indeces' : indeces }) + + if not kwargs.get( 'column_types', None ): + kwargs.update({ 'column_types' : dataset_source.get_metadata_column_types( indeces=indeces ) }) + + self.named_columns = named_columns + if self.named_columns: + self.column_names = self.COLUMN_NAMES + + super( GenomicRegionDataProvider, self ).__init__( dataset_source, **kwargs ) + + def __iter__( self ): + parent_gen = super( GenomicRegionDataProvider, self ).__iter__() + for column_values in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, column_values ) ) + else: + yield column_values + + +#TODO: this optionally provides the same data as the above and makes GenomicRegionDataProvider redundant +# GenomicRegionDataProvider is a better name, tho +class IntervalDataProvider( column.ColumnarDataProvider ): + """ + Data provider that parses chromosome, start, and end data (as well as strand + and name if set in the metadata) using the dataset's metadata settings. + + If `named_columns` is true, will return dictionaries with the keys + 'chrom', 'start', 'end' (and 'strand' and 'name' if available). + """ + COLUMN_NAMES = [ 'chrom', 'start', 'end', 'strand', 'name' ] + + def __init__( self, dataset, chrom_column=None, start_column=None, end_column=None, + strand_column=None, name_column=None, named_columns=False, **kwargs ): + """ + :param dataset: the Galaxy dataset whose file will be the source + :type dataset: model.DatasetInstance + + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', 'end', 'strand', or 'name'. + Optional: defaults to False + :type named_columns: bool + """ + #TODO: allow passing in a string format e.g. 
"{chrom}:{start}-{end}" + dataset_source = DatasetDataProvider( dataset ) + + # get genomic indeces and add strand and name + if chrom_column == None: + chrom_column = dataset_source.get_metadata_column_index_by_name( 'chromCol' ) + if start_column == None: + start_column = dataset_source.get_metadata_column_index_by_name( 'startCol' ) + if end_column == None: + end_column = dataset_source.get_metadata_column_index_by_name( 'endCol' ) + if strand_column == None: + strand_column = dataset_source.get_metadata_column_index_by_name( 'strandCol' ) + if name_column == None: + name_column = dataset_source.get_metadata_column_index_by_name( 'nameCol' ) + indeces = [ chrom_column, start_column, end_column, strand_column, name_column ] + kwargs.update({ 'indeces' : indeces }) + + if not kwargs.get( 'column_types', None ): + kwargs.update({ 'column_types' : dataset_source.get_metadata_column_types( indeces=indeces ) }) + + self.named_columns = named_columns + if self.named_columns: + self.column_names = self.COLUMN_NAMES + + super( IntervalDataProvider, self ).__init__( dataset_source, **kwargs ) + + def __iter__( self ): + parent_gen = super( IntervalDataProvider, self ).__iter__() + for column_values in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, column_values ) ) + else: + yield column_values + + +#TODO: ideally with these next two - you'd allow pulling some region from the sequence +# WITHOUT reading the entire seq into memory - possibly apply some version of limit/offset +class FastaDataProvider( base.FilteredDataProvider ): + """ + Class that returns fasta format data in a list of maps of the form: + { + id: <fasta header id>, + sequence: <joined lines of nucleotide/amino data> + } + """ + def __init__( self, source, ids=None, **kwargs ): + """ + :param ids: optionally return only ids (and sequences) that are in this list. + Optional: defaults to None (provide all ids) + :type ids: list or None + """ + source = bx_seq.fasta.FastaReader( source ) + #TODO: validate is a fasta + super( FastaDataProvider, self ).__init__( source, **kwargs ) + self.ids = ids + # how to do ids? + + def __iter__( self ): + parent_gen = super( FastaDataProvider, self ).__iter__() + for fasta_record in parent_gen: + yield { + 'id' : fasta_record.name, + 'seq' : fasta_record.text + } + + +class TwoBitFastaDataProvider( DatasetDataProvider ): + """ + Class that returns fasta format data in a list of maps of the form: + { + id: <fasta header id>, + sequence: <joined lines of nucleotide/amino data> + } + """ + def __init__( self, source, ids=None, **kwargs ): + """ + :param ids: optionally return only ids (and sequences) that are in this list. + Optional: defaults to None (provide all ids) + :type ids: list or None + """ + source = bx_seq.twobit.TwoBitFile( source ) + #TODO: validate is a 2bit + super( FastaDataProvider, self ).__init__( source, **kwargs ) + # could do in order provided with twobit + self.ids = ids or self.source.keys() + + def __iter__( self ): + for id_ in self.ids: + yield { + 'id' : id_, + 'seq' : self.source[ name ] + } + + +#TODO: +class WiggleDataProvider( base.LimitedOffsetDataProvider ): + """ + Class that returns chrom, pos, data from a wiggle source. + """ + COLUMN_NAMES = [ 'chrom', 'pos', 'value' ] + + def __init__( self, source, named_columns=False, column_names=None, **kwargs ): + """ + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', 'end', 'strand', or 'name'. 
+ Optional: defaults to False + :type named_columns: bool + + :param column_names: an ordered list of strings that will be used as the keys + for each column in the returned dictionaries. + The number of key, value pairs each returned dictionary has will + be as short as the number of column names provided. + :type column_names: + """ + #TODO: validate is a wig + # still good to maintain a ref to the raw source bc Reader won't + self.raw_source = source + self.parser = bx_wig.Reader( source ) + super( WiggleDataProvider, self ).__init__( self.parser, **kwargs ) + + self.named_columns = named_columns + self.column_names = column_names or self.COLUMN_NAMES + + def __iter__( self ): + parent_gen = super( WiggleDataProvider, self ).__iter__() + for three_tuple in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, three_tuple ) ) + else: + # list is not strictly necessary - but consistent + yield list( three_tuple ) + + +class BigWigDataProvider( base.LimitedOffsetDataProvider ): + """ + Class that returns chrom, pos, data from a wiggle source. + """ + COLUMN_NAMES = [ 'chrom', 'pos', 'value' ] + + def __init__( self, source, chrom, start, end, named_columns=False, column_names=None, **kwargs ): + """ + :param chrom: which chromosome within the bigbed file to extract data for + :type chrom: str + :param start: the start of the region from which to extract data + :type start: int + :param end: the end of the region from which to extract data + :type end: int + + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', 'end', 'strand', or 'name'. + Optional: defaults to False + :type named_columns: bool + + :param column_names: an ordered list of strings that will be used as the keys + for each column in the returned dictionaries. + The number of key, value pairs each returned dictionary has will + be as short as the number of column names provided. + :type column_names: + """ + raise NotImplementedError( 'Work in progress' ) + #TODO: validate is a wig + # still good to maintain a ref to the raw source bc Reader won't + self.raw_source = source + self.parser = bx_bbi.bigwig_file.BigWigFile( source ) + super( BigWigDataProvider, self ).__init__( self.parser, **kwargs ) + + self.named_columns = named_columns + self.column_names = column_names or self.COLUMN_NAMES + + def __iter__( self ): + parent_gen = super( BigWigDataProvider, self ).__iter__() + for three_tuple in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, three_tuple ) ) + else: + # list is not strictly necessary - but consistent + yield list( three_tuple ) + + +# ----------------------------------------------------------------------------- binary, external conversion or tool +class DatasetSubprocessDataProvider( external.SubprocessDataProvider ): + """ + Create a source from running a subprocess on a dataset's file. + + Uses a subprocess as it's source and has a dataset (gen. as an input file + for the process). + """ + #TODO: below should be a subclass of this and not RegexSubprocess + def __init__( self, dataset, *args, **kwargs ): + """ + :param args: the list of strings used to build commands. + :type args: variadic function args + """ + raise NotImplementedError( 'Abstract class' ) + super( DatasetSubprocessDataProvider, self ).__init__( *args, **kwargs ) + self.dataset = dataset + + +class SamtoolsDataProvider( line.RegexLineDataProvider ): + """ + Data provider that uses samtools on a Sam or Bam file as it's source. 
+ + This can be piped through other providers (column, map, genome region, etc.). + + .. note:: that only the samtools 'view' command is currently implemented. + """ + FLAGS_WO_ARGS = 'bhHSu1xXcB' + FLAGS_W_ARGS = 'fFqlrs' + VALID_FLAGS = FLAGS_WO_ARGS + FLAGS_W_ARGS + + def __init__( self, dataset, options_string='', options_dict=None, regions=None, **kwargs ): + """ + :param options_string: samtools options in string form (flags separated + by spaces) + Optional: defaults to '' + :type options_string: str + :param options_dict: dictionary of samtools options + Optional: defaults to None + :type options_dict: dict or None + :param regions: list of samtools regions strings + Optional: defaults to None + :type regions: list of str or None + """ + #TODO: into validate_source + + #TODO: have to import these here due to circular ref in model/datatypes + import galaxy.datatypes.binary + import galaxy.datatypes.tabular + if( not( isinstance( dataset.datatype, galaxy.datatypes.tabular.Sam ) + or isinstance( dataset.datatype, galaxy.datatypes.binary.Bam ) ) ): + raise exceptions.InvalidDataProviderSource( + 'dataset must be a Sam or Bam datatype: %s' %( str( dataset.datatype ) ) ) + self.dataset = dataset + + options_dict = options_dict or {} + # ensure regions are strings + regions = [ str( r ) for r in regions ] if regions else [] + + #TODO: view only for now + #TODO: not properly using overriding super's validate_opts, command here + subcommand = 'view' + #TODO:?? do we need a path to samtools? + subproc_args = self.build_command_list( subcommand, options_string, options_dict, regions ) +#TODO: the composition/inheritance here doesn't make a lot sense + subproc_provider = external.SubprocessDataProvider( *subproc_args ) + super( SamtoolsDataProvider, self ).__init__( subproc_provider, **kwargs ) + + def build_command_list( self, subcommand, options_string, options_dict, regions ): + """ + Convert all init args to list form. + """ + command = [ 'samtools', subcommand ] + # add options and switches, input file, regions list (if any) + command.extend( self.to_options_list( options_string, options_dict ) ) + command.append( self.dataset.file_name ) + command.extend( regions ) + return command + + def to_options_list( self, options_string, options_dict ): + """ + Convert both options_string and options_dict to list form + while filtering out non-'valid' options. + """ + opt_list = [] + + # strip out any user supplied bash switch formating -> string of option chars + # then compress to single option string of unique, VALID flags with prefixed bash switch char '-' + options_string = options_string.strip( '- ' ) + validated_flag_list = set([ flag for flag in options_string if flag in self.FLAGS_WO_ARGS ]) + + # if sam add -S + if( ( isinstance( self.dataset.datatype, galaxy.datatypes.tabular.Sam ) + and ( 'S' not in validated_flag_list ) ) ): + validated_flag_list.append( 'S' ) + + if validated_flag_list: + opt_list.append( '-' + ''.join( validated_flag_list ) ) + + for flag, arg in options_dict.items(): + if flag in self.FLAGS_W_ARGS: + opt_list.extend([ '-' + flag, str( arg ) ]) + + return opt_list + + @classmethod + def extract_options_from_dict( cls, dictionary ): + """ + Separrates valid samtools key/value pair options from a dictionary and + returns both as a 2-tuple. + """ + # handy for extracting options from kwargs - but otherwise... 
+ #TODO: could be abstracted to util.extract( dict, valid_keys_list ) + options_dict = {} + new_kwargs = {} + for key, value in dictionary.items(): + if key in cls.FLAGS_W_ARGS: + options_dict[ key ] = value + else: + new_kwargs[ key ] = value + return options_dict, new_kwargs + + +class BcftoolsDataProvider( line.RegexLineDataProvider ): + """ + Data provider that uses an bcftools on a bcf (or vcf?) file as it's source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + def __init__( self, dataset, **kwargs ): + #TODO: as samtools + raise NotImplementedError() + super( BCFDataProvider, self ).__init__( dataset, **kwargs ) + + +class BGzipTabixDataProvider( base.DataProvider ): + """ + Data provider that uses an g(un)zip on a file as it's source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + def __init__( self, dataset, **kwargs ): + #TODO: as samtools - need more info on output format + raise NotImplementedError() + super( BGzipTabixDataProvider, self ).__init__( dataset, **kwargs ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/decorators.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/decorators.py @@ -0,0 +1,107 @@ +""" +DataProvider related decorators. +""" + +# I'd like to decorate the factory methods that give data_providers by the name they can be accessed from. e.g.: +#@provides( 'id_seq' ) # where 'id_seq' is some 'data_format' string/alias +#def get_id_seq_provider( dataset, **settings ): + +# then in some central dispatch (maybe data.Data), have it look up the proper method by the data_format string + +# also it would be good to have this decorator maintain a list of available providers (for a datatype) + +# i don't particularly want to cut up method names ( get_([\w_]*)_provider ) +#!/usr/bin/env python + +# adapted from: http://stackoverflow.com +# /questions/14095616/python-can-i-programmatically-decorate-class-methods-from-a-class-instance + +from functools import wraps +#from types import MethodType +import copy + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- +_DATAPROVIDER_CLASS_MAP_KEY = 'dataproviders' +_DATAPROVIDER_METHOD_NAME_KEY = '_dataprovider_name' + +# ----------------------------------------------------------------------------- +def has_dataproviders( cls ): + """ + Wraps a class (generally a Datatype), finds methods within that have been + decorated with `@dataprovider` and adds them, by their name, to a map + in the class. + + This allows a class to maintain a name -> method map, effectively + 'registering' dataprovider factory methods. + + .. example:: + @has_dataproviders + class MyDtype( data.Data ): + + @dataprovider_factory( 'bler' ) + def provide_some_bler( self, dataset, **settings ): + '''blerblerbler''' + dataset_source = providers.DatasetDataProvider( dataset ) + # ... chain other, intermidiate providers here + return providers.BlerDataProvider( dataset_source, **settings ) + + # use the base method in data.Data + provider = dataset.datatype.dataprovider( dataset, 'bler', + my_setting='blah', ... ) + # OR directly from the map + provider = dataset.datatype.dataproviders[ 'bler' ]( dataset, + my_setting='blah', ... ) + """ + #log.debug( 'has_dataproviders:', cls ) + # init the class dataproviders map if necc. 
+ if not hasattr( cls, _DATAPROVIDER_CLASS_MAP_KEY ): + setattr( cls, _DATAPROVIDER_CLASS_MAP_KEY, {} ) + else: + # need to deepcopy or subclasses will modify super.dataproviders as well + existing_dataproviders = getattr( cls, _DATAPROVIDER_CLASS_MAP_KEY ) + copied_dataproviders = copy.deepcopy( existing_dataproviders ) + setattr( cls, _DATAPROVIDER_CLASS_MAP_KEY, copied_dataproviders ) + + dataproviders = getattr( cls, _DATAPROVIDER_CLASS_MAP_KEY ) + + # scan for methods with dataprovider names and add them to the map + # note: this has a 'cascading' effect + # where it's possible to override a super's provider with a sub's + for attr_key, attr_value in cls.__dict__.iteritems(): + #log.debug( '\t key:', attr_key ) + # can't use isinstance( attr_value, MethodType ) bc of wrapping + if( ( callable( attr_value ) ) + and ( not attr_key.startswith( "__" ) ) + and ( getattr( attr_value, _DATAPROVIDER_METHOD_NAME_KEY, None ) ) ): + #log.debug( '\t\t is a dataprovider', attr_key ) + name = getattr( attr_value, _DATAPROVIDER_METHOD_NAME_KEY ) + dataproviders[ name ] = attr_value + + #log.debug( 'dataproviders:' ) + #for name, fn in cls.dataproviders.items(): + # log.debug( '\t ', name, '->', fn.__name__, fn ) + # log.debug( '\t\t ', fn.__doc__ ) + return cls + +def dataprovider_factory( name ): + """ + Wraps a class method and marks it as a dataprovider factory. + + :param name: what name/key to register the factory under in `cls.dataproviders` + :param type: any hashable var + """ + #log.debug( 'dataprovider:', name ) + def named_dataprovider_factory( func ): + #log.debug( 'named_dataprovider_factory:', name, '->', func.__name__ ) + setattr( func, _DATAPROVIDER_METHOD_NAME_KEY, name ) + #log.debug( '\t setting:', getattr( func, _DATAPROVIDER_METHOD_NAME_KEY ) ) + @wraps( func ) + def wrapped_dataprovider_factory( self, *args, **kwargs ): + #log.debug( 'wrapped_dataprovider_factory', name, self, args, kwargs ) + return func( self, *args, **kwargs ) + return wrapped_dataprovider_factory + return named_dataprovider_factory diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/exceptions.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/exceptions.py @@ -0,0 +1,33 @@ +""" +DataProvider related exceptions. +""" + +class InvalidDataProviderSource( TypeError ): + """ + Raised when a unusable source is passed to a provider. + """ + def __init__( self, source=None, msg='' ): + msg = msg or 'Invalid source for provider: %s' %( source ) + super( InvalidDataProviderSource, self ).__init__( msg ) + + +class NoProviderAvailable( TypeError ): + """ + Raised when no provider is found for the given `format_requested`. + + :param factory_source: the item that the provider was requested from + :param format_requested: the format_requested (a hashable key to access + `factory_source.datatypes` with) + + Both params are attached to this class and accessible to the try-catch + receiver. + + Meant to be used within a class that builds dataproviders (e.g. 
a Datatype) + """ + def __init__( self, factory_source, format_requested=None, msg='' ): + self.factory_source = factory_source + self.format_requested = format_requested + msg = msg or 'No provider available in factory_source "%s" for format requested' %( str( factory_source ) ) + if self.format_requested: + msg += ': "%s"' %( self.format_requested ) + super( NoProviderAvailable, self ).__init__( msg ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/external.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/external.py @@ -0,0 +1,165 @@ +""" +Data providers that iterate over a source that is not in memory +or not in a file. +""" + +import subprocess +import urllib, urllib2 +import gzip + +import base +import line + +_TODO = """ +YAGNI: ftp, image, cryptos, sockets +job queue +admin: admin server log rgx/stats, ps aux +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- server subprocess / external prog +class SubprocessDataProvider( base.DataProvider ): + """ + Data provider that uses the output from an intermediate program and + subprocess as it's data source. + """ + #TODO: need better ways of checking returncode, stderr for errors and raising + def __init__( self, *args, **kwargs ): + """ + :param args: the list of strings used to build commands. + :type args: variadic function args + """ + self.exit_code = None + command_list = args + self.popen = self.subprocess( *command_list, **kwargs ) + #TODO:?? not communicate()? + super( SubprocessDataProvider, self ).__init__( self.popen.stdout ) + self.exit_code = self.popen.poll() + + #NOTE: there's little protection here v. sending a ';' and a dangerous command here + # but...we're all adults here, right? ...RIGHT?! + def subprocess( self, *command_list, **kwargs ): + """ + :param args: the list of strings used as commands. + :type args: variadic function args + """ + try: + # how expensive is this? + popen = subprocess.Popen( command_list, stderr=subprocess.PIPE, stdout=subprocess.PIPE ) + log.info( 'opened subrocess (%s), PID: %s' %( str( command_list ), str( popen.pid ) ) ) + #log.debug( 'stderr:\n%s\n' %( popen.stderr.read() ) ) + + except OSError, os_err: + command_str = ' '.join( self.command ) + raise OSError( ' '.join([ str( os_err ), ':', command_str ]) ) + + return popen + + def __exit__( self, *args ): + # poll the subrocess for an exit code + self.exit_code = self.popen.poll() + log.info( '%s.__exit__, exit_code: %s' %( str( self ), str( self.exit_code ) ) ) + return super( SubprocessDataProvider, self ).__exit__( *args ) + + def __str__( self ): + # provide the pid and current return code + source_str = '' + if hasattr( self, 'popen' ): + source_str = '%s:%s' %( str( self.popen.pid ), str( self.popen.poll() ) ) + return '%s(%s)' %( self.__class__.__name__, str( source_str ) ) + + +class RegexSubprocessDataProvider( line.RegexLineDataProvider ): + """ + RegexLineDataProvider that uses a SubprocessDataProvider as it's data source. + """ + # this is a conv. class and not really all that necc... + def __init__( self, *args, **kwargs ): + # using subprocess as proxy data source in filtered line prov. 
+ subproc_provider = SubprocessDataProvider( *args ) + super( RegexSubprocessDataProvider, self ).__init__( subproc_provider, **kwargs ) + + +# ----------------------------------------------------------------------------- other apis +class URLDataProvider( base.DataProvider ): + """ + Data provider that uses the contents of a URL for it's data source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + VALID_METHODS = ( 'GET', 'POST' ) + + def __init__( self, url, method='GET', data=None, **kwargs ): + """ + :param url: the base URL to open. + :param method: the HTTP method to use. + Optional: defaults to 'GET' + :param data: any data to pass (either in query for 'GET' + or as post data with 'POST') + :type data: dict + """ + self.url = url + self.method = method + + self.data = data or {} + encoded_data = urllib.urlencode( self.data ) + + if method == 'GET': + self.url += '?%s' %( encoded_data ) + opened = urllib2.urlopen( url ) + elif method == 'POST': + opened = urllib2.urlopen( url, encoded_data ) + else: + raise ValueError( 'Not a valid method: %s' %( method ) ) + + super( URLDataProvider, self ).__init__( opened, **kwargs ) + #NOTE: the request object is now accessible as self.source + + def __enter__( self ): + pass + + def __exit__( self, *args ): + self.source.close() + + +# ----------------------------------------------------------------------------- generic compression +class GzipDataProvider( base.DataProvider ): + """ + Data provider that uses g(un)zip on a file as it's source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + def __init__( self, source, **kwargs ): + unzipped = gzip.GzipFile( source, 'rb' ) + super( GzipDataProvider, self ).__init__( unzipped, **kwargs ) + #NOTE: the GzipFile is now accessible in self.source + + +# ----------------------------------------------------------------------------- intermediate tempfile +class TempfileDataProvider( base.DataProvider ): + """ + Writes the data from the given source to a temp file, allowing + it to be used as a source where a file_name is needed (e.g. as a parameter + to a command line tool: samtools view -t <this_provider.source.file_name>) + """ + def __init__( self, source, **kwargs ): + #TODO: + raise NotImplementedError() + # write the file here + self.create_file + super( TempfileDataProvider, self ).__init__( self.tmp_file, **kwargs ) + + def create_file( self ): + self.tmp_file = tempfile.NamedTemporaryFile() + return self.tmp_file + + def write_to_file( self ): + parent_gen = super( TempfileDataProvider, self ).__iter__() + #??? + with open( self.tmp_file, 'w' ) as open_file: + for datum in parent_gen: + open_file.write( datum + '\n' ) + This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/f48f86a23282/ Changeset: f48f86a23282 Branch: search User: Kyle Ellrott Date: 2013-06-17 21:00:32 Summary: Adding the ability to search the job table Affected #: 4 files diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -157,7 +157,10 @@ return total -class Job( object ): +class Job( object, APIItem ): + api_collection_visible_keys = [ 'id' ] + api_element_visible_keys = [ 'id' ] + """ A job represents a request to run a tool given input datasets, tool parameters, and output datasets. 
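For orientation, here is a minimal sketch of the dictionary the new Job.get_api_value() (added in the hunk below) produces; the key names are taken from the diff, while the tool id, parameter/dataset names, and numeric ids are hypothetical:

# Rough shape of Job.get_api_value() as of this changeset (values are made up).
job_dict = {
    'id': 42,                      # from api_collection/element_visible_keys
    'tool_name': 'cat1',           # actually the job's tool_id string
    'params': {
        'dbkey': '"hg19"',         # JobParameter values are JSON-encoded strings
        'input1': {'hda_id': 7},   # in this revision input datasets are folded into params
    },
    'outputs': {
        'out_file1': {'hda_id': 8},
    },
}
# The nested 'hda_id'/'ldda_id' values are why encode_all_ids() gains a
# recursive=True flag in this same changeset: without it, only the top-level
# 'id' would be security-encoded before the search API returns the row.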
@@ -360,6 +363,28 @@ dataset.blurb = 'deleted' dataset.peek = 'Job deleted' dataset.info = 'Job output deleted by user before job completed' + def get_api_value( self, view='collection' ): + rval = super( Job, self ).get_api_value( view=view ) + rval['tool_name'] = self.tool_id + param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) + for i in self.input_datasets: + if i.dataset is not None: + param_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.input_library_datasets: + if i.dataset is not None: + param_dict[i.name] = {"ldda_id" : i.dataset.id} + rval['params'] = param_dict + + output_dict = {} + for i in self.output_datasets: + if i.dataset is not None: + output_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.output_library_datasets: + if i.dataset is not None: + output_dict[i.name] = {"ldda_id" : i.dataset.id} + rval['outputs'] = output_dict + + return rval class Task( object ): """ diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -31,11 +31,10 @@ eggs.require("Parsley") import parsley -from galaxy.model import HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset -from galaxy.model import (StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, -HistoryDatasetAssociationTagAssociation, -ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation) -from galaxy.model import ToolVersion +from galaxy.model import (HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, +History, Library, LibraryFolder, LibraryDataset,StoredWorkflowTagAssociation, +StoredWorkflow, HistoryTagAssociation,HistoryDatasetAssociationTagAssociation, +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, ToolVersion) from sqlalchemy import and_ from sqlalchemy.orm import aliased @@ -383,6 +382,22 @@ def search(self, trans): self.query = trans.sa_session.query( StoredWorkflow ) + + +################## +#Job Searching +################## + +class JobView(ViewQueryBaseClass): + DOMAIN = "job" + FIELDS = { + } + + def search(self, trans): + self.query = trans.sa_session.query( Job ) + + + """ The view mapping takes a user's name for a table and maps it to a View class that will handle queries @@ -398,7 +413,8 @@ 'hda' : HistoryDatasetView, 'history' : HistoryView, 'workflow' : WorkflowView, - 'tool' : ToolView + 'tool' : ToolView, + 'job' : JobView, } """ diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/web/base/controller.py --- a/lib/galaxy/web/base/controller.py +++ b/lib/galaxy/web/base/controller.py @@ -146,7 +146,7 @@ def get_role( self, trans, id, check_ownership=False, check_accessible=False, deleted=None ): return self.get_object( trans, id, 'Role', check_ownership=False, check_accessible=False, deleted=deleted ) - def encode_all_ids( self, trans, rval ): + def encode_all_ids( self, trans, rval, recursive=False ): """ Encodes all integer values in the dict rval whose keys are 'id' or end with '_id' @@ -160,6 +160,9 @@ rval[k] = trans.security.encode_id( v ) except: pass # probably already encoded + else: + if recursive and type(v) == dict: + rval[k] = self.encode_all_ids(trans, v, recursive) return rval Root = BaseController diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/webapps/galaxy/api/search.py --- 
a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -46,5 +46,5 @@ append = True if append: row = query.item_to_api_value(item) - out.append( self.encode_all_ids( trans, row) ) + out.append( self.encode_all_ids( trans, row, True) ) return { 'results' : out } https://bitbucket.org/galaxy/galaxy-central/commits/0f52fea874ae/ Changeset: 0f52fea874ae Branch: search User: Kyle Ellrott Date: 2013-06-17 21:56:36 Summary: Adding input/output hda selection filters to job selection view. Affected #: 2 files diff -r f48f86a2328245ddf48230609c1e53125db00740 -r 0f52fea874ae82b9cec08be1e0dd7ce5e44bcef3 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -367,14 +367,20 @@ rval = super( Job, self ).get_api_value( view=view ) rval['tool_name'] = self.tool_id param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) + rval['params'] = param_dict + + input_dict = {} for i in self.input_datasets: if i.dataset is not None: - param_dict[i.name] = {"hda_id" : i.dataset.id} + input_dict[i.name] = {"hda_id" : i.dataset.id} for i in self.input_library_datasets: if i.dataset is not None: - param_dict[i.name] = {"ldda_id" : i.dataset.id} - rval['params'] = param_dict - + input_dict[i.name] = {"ldda_id" : i.dataset.id} + for k in input_dict: + if k in param_dict: + del param_dict[k] + rval['inputs'] = input_dict + output_dict = {} for i in self.output_datasets: if i.dataset is not None: @@ -383,7 +389,7 @@ if i.dataset is not None: output_dict[i.name] = {"ldda_id" : i.dataset.id} rval['outputs'] = output_dict - + return rval class Task( object ): diff -r f48f86a2328245ddf48230609c1e53125db00740 -r 0f52fea874ae82b9cec08be1e0dd7ce5e44bcef3 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -34,8 +34,10 @@ from galaxy.model import (HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset,StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation,HistoryDatasetAssociationTagAssociation, -ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, ToolVersion) +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, JobParameter, +JobToInputDatasetAssociation, JobToOutputDatasetAssociation, ToolVersion) +from galaxy.util.json import to_json_string from sqlalchemy import and_ from sqlalchemy.orm import aliased @@ -388,9 +390,51 @@ #Job Searching ################## + + +def job_param_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobParameter ) + param_name = re.sub(r'^param.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.value == to_json_string(right) + ) + ) + +def job_input_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToInputDatasetAssociation ) + param_name = re.sub(r'^input_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + +def job_output_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToOutputDatasetAssociation ) + param_name = re.sub(r'^output_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + + class JobView(ViewQueryBaseClass): DOMAIN = "job" FIELDS = { + 'param' : ViewField('param', 
handler=job_param_filter), + 'input_hda' : ViewField('input_hda', handler=job_input_hda_filter, id_decode=True), + 'output_hda' : ViewField('output_hda', handler=job_output_hda_filter, id_decode=True) } def search(self, trans): https://bitbucket.org/galaxy/galaxy-central/commits/d0e2ed998c9d/ Changeset: d0e2ed998c9d Branch: search User: Kyle Ellrott Date: 2013-06-18 00:56:53 Summary: Checking user_id for output for job info from search engine Affected #: 1 file diff -r 0f52fea874ae82b9cec08be1e0dd7ce5e44bcef3 -r d0e2ed998c9d2fdbdab22467aaa994edc0eadaba lib/galaxy/webapps/galaxy/api/search.py --- a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -40,8 +40,10 @@ if type( item ) in ( trans.app.model.LibraryFolder, trans.app.model.LibraryDatasetDatasetAssociation, trans.app.model.LibraryDataset ): if (trans.app.security_agent.can_access_library_item( trans.get_current_user_roles(), item, trans.user ) ): append = True - if not append: - if hasattr(item, 'dataset'): + elif type( item ) in trans.app.model.Job: + if item.used_id == trans.user or trans.user_is_admin(): + append = True + elif hasattr(item, 'dataset'): if trans.app.security_agent.can_access_dataset( current_user_roles, item.dataset ): append = True if append: https://bitbucket.org/galaxy/galaxy-central/commits/fd3a82d33bb6/ Changeset: fd3a82d33bb6 Branch: search User: Kyle Ellrott Date: 2013-06-18 01:17:55 Summary: Adding tool_name filter to Job view search engine Affected #: 1 file diff -r d0e2ed998c9d2fdbdab22467aaa994edc0eadaba -r fd3a82d33bb6896ba6395a5e83add5c6ac7f7fbf lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -432,6 +432,7 @@ class JobView(ViewQueryBaseClass): DOMAIN = "job" FIELDS = { + 'tool_name' : ViewField('tool_name', sqlalchemy_field=Job.tool_id), 'param' : ViewField('param', handler=job_param_filter), 'input_hda' : ViewField('input_hda', handler=job_input_hda_filter, id_decode=True), 'output_hda' : ViewField('output_hda', handler=job_output_hda_filter, id_decode=True) https://bitbucket.org/galaxy/galaxy-central/commits/170dd4c157b8/ Changeset: 170dd4c157b8 User: dannon Date: 2013-06-18 23:16:56 Summary: Merged in kellrott/galaxy-central/search (pull request #182) Add ability to search Job data via the search api Affected #: 4 files diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -157,7 +157,10 @@ return total -class Job( object ): +class Job( object, APIItem ): + api_collection_visible_keys = [ 'id' ] + api_element_visible_keys = [ 'id' ] + """ A job represents a request to run a tool given input datasets, tool parameters, and output datasets. 
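Taken together, the fields registered on JobView above (tool_name, param, input_hda, output_hda) let clients of the search API controller (lib/galaxy/webapps/galaxy/api/search.py) filter the job table. A rough sketch of such a request follows; the SQL-like grammar is handled by the Parsley parser in lib/galaxy/model/search.py and its exact form is assumed here, as are the 'query' payload key, the tool id, and the encoded dataset id:

# Hypothetical queries against the new 'job' search domain; the field names
# match JobView.FIELDS, everything else here is illustrative.
payload = {'query': "select * from job where tool_name = 'cat1'"}
# or, to find jobs that consumed a particular HDA (input_hda is declared with
# id_decode=True, so the security-encoded id from the API can be passed as-is):
payload = {'query': "select * from job where input_hda = 'f2db41e1fa331b3e'"}
# Each matching row is serialized with Job.get_api_value() and run through
# encode_all_ids(trans, row, True) before being returned under 'results'.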
@@ -360,6 +363,34 @@ dataset.blurb = 'deleted' dataset.peek = 'Job deleted' dataset.info = 'Job output deleted by user before job completed' + def get_api_value( self, view='collection' ): + rval = super( Job, self ).get_api_value( view=view ) + rval['tool_name'] = self.tool_id + param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) + rval['params'] = param_dict + + input_dict = {} + for i in self.input_datasets: + if i.dataset is not None: + input_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.input_library_datasets: + if i.dataset is not None: + input_dict[i.name] = {"ldda_id" : i.dataset.id} + for k in input_dict: + if k in param_dict: + del param_dict[k] + rval['inputs'] = input_dict + + output_dict = {} + for i in self.output_datasets: + if i.dataset is not None: + output_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.output_library_datasets: + if i.dataset is not None: + output_dict[i.name] = {"ldda_id" : i.dataset.id} + rval['outputs'] = output_dict + + return rval class Task( object ): """ diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -31,10 +31,13 @@ eggs.require("Parsley") import parsley -from galaxy.model import HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset -from galaxy.model import StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation -from galaxy.model import ToolVersion +from galaxy.model import (HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, +History, Library, LibraryFolder, LibraryDataset,StoredWorkflowTagAssociation, +StoredWorkflow, HistoryTagAssociation,HistoryDatasetAssociationTagAssociation, +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, JobParameter, +JobToInputDatasetAssociation, JobToOutputDatasetAssociation, ToolVersion) +from galaxy.util.json import to_json_string from sqlalchemy import and_ from sqlalchemy.orm import aliased @@ -269,12 +272,30 @@ #History Dataset Searching ################## +def history_dataset_handle_tag(view, left, operator, right): + if operator == "=": + view.do_query = True + #aliasing the tag association table, so multiple links to different tags can be formed during a single query + tag_table = aliased(HistoryDatasetAssociationTagAssociation) + + view.query = view.query.filter( + HistoryDatasetAssociation.id == tag_table.history_dataset_association_id + ) + tmp = right.split(":") + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) + if len(tmp) > 1: + view.query = view.query.filter( tag_table.user_value == tmp[1] ) + else: + raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) + class HistoryDatasetView(ViewQueryBaseClass): DOMAIN = "history_dataset" FIELDS = { 'name' : ViewField('name', sqlalchemy_field=HistoryDatasetAssociation.name), - 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True) + 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True), + 'tag' : ViewField("tag", handler=history_dataset_handle_tag) + } def search(self, trans): @@ -289,13 +310,14 @@ def history_handle_tag(view, left, operator, right): if operator == "=": view.do_query = True + tag_table = aliased(HistoryTagAssociation) view.query = view.query.filter( - History.id == HistoryTagAssociation.history_id + 
History.id == tag_table.history_id ) tmp = right.split(":") - view.query = view.query.filter( HistoryTagAssociation.user_tname == tmp[0] ) + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) if len(tmp) > 1: - view.query = view.query.filter( HistoryTagAssociation.user_value == tmp[1] ) + view.query = view.query.filter( tag_table.user_value == tmp[1] ) else: raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) @@ -362,6 +384,65 @@ def search(self, trans): self.query = trans.sa_session.query( StoredWorkflow ) + + +################## +#Job Searching +################## + + + +def job_param_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobParameter ) + param_name = re.sub(r'^param.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.value == to_json_string(right) + ) + ) + +def job_input_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToInputDatasetAssociation ) + param_name = re.sub(r'^input_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + +def job_output_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToOutputDatasetAssociation ) + param_name = re.sub(r'^output_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + + +class JobView(ViewQueryBaseClass): + DOMAIN = "job" + FIELDS = { + 'tool_name' : ViewField('tool_name', sqlalchemy_field=Job.tool_id), + 'param' : ViewField('param', handler=job_param_filter), + 'input_hda' : ViewField('input_hda', handler=job_input_hda_filter, id_decode=True), + 'output_hda' : ViewField('output_hda', handler=job_output_hda_filter, id_decode=True) + } + + def search(self, trans): + self.query = trans.sa_session.query( Job ) + + + """ The view mapping takes a user's name for a table and maps it to a View class that will handle queries @@ -377,7 +458,8 @@ 'hda' : HistoryDatasetView, 'history' : HistoryView, 'workflow' : WorkflowView, - 'tool' : ToolView + 'tool' : ToolView, + 'job' : JobView, } """ diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/web/base/controller.py --- a/lib/galaxy/web/base/controller.py +++ b/lib/galaxy/web/base/controller.py @@ -146,7 +146,7 @@ def get_role( self, trans, id, check_ownership=False, check_accessible=False, deleted=None ): return self.get_object( trans, id, 'Role', check_ownership=False, check_accessible=False, deleted=deleted ) - def encode_all_ids( self, trans, rval ): + def encode_all_ids( self, trans, rval, recursive=False ): """ Encodes all integer values in the dict rval whose keys are 'id' or end with '_id' @@ -160,6 +160,9 @@ rval[k] = trans.security.encode_id( v ) except: pass # probably already encoded + else: + if recursive and type(v) == dict: + rval[k] = self.encode_all_ids(trans, v, recursive) return rval Root = BaseController diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/webapps/galaxy/api/search.py --- a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -40,11 +40,13 @@ if type( item ) in ( trans.app.model.LibraryFolder, trans.app.model.LibraryDatasetDatasetAssociation, trans.app.model.LibraryDataset ): if (trans.app.security_agent.can_access_library_item( 
trans.get_current_user_roles(), item, trans.user ) ): append = True - if not append: - if hasattr(item, 'dataset'): + elif type( item ) in trans.app.model.Job: + if item.used_id == trans.user or trans.user_is_admin(): + append = True + elif hasattr(item, 'dataset'): if trans.app.security_agent.can_access_dataset( current_user_roles, item.dataset ): append = True if append: row = query.item_to_api_value(item) - out.append( self.encode_all_ids( trans, row) ) + out.append( self.encode_all_ids( trans, row, True) ) return { 'results' : out } Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.