1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/9345e15ec4b7/
Changeset: 9345e15ec4b7
User: greg
Date: 2013-08-30 19:08:22
Summary: Fix for zero-based comparison in the tool shed.
Affected #: 1 file
diff -r 55bbaa8f5017e32dbe7968514a4f87b941ed2a0d -r 9345e15ec4b7f81271a071f0703e89fff0c5b3ac lib/galaxy/webapps/tool_shed/controllers/repository.py
--- a/lib/galaxy/webapps/tool_shed/controllers/repository.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/repository.py
@@ -2695,7 +2695,7 @@
tool_shed_status_dict[ 'revision_upgrade' ] = 'False'
break
if metadata_changeset_revision == changeset_revision:
- if num_metadata_revisions - index > 0:
+ if num_metadata_revisions - index > 1:
tool_shed_status_dict[ 'revision_upgrade' ] = 'True'
else:
tool_shed_status_dict[ 'revision_upgrade' ] = 'False'
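For context, a minimal sketch (not the actual controller code; the list contents and loop body are illustrative) of why the zero-based index needs the '> 1' comparison: when the installed changeset is already the newest metadata revision, its index is num_metadata_revisions - 1, so the difference is 1 and no upgrade should be reported.

    # Illustrative only: metadata revisions ordered oldest to newest.
    metadata_revisions = ['rev_a', 'rev_b', 'rev_c']
    num_metadata_revisions = len(metadata_revisions)
    installed_revision = 'rev_c'  # already at the newest metadata revision

    for index, changeset_revision in enumerate(metadata_revisions):
        if changeset_revision == installed_revision:
            # Old test: (3 - 2) > 0 is True, wrongly reporting an upgrade.
            # Fixed test: (3 - 2) > 1 is False, so no upgrade is reported.
            print(num_metadata_revisions - index > 1)  # False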
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/55bbaa8f5017/
Changeset: 55bbaa8f5017
User: dannon
Date: 2013-08-30 18:06:10
Summary: Re-apply changes that got reverted in 10492:8f6f926f912e
Affected #: 2 files
diff -r 8f6f926f912e2aea3f11905498fd5f04f857ec97 -r 55bbaa8f5017e32dbe7968514a4f87b941ed2a0d lib/galaxy/webapps/tool_shed/controllers/repository.py
--- a/lib/galaxy/webapps/tool_shed/controllers/repository.py
+++ b/lib/galaxy/webapps/tool_shed/controllers/repository.py
@@ -2664,7 +2664,7 @@
if repository:
repo_dir = repository.repo_path( trans.app )
repo = hg.repository( suc.get_configured_ui(), repo_dir )
- tool_shed_status_dict = {}
+ tool_shed_status_dict = {}
# Handle repository deprecation.
tool_shed_status_dict[ 'repository_deprecated' ] = str( repository.deprecated )
# Handle latest installable revision.
@@ -2680,7 +2680,7 @@
if changeset_revision == repository.tip( trans.app ):
tool_shed_status_dict[ 'revision_update' ] = 'False'
else:
- repository_metadata = suc.get_repository_metadata_by_changeset_revision( trans,
+ repository_metadata = suc.get_repository_metadata_by_changeset_revision( trans,
trans.security.encode_id( repository.id ),
changeset_revision )
if repository_metadata:
diff -r 8f6f926f912e2aea3f11905498fd5f04f857ec97 -r 55bbaa8f5017e32dbe7968514a4f87b941ed2a0d lib/tool_shed/util/shed_util_common.py
--- a/lib/tool_shed/util/shed_util_common.py
+++ b/lib/tool_shed/util/shed_util_common.py
@@ -31,7 +31,7 @@
eggs.require( 'markupsafe' )
import markupsafe
-
+
log = logging.getLogger( __name__ )
CHUNK_SIZE = 2**20 # 1Mb
@@ -209,7 +209,7 @@
return tool_shed_url.split( ':' )[ 0 ]
return tool_shed_url.rstrip( '/' )
-def clone_repository( repository_clone_url, repository_file_dir, ctx_rev ):
+def clone_repository( repository_clone_url, repository_file_dir, ctx_rev ):
"""Clone the repository up to the specified changeset_revision. No subsequent revisions will be present in the cloned repository."""
try:
commands.clone( get_configured_ui(),
@@ -263,7 +263,7 @@
# was later uninstalled, this value should be received as the value of that change set to which the repository had been updated just prior
# to it being uninstalled.
current_changeset_revision = installed_changeset_revision
- sa_session = app.model.context.current
+ sa_session = app.model.context.current
tool_shed = get_tool_shed_from_clone_url( repository_clone_url )
if not owner:
owner = get_repository_owner_from_clone_url( repository_clone_url )
@@ -351,7 +351,7 @@
new_elem = XmlET.SubElement( elem, key )
new_elem.text = value
return elem
-
+
def generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=None, **kwd ):
return generate_repository_info_elem( tool_shed_repository.tool_shed,
tool_shed_repository.name,
@@ -394,7 +394,7 @@
def generate_tool_guid( repository_clone_url, tool ):
"""
Generate a guid for the installed tool. It is critical that this guid matches the guid for
- the tool in the Galaxy tool shed from which it is being installed. The form of the guid is
+ the tool in the Galaxy tool shed from which it is being installed. The form of the guid is
<tool shed host>/repos/<repository owner>/<repository name>/<tool id>/<tool version>
"""
tmp_url = clean_repository_clone_url( repository_clone_url )
@@ -417,7 +417,7 @@
tool_config = tool_dict[ 'tool_config' ]
file_name = strip_path( tool_config )
guids_and_configs[ guid ] = file_name
- # Parse the shed_tool_conf file in which all of this repository's tools are defined and generate the tool_panel_dict.
+ # Parse the shed_tool_conf file in which all of this repository's tools are defined and generate the tool_panel_dict.
tree, error_message = xml_util.parse_xml( shed_tool_conf )
if tree is None:
return tool_panel_dict
@@ -535,7 +535,7 @@
Send a request to the tool shed to retrieve the ctx_rev for a repository defined by the combination of a name, owner and changeset
revision.
"""
- url = url_join( tool_shed_url,
+ url = url_join( tool_shed_url,
'repository/get_ctx_rev?name=%s&owner=%s&changeset_revision=%s' % ( name, owner, changeset_revision ) )
ctx_rev = common_util.tool_shed_get( app, tool_shed_url, url )
return ctx_rev
@@ -1385,7 +1385,7 @@
def reset_previously_installed_repository( trans, repository ):
"""
- Reset the atrributes of a tool_shed_repository that was previsouly installed. The repository will be in some state other than with a
+ Reset the atrributes of a tool_shed_repository that was previsouly installed. The repository will be in some state other than with a
status of INSTALLED, so all atributes will be set to the default (NEW( state. This will enable the repository to be freshly installed.
"""
repository.deleted = False
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/f33687b0e590/
Changeset: f33687b0e590
User: dannon
Date: 2013-08-30 05:32:37
Summary: Strip trailing whitespace in data_providers
Affected #: 1 file
diff -r c05e0e9714f5bd2ecfa96cfd6b6f7152b815e82f -r f33687b0e59013a924a696f1c9630cae960b74a2 lib/galaxy/visualization/data_providers/basic.py
--- a/lib/galaxy/visualization/data_providers/basic.py
+++ b/lib/galaxy/visualization/data_providers/basic.py
@@ -15,37 +15,37 @@
self.original_dataset = original_dataset
self.dependencies = dependencies
self.error_max_vals = error_max_vals
-
+
def has_data( self, **kwargs ):
"""
Returns true if dataset has data in the specified genome window, false
otherwise.
"""
raise Exception( "Unimplemented Function" )
-
+
def get_iterator( self, **kwargs ):
"""
Returns an iterator that provides data in the region chrom:start-end
"""
raise Exception( "Unimplemented Function" )
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Process data from an iterator to a format that can be provided to client.
"""
- raise Exception( "Unimplemented Function" )
-
+ raise Exception( "Unimplemented Function" )
+
def get_data( self, chrom, start, end, start_val=0, max_vals=sys.maxint, **kwargs ):
- """
- Returns data as specified by kwargs. start_val is the first element to
+ """
+ Returns data as specified by kwargs. start_val is the first element to
return and max_vals indicates the number of values to return.
-
+
Return value must be a dictionary with the following attributes:
dataset_type, data
"""
iterator = self.get_iterator( chrom, start, end )
return self.process_data( iterator, start_val, max_vals, **kwargs )
-
+
def write_data_to_file( self, filename, **kwargs ):
"""
Write data in region defined by chrom, start, and end to a file.
@@ -56,20 +56,20 @@
class ColumnDataProvider( BaseDataProvider ):
""" Data provider for columnar data """
MAX_LINES_RETURNED = 30000
-
+
def __init__( self, original_dataset, max_lines_returned=MAX_LINES_RETURNED ):
# Compatibility check.
if not isinstance( original_dataset.datatype, Tabular ):
raise Exception( "Data provider can only be used with tabular data" )
-
+
# Attribute init.
self.original_dataset = original_dataset
# allow throttling
self.max_lines_returned = max_lines_returned
-
+
def get_data( self, columns=None, start_val=0, max_vals=None, skip_comments=True, **kwargs ):
"""
- Returns data from specified columns in dataset. Format is list of lists
+ Returns data from specified columns in dataset. Format is list of lists
where each list is a line of data.
"""
if not columns:
@@ -81,20 +81,20 @@
max_vals = min([ max_vals, self.max_lines_returned ])
except ( ValueError, TypeError ):
max_vals = self.max_lines_returned
-
+
try:
start_val = int( start_val )
start_val = max([ start_val, 0 ])
except ( ValueError, TypeError ):
start_val = 0
-
+
# skip comment lines (if any/avail)
# pre: should have original_dataset and
if( skip_comments
and self.original_dataset.metadata.comment_lines
and start_val < self.original_dataset.metadata.comment_lines ):
start_val = int( self.original_dataset.metadata.comment_lines )
-
+
# columns is an array of ints for now (should handle column names later)
columns = from_json_string( columns )
for column in columns:
@@ -103,7 +103,7 @@
"column index (%d) must be positive and less" % ( column )
+ " than the number of columns: %d" % ( self.original_dataset.metadata.columns ) )
#print columns, start_val, max_vals, skip_comments, kwargs
-
+
# set up the response, column lists
response = {}
response[ 'data' ] = data = [ [] for column in columns ]
@@ -113,9 +113,9 @@
'count' : 0,
'sum' : 0
} for column in columns ]
-
+
column_types = [ self.original_dataset.metadata.column_types[ column ] for column in columns ]
-
+
# function for casting by column_types
def cast_val( val, type ):
""" Cast value based on type. Return None if can't be cast """
@@ -126,12 +126,12 @@
try: val = float( val )
except: return None
return val
-
+
returning_data = False
f = open( self.original_dataset.file_name )
#TODO: add f.seek if given fptr in kwargs
for count, line in enumerate( f ):
-
+
# check line v. desired start, end
if count < start_val:
continue
@@ -139,7 +139,7 @@
break
returning_data = True
-
+
fields = line.split()
fields_len = len( fields )
#NOTE: this will return None/null for abberrant column values (including bad indeces)
@@ -149,39 +149,39 @@
if column < fields_len:
column_val = cast_val( fields[ column ], column_type )
if column_val != None:
-
+
# if numeric, maintain min, max, sum
if( column_type == 'float' or column_type == 'int' ):
if( ( meta[ index ][ 'min' ] == None ) or ( column_val < meta[ index ][ 'min' ] ) ):
meta[ index ][ 'min' ] = column_val
-
+
if( ( meta[ index ][ 'max' ] == None ) or ( column_val > meta[ index ][ 'max' ] ) ):
meta[ index ][ 'max' ] = column_val
-
+
meta[ index ][ 'sum' ] += column_val
-
+
# maintain a count - for other stats
meta[ index ][ 'count' ] += 1
data[ index ].append( column_val )
-
+
response[ 'endpoint' ] = dict( last_line=( count - 1 ), file_ptr=f.tell() )
f.close()
if not returning_data: return None
-
+
for index, meta in enumerate( response[ 'meta' ] ):
column_type = column_types[ index ]
count = meta[ 'count' ]
-
+
if( ( column_type == 'float' or column_type == 'int' )
and count ):
meta[ 'mean' ] = float( meta[ 'sum' ] ) / count
-
+
sorted_data = sorted( response[ 'data' ][ index ] )
middle_index = ( count / 2 ) - 1
if count % 2 == 0:
meta[ 'median' ] = ( ( sorted_data[ middle_index ] + sorted_data[( middle_index + 1 )] ) / 2.0 )
-
+
else:
meta[ 'median' ] = sorted_data[ middle_index ]
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/c05e0e9714f5/
Changeset: c05e0e9714f5
User: dannon
Date: 2013-08-30 05:31:55
Summary: Realizing it's abstract, still require appropriate inputs to get_data in BaseDataProvider
Affected #: 1 file
diff -r c9b77dbc13165b020dfbe67b88faa99e5bf4d9fb -r c05e0e9714f5bd2ecfa96cfd6b6f7152b815e82f lib/galaxy/visualization/data_providers/basic.py
--- a/lib/galaxy/visualization/data_providers/basic.py
+++ b/lib/galaxy/visualization/data_providers/basic.py
@@ -35,7 +35,7 @@
"""
raise Exception( "Unimplemented Function" )
- def get_data( self, start_val=0, max_vals=sys.maxint, **kwargs ):
+ def get_data( self, chrom, start, end, start_val=0, max_vals=sys.maxint, **kwargs ):
"""
Returns data as specified by kwargs. start_val is the first element to
return and max_vals indicates the number of values to return.
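A hedged sketch of the intent here: even though BaseDataProvider.get_data only delegates, declaring chrom, start and end as positional parameters (instead of hiding them in **kwargs) documents the contract subclasses rely on and makes a missing region fail immediately with a TypeError at the call site. The method bodies below mirror the surrounding diffs but are illustrative, not the Galaxy implementation.

    class BaseDataProvider(object):
        def get_data(self, chrom, start, end, start_val=0, max_vals=None, **kwargs):
            # Region arguments are explicit, so provider.get_data() without a
            # region raises TypeError instead of failing later in a subclass
            # that expected kwargs['chrom'] to exist.
            iterator = self.get_iterator(chrom, start, end)
            return self.process_data(iterator, start_val, max_vals, **kwargs)

        def get_iterator(self, chrom, start, end, **kwargs):
            raise Exception("Unimplemented Function")

        def process_data(self, iterator, start_val=0, max_vals=None, **kwargs):
            raise Exception("Unimplemented Function")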
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
2 new commits in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/7038567aedc2/
Changeset: 7038567aedc2
User: dannon
Date: 2013-08-30 05:24:24
Summary: Add missing os import to ngsindex.py
Affected #: 1 file
diff -r 068acf051f9acfb8058f2bc50b0361d9a59d8cdb -r 7038567aedc2978d43386838441d3536a3d34650 lib/galaxy/datatypes/ngsindex.py
--- a/lib/galaxy/datatypes/ngsindex.py
+++ b/lib/galaxy/datatypes/ngsindex.py
@@ -1,7 +1,9 @@
"""
NGS indexes
"""
+import os
import logging
+
from metadata import MetadataElement
from images import Html
https://bitbucket.org/galaxy/galaxy-central/commits/c9b77dbc1316/
Changeset: c9b77dbc1316
User: dannon
Date: 2013-08-30 05:26:57
Summary: Data manager element loading assertion would fail if executed; use elem.tag and not root.tag
Affected #: 1 file
diff -r 7038567aedc2978d43386838441d3536a3d34650 -r c9b77dbc13165b020dfbe67b88faa99e5bf4d9fb lib/galaxy/tools/data_manager/manager.py
--- a/lib/galaxy/tools/data_manager/manager.py
+++ b/lib/galaxy/tools/data_manager/manager.py
@@ -2,7 +2,7 @@
pkg_resources.require( "simplejson" )
-import os, shutil, errno
+import os, errno
import simplejson
from galaxy import util
@@ -112,7 +112,7 @@
if elem is not None:
self.load_from_element( elem, tool_path or self.data_managers.tool_path )
def load_from_element( self, elem, tool_path ):
- assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % ( root.tag )
+ assert elem.tag == 'data_manager', 'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % ( elem.tag )
self.declared_id = elem.get( 'id', None )
self.guid = elem.get( 'guid', None )
path = elem.get( 'tool_file', None )
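The assertion above was broken in a subtle way: its message referenced root, a name that does not exist in load_from_element's scope, so a malformed config would raise a NameError from the message expression instead of the intended AssertionError. A minimal, hypothetical reproduction of the fixed behaviour:

    import xml.etree.ElementTree as ET

    def load_from_element(elem):
        # Fixed form: the message uses the element actually being checked.
        # (The broken form interpolated root.tag, which is undefined here.)
        assert elem.tag == 'data_manager', \
            'A data manager configuration must have a "data_manager" tag as the root. "%s" is present' % (elem.tag)

    try:
        load_from_element(ET.fromstring('<not_a_data_manager/>'))
    except AssertionError as e:
        print(e)  # ... "not_a_data_manager" is present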
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
5 new commits in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/fcabfb819232/
Changeset: fcabfb819232
User: dannon
Date: 2013-08-30 05:06:53
Summary: Explicitly require source argument to RawBedDataProvider.get_iterator instead of relying on it in kwargs
Affected #: 1 file
diff -r facc879fe0543f25e6b4d65e3e5d5efe716ff455 -r fcabfb81923220651c0dd95181eed24d1b21ac68 lib/galaxy/visualization/data_providers/genome.py
--- a/lib/galaxy/visualization/data_providers/genome.py
+++ b/lib/galaxy/visualization/data_providers/genome.py
@@ -568,7 +568,7 @@
for large datasets.
"""
- def get_iterator( self, chrom=None, start=None, end=None, **kwargs ):
+ def get_iterator( self, source, chrom=None, start=None, end=None, **kwargs ):
# Read first line in order to match chrom naming format.
line = source.readline()
dataset_chrom = line.split()[0]
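A small sketch of the pattern this commit enforces: when a method body reads from source, source has to be a declared parameter; **kwargs only captures extra keyword arguments into a dict and never binds a local name, so the original code would hit a NameError on the first call. The class and argument names follow the diff, but the body is a toy.

    from io import StringIO

    class RawBedDataProvider(object):
        # Sketch only; the real method goes on to filter BED lines by region.
        def get_iterator(self, source, chrom=None, start=None, end=None, **kwargs):
            line = source.readline()
            dataset_chrom = line.split()[0]
            return dataset_chrom

    provider = RawBedDataProvider()
    print(provider.get_iterator(StringIO(u'chr1\t100\t200\tfeature1\n'), chrom='chr1'))  # chr1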
https://bitbucket.org/galaxy/galaxy-central/commits/3314e402ebaf/
Changeset: 3314e402ebaf
User: dannon
Date: 2013-08-30 05:10:02
Summary: Add missing import parse_gff_attributes to genome data provider
Affected #: 1 file
diff -r fcabfb81923220651c0dd95181eed24d1b21ac68 -r 3314e402ebaf326d57665615ba0e2e7b83dcc330 lib/galaxy/visualization/data_providers/genome.py
--- a/lib/galaxy/visualization/data_providers/genome.py
+++ b/lib/galaxy/visualization/data_providers/genome.py
@@ -9,7 +9,7 @@
pkg_resources.require( "pysam" )
pkg_resources.require( "numpy" )
import numpy
-from galaxy.datatypes.util.gff_util import GFFReaderWrapper, GFFInterval, GFFFeature, convert_gff_coords_to_bed
+from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed, GFFFeature, GFFInterval, GFFReaderWrapper, parse_gff_attributes
from galaxy.util.json import from_json_string
from bx.interval_index_file import Indexes
from bx.bbi.bigwig_file import BigWigFile
https://bitbucket.org/galaxy/galaxy-central/commits/4b86e65ee645/
Changeset: 4b86e65ee645
User: dannon
Date: 2013-08-30 05:14:17
Summary: Strip unused imports (and trailing whitespace) from genome data provider
Affected #: 1 file
diff -r 3314e402ebaf326d57665615ba0e2e7b83dcc330 -r 4b86e65ee645caa6b5923b05e759a06a9f06113f lib/galaxy/visualization/data_providers/genome.py
--- a/lib/galaxy/visualization/data_providers/genome.py
+++ b/lib/galaxy/visualization/data_providers/genome.py
@@ -3,7 +3,6 @@
"""
import os, sys, re
-from math import ceil, log
import pkg_resources
pkg_resources.require( "bx-python" )
pkg_resources.require( "pysam" )
@@ -14,7 +13,6 @@
from bx.interval_index_file import Indexes
from bx.bbi.bigwig_file import BigWigFile
from bx.bbi.bigbed_file import BigBedFile
-from galaxy.util.lrucache import LRUCache
from galaxy.visualization.data_providers.basic import BaseDataProvider
from galaxy.visualization.data_providers.cigar import get_ref_based_read_seq_and_cigar
from galaxy.datatypes.interval import Bed, Gff, Gtf
@@ -33,7 +31,7 @@
return None
else:
return float(n)
-
+
def get_bounds( reads, start_pos_index, end_pos_index ):
'''
Returns the minimum and maximum position for a set of reads.
@@ -76,7 +74,7 @@
line_len = int( textloc_file.readline() )
file_len = os.path.getsize( self.converted_dataset.file_name )
query = query.lower()
-
+
# Find query in file using binary search.
low = 0
high = file_len / line_len
@@ -91,42 +89,42 @@
low = mid + 1
else:
high = mid
-
+
position = low * line_len
-
+
# At right point in file, generate hits.
result = []
while True:
line = textloc_file.readline()
- if not line.startswith( query ):
+ if not line.startswith( query ):
break
- if line[ -1: ] == '\n':
+ if line[ -1: ] == '\n':
line = line[ :-1 ]
result.append( line.split()[1:] )
- textloc_file.close()
+ textloc_file.close()
return result
-
+
class GenomeDataProvider( BaseDataProvider ):
- """
- Base class for genome data providers. All genome providers use BED coordinate
+ """
+ Base class for genome data providers. All genome providers use BED coordinate
format (0-based, half-open coordinates) for both queries and returned data.
"""
dataset_type = None
-
- """
+
+ """
Mapping from column name to payload data; this mapping is used to create
- filters. Key is column name, value is a dict with mandatory key 'index' and
+ filters. Key is column name, value is a dict with mandatory key 'index' and
optional key 'name'. E.g. this defines column 4
col_name_data_attr_mapping = {4 : { index: 5, name: 'Score' } }
"""
col_name_data_attr_mapping = {}
-
+
def __init__( self, converted_dataset=None, original_dataset=None, dependencies=None,
error_max_vals="Only the first %i %s in this region are displayed." ):
- super( GenomeDataProvider, self ).__init__( converted_dataset=converted_dataset,
+ super( GenomeDataProvider, self ).__init__( converted_dataset=converted_dataset,
original_dataset=original_dataset,
dependencies=dependencies,
error_max_vals=error_max_vals )
@@ -135,44 +133,44 @@
# queries, such as is necessary for genome-wide data.
# TODO: add functions to (a) create data_file and (b) clean up data_file.
self.data_file = None
-
+
def write_data_to_file( self, regions, filename ):
"""
Write data in region defined by chrom, start, and end to a file.
"""
raise Exception( "Unimplemented Function" )
-
+
def valid_chroms( self ):
"""
Returns chroms/contigs that the dataset contains
"""
return None # by default
-
+
def has_data( self, chrom, start, end, **kwargs ):
"""
Returns true if dataset has data in the specified genome window, false
otherwise.
"""
raise Exception( "Unimplemented Function" )
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
"""
Returns an iterator that provides data in the region chrom:start-end
"""
raise Exception( "Unimplemented Function" )
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Process data from an iterator to a format that can be provided to client.
"""
- raise Exception( "Unimplemented Function" )
-
+ raise Exception( "Unimplemented Function" )
+
def get_data( self, chrom=None, low=None, high=None, start_val=0, max_vals=sys.maxint, **kwargs ):
- """
+ """
Returns data in region defined by chrom, start, and end. start_val and
- max_vals are used to denote the data to return: start_val is the first element to
+ max_vals are used to denote the data to return: start_val is the first element to
return and max_vals indicates the number of values to return.
-
+
Return value must be a dictionary with the following attributes:
dataset_type, data
"""
@@ -204,12 +202,12 @@
'dataset_type': self.dataset_type
}
-
+
def get_filters( self ):
- """
- Returns filters for provider's data. Return value is a list of
+ """
+ Returns filters for provider's data. Return value is a list of
filters; each filter is a dictionary with the keys 'name', 'index', 'type'.
- NOTE: This method uses the original dataset's datatype and metadata to
+ NOTE: This method uses the original dataset's datatype and metadata to
create the filters.
"""
# Get column names.
@@ -220,18 +218,18 @@
column_names = range( self.original_dataset.metadata.columns )
except: # Give up
return []
-
+
# Dataset must have column types; if not, cannot create filters.
try:
column_types = self.original_dataset.metadata.column_types
except AttributeError:
return []
-
+
# Create and return filters.
filters = []
if self.original_dataset.metadata.viz_filter_cols:
for viz_col_index in self.original_dataset.metadata.viz_filter_cols:
- # Some columns are optional, so can't assume that a filter
+ # Some columns are optional, so can't assume that a filter
# column is in dataset.
if viz_col_index >= len( column_names ):
continue;
@@ -248,7 +246,7 @@
def get_default_max_vals( self ):
return 5000
-
+
#
# -- Base mixins and providers --
#
@@ -256,26 +254,26 @@
class FilterableMixin:
def get_filters( self ):
""" Returns a dataset's filters. """
-
+
# is_ functions taken from Tabular.set_meta
def is_int( column_text ):
try:
int( column_text )
return True
- except:
+ except:
return False
def is_float( column_text ):
try:
float( column_text )
return True
- except:
+ except:
if column_text.strip().lower() == 'na':
return True #na is special cased to be a float
return False
-
+
#
# Get filters.
- # TODOs:
+ # TODOs:
# (a) might be useful to move this into each datatype's set_meta method;
# (b) could look at first N lines to ensure GTF attribute types are consistent.
#
@@ -284,9 +282,9 @@
filter_col = 8
if isinstance( self.original_dataset.datatype, Gff ):
# Can filter by score and GTF attributes.
- filters = [ { 'name': 'Score',
- 'type': 'number',
- 'index': filter_col,
+ filters = [ { 'name': 'Score',
+ 'type': 'number',
+ 'index': filter_col,
'tool_id': 'Filter1',
'tool_exp_name': 'c6' } ]
filter_col += 1
@@ -294,10 +292,10 @@
# Create filters based on dataset metadata.
for name, a_type in self.original_dataset.metadata.attribute_types.items():
if a_type in [ 'int', 'float' ]:
- filters.append(
+ filters.append(
{ 'name': name,
- 'type': 'number',
- 'index': filter_col,
+ 'type': 'number',
+ 'index': filter_col,
'tool_id': 'gff_filter_by_attribute',
'tool_exp_name': name } )
filter_col += 1
@@ -324,9 +322,9 @@
'''
elif isinstance( self.original_dataset.datatype, Bed ):
# Can filter by score column only.
- filters = [ { 'name': 'Score',
- 'type': 'number',
- 'index': filter_col,
+ filters = [ { 'name': 'Score',
+ 'type': 'number',
+ 'index': filter_col,
'tool_id': 'Filter1',
'tool_exp_name': 'c5'
} ]
@@ -340,19 +338,19 @@
"""
Tabix index data provider for the Galaxy track browser.
"""
-
+
col_name_data_attr_mapping = { 4 : { 'index': 4 , 'name' : 'Score' } }
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
start, end = int(start), int(end)
if end >= (2<<29):
end = (2<<29 - 1) # Tabix-enforced maximum
-
+
bgzip_fname = self.dependencies['bgzip'].file_name
-
+
if not self.data_file:
self.data_file = ctabix.Tabixfile(bgzip_fname, index_filename=self.converted_dataset.file_name)
-
+
# Get iterator using either naming scheme.
iterator = iter( [] )
if chrom in self.data_file.contigs:
@@ -365,10 +363,10 @@
return iterator
-
+
def write_data_to_file( self, regions, filename ):
out = open( filename, "w" )
-
+
for region in regions:
# Write data in region.
chrom = region.chrom
@@ -377,7 +375,7 @@
iterator = self.get_iterator( chrom, start, end )
for line in iterator:
out.write( "%s\n" % line )
-
+
out.close()
#
@@ -389,20 +387,20 @@
"""
Processes interval data from native format to payload format.
-
+
Payload format: [ uid (offset), start, end, name, strand, thick_start, thick_end, blocks ]
"""
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
raise Exception( "Unimplemented Function" )
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Provides
"""
# Build data to return. Payload format is:
# [ <guid/offset>, <start>, <end>, <name>, <strand> ]
- #
+ #
# First three entries are mandatory, others are optional.
#
filter_cols = from_json_string( kwargs.get( "filter_cols", "[]" ) )
@@ -421,7 +419,7 @@
if max_vals and count-start_val >= max_vals:
message = self.error_max_vals % ( max_vals, "features" )
break
-
+
feature = line.split()
length = len(feature)
# Unique id is just a hash of the line
@@ -439,7 +437,7 @@
if not name_col: payload.append( "" )
payload.append( feature[strand_col] )
- # Score (filter data)
+ # Score (filter data)
if length >= 5 and filter_cols and filter_cols[0] == "Score":
try:
payload.append( float( feature[4] ) )
@@ -467,23 +465,23 @@
class BedDataProvider( GenomeDataProvider ):
"""
Processes BED data from native format to payload format.
-
+
Payload format: [ uid (offset), start, end, name, strand, thick_start, thick_end, blocks ]
"""
dataset_type = 'interval_index'
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
raise Exception( "Unimplemented Method" )
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Provides
"""
# Build data to return. Payload format is:
- # [ <guid/offset>, <start>, <end>, <name>, <strand>, <thick_start>,
+ # [ <guid/offset>, <start>, <end>, <name>, <strand>, <thick_start>,
# <thick_end>, <blocks> ]
- #
+ #
# First three entries are mandatory, others are optional.
#
filter_cols = from_json_string( kwargs.get( "filter_cols", "[]" ) )
@@ -524,10 +522,10 @@
blocks = zip( block_sizes, block_starts )
payload.append( [ ( int(feature[1]) + block[1], int(feature[1]) + block[1] + block[0] ) for block in blocks ] )
- # Score (filter data)
+ # Score (filter data)
if length >= 5 and filter_cols and filter_cols[0] == "Score":
- # If dataset doesn't have name/strand/thick start/thick end/blocks,
- # add placeholders. There should be 8 entries if all attributes
+ # If dataset doesn't have name/strand/thick start/thick end/blocks,
+ # add placeholders. There should be 8 entries if all attributes
# are present.
payload.extend( [ None for i in range( 8 - len( payload ) ) ] )
@@ -542,7 +540,7 @@
def write_data_to_file( self, regions, filename ):
out = open( filename, "w" )
-
+
for region in regions:
# Write data in region.
chrom = region.chrom
@@ -551,15 +549,15 @@
iterator = self.get_iterator( chrom, start, end )
for line in iterator:
out.write( "%s\n" % line )
-
+
out.close()
-
+
class BedTabixDataProvider( TabixDataProvider, BedDataProvider ):
"""
Provides data from a BED file indexed via tabix.
"""
pass
-
+
class RawBedDataProvider( BedDataProvider ):
"""
Provide data from BED file.
@@ -590,7 +588,7 @@
or ( end is not None and feature_end < start ):
continue
yield line
-
+
return line_filter_iter()
#
@@ -601,10 +599,10 @@
"""
Abstract class that processes VCF data from native format to payload format.
- Payload format: An array of entries for each locus in the file. Each array
+ Payload format: An array of entries for each locus in the file. Each array
has the following entries:
1. GUID (unused)
- 2. location (0-based)
+ 2. location (0-based)
3. reference base(s)
4. alternative base(s)
5. quality score
@@ -613,20 +611,20 @@
denotes the reference genotype
8-end: allele counts for each alternative
"""
-
+
col_name_data_attr_mapping = { 'Qual' : { 'index': 6 , 'name' : 'Qual' } }
dataset_type = 'variant'
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Returns a dict with the following attributes::
- data - a list of variants with the format
+ data - a list of variants with the format
.. raw:: text
- [<guid>, <start>, <end>, <name>, cigar, seq]
+ [<guid>, <start>, <end>, <name>, cigar, seq]
message - error/informative message
@@ -636,8 +634,8 @@
def get_mapping( ref, alt ):
"""
- Returns ( offset, new_seq, cigar ) tuple that defines mapping of
- alt to ref. Cigar format is an array of [ op_index, length ] pairs
+ Returns ( offset, new_seq, cigar ) tuple that defines mapping of
+ alt to ref. Cigar format is an array of [ op_index, length ] pairs
where op_index is the 0-based index into the string "MIDNSHP=X"
"""
@@ -676,7 +674,7 @@
samples_data = feature [ 9: ]
# VCF is 1-based.
pos = int( pos ) - 1
-
+
# FIXME: OK to skip?
if alt == '.':
count -= 1
@@ -707,7 +705,7 @@
has_alleles = True
except ValueError:
pass
-
+
# If no alleles, use empty string as proxy.
if not has_alleles:
genotype = ''
@@ -732,7 +730,7 @@
def write_data_to_file( self, regions, filename ):
out = open( filename, "w" )
-
+
for region in regions:
# Write data in region.
chrom = region.chrom
@@ -747,7 +745,7 @@
"""
Provides data from a VCF file indexed via tabix.
"""
-
+
dataset_type = 'variant'
class RawVcfDataProvider( VcfDataProvider ):
@@ -797,17 +795,17 @@
for data_line in source:
if line_in_region( data_line, chrom, start, end ):
yield data_line
-
+
return line_filter_iter()
class BamDataProvider( GenomeDataProvider, FilterableMixin ):
"""
- Provides access to intervals from a sorted indexed BAM file. Coordinate
+ Provides access to intervals from a sorted indexed BAM file. Coordinate
data is reported in BED format: 0-based, half-open.
"""
dataset_type = 'bai'
-
+
def get_filters( self ):
"""
Returns filters for dataset.
@@ -815,31 +813,31 @@
# HACK: first 7 fields are for drawing, so start filter column index at 7.
filter_col = 7
filters = []
- filters.append( { 'name': 'Mapping Quality',
- 'type': 'number',
+ filters.append( { 'name': 'Mapping Quality',
+ 'type': 'number',
'index': filter_col
} )
return filters
-
-
+
+
def write_data_to_file( self, regions, filename ):
"""
Write reads in regions to file.
"""
-
+
# Open current BAM file using index.
bamfile = csamtools.Samfile( filename=self.original_dataset.file_name, mode='rb', \
index_filename=self.converted_dataset.file_name )
# TODO: write headers as well?
new_bamfile = csamtools.Samfile( template=bamfile, filename=filename, mode='wb' )
-
+
for region in regions:
# Write data from region.
chrom = region.chrom
start = region.start
end = region.end
-
+
try:
data = bamfile.fetch(start=start, end=end, reference=chrom)
except ValueError, e:
@@ -853,11 +851,11 @@
# Write reads in region.
for i, read in enumerate( data ):
new_bamfile.write( read )
-
+
# Cleanup.
new_bamfile.close()
bamfile.close()
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
"""
Returns an iterator that provides data in the region chrom:start-end
@@ -865,7 +863,7 @@
start, end = int( start ), int( end )
orig_data_filename = self.original_dataset.file_name
index_filename = self.converted_dataset.file_name
-
+
# Attempt to open the BAM file with index
bamfile = csamtools.Samfile( filename=orig_data_filename, mode='rb', index_filename=index_filename )
try:
@@ -878,12 +876,12 @@
except ValueError:
return None
return data
-
+
def process_data( self, iterator, start_val=0, max_vals=None, ref_seq=None, start=0, **kwargs ):
"""
Returns a dict with the following attributes::
- data - a list of reads with the format
+ data - a list of reads with the format
[<guid>, <start>, <end>, <name>, <read_1>, <read_2>, [empty], <mapq_scores>]
where <read_1> has the format
@@ -895,10 +893,10 @@
Field 7 is empty so that mapq scores' location matches that in single-end reads.
For single-end reads, read has format:
[<guid>, <start>, <end>, <name>, <cigar>, <strand>, <seq>, <mapq_score>]
-
+
NOTE: read end and sequence data are not valid for reads outside of
requested region and should not be used.
-
+
max_low - lowest coordinate for the returned reads
max_high - highest coordinate for the returned reads
message - error/informative message
@@ -919,7 +917,7 @@
return "+"
else:
return "-"
-
+
#
# Encode reads as list of lists.
#
@@ -933,13 +931,13 @@
if ( count - start_val - unmapped ) >= max_vals:
message = self.error_max_vals % ( max_vals, "reads" )
break
-
+
# If not mapped, skip read.
is_mapped = ( read.flag & 0x0004 == 0 )
if not is_mapped:
unmapped += 1
continue
-
+
qname = read.qname
seq = read.seq
strand = decode_strand( read.flag, 0x0010 )
@@ -951,11 +949,11 @@
if read.is_proper_pair:
if qname in paired_pending: # one in dict is always first
pair = paired_pending[qname]
- results.append( [ "%i_%s" % ( pair['start'], qname ),
- pair['start'],
- read.pos + read_len,
- qname,
- [ pair['start'], pair['end'], pair['cigar'], pair['strand'], pair['seq'] ],
+ results.append( [ "%i_%s" % ( pair['start'], qname ),
+ pair['start'],
+ read.pos + read_len,
+ qname,
+ [ pair['start'], pair['end'], pair['cigar'], pair['strand'], pair['seq'] ],
[ read.pos, read.pos + read_len, read.cigar, strand, seq ],
None, [ pair['mapq'], read.mapq ]
] )
@@ -964,10 +962,10 @@
paired_pending[qname] = { 'start': read.pos, 'end': read.pos + read_len, 'seq': seq, 'mate_start': read.mpos,
'rlen': read_len, 'strand': strand, 'cigar': read.cigar, 'mapq': read.mapq }
else:
- results.append( [ "%i_%s" % ( read.pos, qname ),
- read.pos, read.pos + read_len, qname,
+ results.append( [ "%i_%s" % ( read.pos, qname ),
+ read.pos, read.pos + read_len, qname,
read.cigar, strand, read.seq, read.mapq ] )
-
+
# Take care of reads whose mates are out of range.
# TODO: count paired reads when adhering to max_vals?
for qname, read in paired_pending.iteritems():
@@ -989,7 +987,7 @@
r2 = [ read['mate_start'], read['mate_start'] ]
results.append( [ "%i_%s" % ( read_start, qname ), read_start, read_end, qname, r1, r2, [read[ 'mapq' ], 125] ] )
-
+
# Clean up. TODO: is this needed? If so, we'll need a cleanup function after processing the data.
# bamfile.close()
@@ -999,10 +997,10 @@
'''
Process a read using the designated fields.
'''
- read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(),
- read[ start_field ],
- ref_seq,
- start,
+ read_seq, read_cigar = get_ref_based_read_seq_and_cigar( read[ seq_field ].upper(),
+ read[ start_field ],
+ ref_seq,
+ start,
read[ cigar_field ] )
read[ seq_field ] = read_seq
read[ cigar_field ] = read_cigar
@@ -1012,7 +1010,7 @@
Process single-end read.
'''
process_read( read, 1, 4, 6)
-
+
def process_pe_read( read ):
'''
Process paired-end read.
@@ -1034,28 +1032,28 @@
process_se_read( read )
max_low, max_high = get_bounds( results, 1, 2 )
-
+
return { 'data': results, 'message': message, 'max_low': max_low, 'max_high': max_high }
-
+
class SamDataProvider( BamDataProvider ):
dataset_type = 'bai'
-
+
def __init__( self, converted_dataset=None, original_dataset=None, dependencies=None ):
""" Create SamDataProvider. """
super( SamDataProvider, self ).__init__( converted_dataset=converted_dataset,
original_dataset=original_dataset,
dependencies=dependencies )
-
- # To use BamDataProvider, original dataset must be BAM and
+
+ # To use BamDataProvider, original dataset must be BAM and
# converted dataset must be BAI. Use BAI from BAM metadata.
if converted_dataset:
self.original_dataset = converted_dataset
self.converted_dataset = converted_dataset.metadata.bam_index
-
+
class BBIDataProvider( GenomeDataProvider ):
"""
- BBI data provider for the Galaxy track browser.
+ BBI data provider for the Galaxy track browser.
"""
dataset_type = 'bigwig'
@@ -1063,7 +1061,7 @@
def valid_chroms( self ):
# No way to return this info as of now
return None
-
+
def has_data( self, chrom ):
f, bbi = self._get_dataset()
all_dat = bbi.query( chrom, 0, 2147483647, 1 ) or \
@@ -1081,18 +1079,18 @@
return bbi.summarize( chrom, start, end, num_points ) or \
bbi.summarize( _convert_between_ucsc_and_ensemble_naming( chrom ) , start, end, num_points )
- # Bigwig can be a standalone bigwig file, in which case we use
- # original_dataset, or coming from wig->bigwig conversion in
+ # Bigwig can be a standalone bigwig file, in which case we use
+ # original_dataset, or coming from wig->bigwig conversion in
# which we use converted_dataset
f, bbi = self._get_dataset()
-
+
# If stats requested, compute overall summary data for the range
- # start:endbut no reduced data. This is currently used by client
+ # start:endbut no reduced data. This is currently used by client
# to determine the default range.
if 'stats' in kwargs:
summary = _summarize_bbi( bbi, chrom, start, end, 1 )
f.close()
-
+
min_val = 0
max_val = 0
mean = 0
@@ -1127,12 +1125,12 @@
summary = _summarize_bbi( bbi, chrom, start, end, num_points )
if summary:
#mean = summary.sum_data / summary.valid_count
-
+
## Standard deviation by bin, not yet used
## var = summary.sum_squares - mean
## var /= minimum( valid_count - 1, 1 )
## sd = sqrt( var )
-
+
pos = start
step_size = (end - start) / num_points
@@ -1150,34 +1148,34 @@
num_points = end - start + 1
end += 1
else:
- #
- # The goal is to sample the region between start and end uniformly
- # using ~N (num_samples) data points. The challenge is that the size of
- # sampled intervals rarely is full bases, so sampling using N points
- # will leave the end of the region unsampled due to remainders for
- # each interval. To recitify this, a new N is calculated based on the
+ #
+ # The goal is to sample the region between start and end uniformly
+ # using ~N (num_samples) data points. The challenge is that the size of
+ # sampled intervals rarely is full bases, so sampling using N points
+ # will leave the end of the region unsampled due to remainders for
+ # each interval. To recitify this, a new N is calculated based on the
# step size that covers as much of the region as possible.
#
- # However, this still leaves some of the region unsampled. This
- # could be addressed by repeatedly sampling remainder using a
- # smaller and smaller step_size, but that would require iteratively
+ # However, this still leaves some of the region unsampled. This
+ # could be addressed by repeatedly sampling remainder using a
+ # smaller and smaller step_size, but that would require iteratively
# going to BBI, which could be time consuming.
#
# Start with N samples.
num_points = num_samples
step_size = ( end - start ) / num_points
- # Add additional points to sample in the remainder not covered by
+ # Add additional points to sample in the remainder not covered by
# the initial N samples.
remainder_start = start + step_size * num_points
additional_points = ( end - remainder_start ) / step_size
num_points += additional_points
-
+
result = summarize_region( bbi, chrom, start, end, num_points )
-
+
# Cleanup and return.
f.close()
- return {
+ return {
'data': result,
'dataset_type': self.dataset_type
}
@@ -1190,7 +1188,7 @@
class BigWigDataProvider ( BBIDataProvider ):
"""
- Provides data from BigWig files; position data is reported in 1-based
+ Provides data from BigWig files; position data is reported in 1-based
coordinate system, i.e. wiggle format.
"""
def _get_dataset( self ):
@@ -1199,7 +1197,7 @@
else:
f = open( self.original_dataset.file_name )
return f, BigWigFile(file=f)
-
+
class IntervalIndexDataProvider( FilterableMixin, GenomeDataProvider ):
"""
Interval index files used for GFF, Pileup files.
@@ -1207,7 +1205,7 @@
col_name_data_attr_mapping = { 4 : { 'index': 4 , 'name' : 'Score' } }
dataset_type = 'interval_index'
-
+
def write_data_to_file( self, regions, filename ):
source = open( self.original_dataset.file_name )
index = Indexes( self.converted_dataset.file_name )
@@ -1230,10 +1228,10 @@
feature = reader.next()
for interval in feature.intervals:
out.write( '\t'.join( interval.fields ) + '\n' )
-
+
source.close()
out.close()
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
"""
Returns an array with values: (a) source file and (b) an iterator that
@@ -1246,7 +1244,7 @@
if chrom not in index.indexes:
# Try alternative naming.
chrom = _convert_between_ucsc_and_ensemble_naming( chrom )
-
+
return index.find(chrom, start, end)
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
@@ -1258,7 +1256,7 @@
# Build data to return. Payload format is:
# [ <guid/offset>, <start>, <end>, <name>, <score>, <strand>, <thick_start>,
# <thick_end>, <blocks> ]
- #
+ #
# First three entries are mandatory, others are optional.
#
filter_cols = from_json_string( kwargs.get( "filter_cols", "[]" ) )
@@ -1272,7 +1270,7 @@
break
source.seek( offset )
# TODO: can we use column metadata to fill out payload?
-
+
# GFF dataset.
reader = GFFReaderWrapper( source, fix_strand=True )
feature = reader.next()
@@ -1286,13 +1284,13 @@
class RawGFFDataProvider( GenomeDataProvider ):
"""
Provide data from GFF file that has not been indexed.
-
+
NOTE: this data provider does not use indices, and hence will be very slow
for large datasets.
"""
dataset_type = 'interval_index'
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
"""
Returns an iterator that provides data in the region chrom:start-end as well as
@@ -1302,18 +1300,18 @@
# Read first line in order to match chrom naming format.
line = source.readline()
-
+
# If line empty, assume file is empty and return empty iterator.
if len( line ) == 0:
return iter([])
-
+
# Determine chromosome naming format.
dataset_chrom = line.split()[0]
if not _chrom_naming_matches( chrom, dataset_chrom ):
chrom = _convert_between_ucsc_and_ensemble_naming( chrom )
# Undo read.
source.seek( 0 )
-
+
def features_in_region_iter():
offset = 0
for feature in GFFReaderWrapper( source, fix_strand=True ):
@@ -1324,7 +1322,7 @@
offset += feature.raw_size
return features_in_region_iter()
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Process data from an iterator to a format that can be provided to client.
@@ -1340,22 +1338,22 @@
if count-start_val >= max_vals:
message = self.error_max_vals % ( max_vals, "reads" )
break
-
+
payload = package_gff_feature( feature, no_detail=no_detail, filter_cols=filter_cols )
payload.insert( 0, offset )
results.append( payload )
-
+
return { 'data': results, 'dataset_type': self.dataset_type, 'message': message }
-
+
class GtfTabixDataProvider( TabixDataProvider ):
"""
Returns data from GTF datasets that are indexed via tabix.
"""
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
# Loop through lines and group by transcript_id; each group is a feature.
-
+
# TODO: extend this code or use code in gff_util to process GFF/3 as well
# and then create a generic GFFDataProvider that can be used with both
# raw and tabix datasets.
@@ -1369,7 +1367,7 @@
feature = []
features[ transcript_id ] = feature
feature.append( GFFInterval( None, line.split( '\t') ) )
-
+
# Process data.
filter_cols = from_json_string( kwargs.get( "filter_cols", "[]" ) )
no_detail = ( "no_detail" in kwargs )
@@ -1382,12 +1380,12 @@
if count-start_val >= max_vals:
message = self.error_max_vals % ( max_vals, "reads" )
break
-
- feature = GFFFeature( None, intervals=intervals )
+
+ feature = GFFFeature( None, intervals=intervals )
payload = package_gff_feature( feature, no_detail=no_detail, filter_cols=filter_cols )
payload.insert( 0, feature.intervals[ 0 ].attributes[ 'transcript_id' ] )
results.append( payload )
-
+
return { 'data': results, 'message': message }
#
@@ -1397,26 +1395,26 @@
class ENCODEPeakDataProvider( GenomeDataProvider ):
"""
Abstract class that processes ENCODEPeak data from native format to payload format.
-
+
Payload format: [ uid (offset), start, end, name, strand, thick_start, thick_end, blocks ]
"""
-
+
def get_iterator( self, chrom, start, end, **kwargs ):
raise "Unimplemented Method"
-
+
def process_data( self, iterator, start_val=0, max_vals=None, **kwargs ):
"""
Provides
"""
-
+
## FIXMEs:
# (1) should be able to unify some of this code with BedDataProvider.process_data
# (2) are optional number of parameters supported?
-
+
# Build data to return. Payload format is:
# [ <guid/offset>, <start>, <end>, <name>, <strand>, <thick_start>,
# <thick_end>, <blocks> ]
- #
+ #
# First three entries are mandatory, others are optional.
#
no_detail = ( "no_detail" in kwargs )
@@ -1431,16 +1429,16 @@
feature = line.split()
length = len( feature )
-
+
# Feature initialization.
payload = [
# GUID is just a hash of the line
hash( line ),
# Add start, end.
- int( feature[1] ),
+ int( feature[1] ),
int( feature[2] )
]
-
+
if no_detail:
rval.append( payload )
continue
@@ -1448,7 +1446,7 @@
# Extend with additional data.
payload.extend( [
# Add name, strand.
- feature[3],
+ feature[3],
feature[5],
# Thick start, end are feature start, end for now.
int( feature[1] ),
@@ -1465,12 +1463,12 @@
rval.append( payload )
return { 'data': rval, 'message': message }
-
+
class ENCODEPeakTabixDataProvider( TabixDataProvider, ENCODEPeakDataProvider ):
"""
Provides data from an ENCODEPeak dataset indexed via tabix.
"""
-
+
def get_filters( self ):
"""
Returns filters for dataset.
@@ -1478,26 +1476,26 @@
# HACK: first 8 fields are for drawing, so start filter column index at 9.
filter_col = 8
filters = []
- filters.append( { 'name': 'Score',
- 'type': 'number',
+ filters.append( { 'name': 'Score',
+ 'type': 'number',
'index': filter_col,
'tool_id': 'Filter1',
'tool_exp_name': 'c6' } )
filter_col += 1
- filters.append( { 'name': 'Signal Value',
- 'type': 'number',
+ filters.append( { 'name': 'Signal Value',
+ 'type': 'number',
'index': filter_col,
'tool_id': 'Filter1',
'tool_exp_name': 'c7' } )
filter_col += 1
- filters.append( { 'name': 'pValue',
- 'type': 'number',
+ filters.append( { 'name': 'pValue',
+ 'type': 'number',
'index': filter_col,
'tool_id': 'Filter1',
'tool_exp_name': 'c8' } )
filter_col += 1
- filters.append( { 'name': 'qValue',
- 'type': 'number',
+ filters.append( { 'name': 'qValue',
+ 'type': 'number',
'index': filter_col,
'tool_id': 'Filter1',
'tool_exp_name': 'c9' } )
@@ -1523,7 +1521,7 @@
feature = line.split()
length = len( feature )
-
+
s1 = int( feature[1] )
e1 = int( feature[2] )
c = feature[3]
@@ -1538,14 +1536,14 @@
# Add start1, end1, chr2, start2, end2, value.
s1, e1, c, s2, e2, v
]
-
+
rval.append( payload )
return { 'data': rval, 'message': message }
def get_default_max_vals( self ):
return 100000;
-
+
class ChromatinInteractionsTabixDataProvider( TabixDataProvider, ChromatinInteractionsDataProvider ):
def get_iterator( self, chrom, start=0, end=sys.maxint, interchromosomal=False, **kwargs ):
"""
@@ -1556,7 +1554,7 @@
def filter( iter ):
for line in iter:
feature = line.split()
- s1 = int( feature[1] )
+ s1 = int( feature[1] )
e1 = int( feature[2] )
c = feature[3]
s2 = int( feature[4] )
@@ -1568,22 +1566,22 @@
if interchromosomal and c != chrom:
yield line
return filter( TabixDataProvider.get_iterator( self, chrom, filter_start, end ) )
-
-#
+
+#
# -- Helper methods. --
#
def package_gff_feature( feature, no_detail=False, filter_cols=[] ):
""" Package a GFF feature in an array for data providers. """
feature = convert_gff_coords_to_bed( feature )
-
+
# No detail means only start, end.
if no_detail:
return [ feature.start, feature.end ]
-
+
# Return full feature.
- payload = [ feature.start,
- feature.end,
+ payload = [ feature.start,
+ feature.end,
feature.name(),
feature.strand,
# No notion of thick start, end in GFF, so make everything
@@ -1591,9 +1589,9 @@
feature.start,
feature.end
]
-
- # HACK: ignore interval with name 'transcript' from feature.
- # Cufflinks puts this interval in each of its transcripts,
+
+ # HACK: ignore interval with name 'transcript' from feature.
+ # Cufflinks puts this interval in each of its transcripts,
# and they mess up trackster by covering the feature's blocks.
# This interval will always be a feature's first interval,
# and the GFF's third column is its feature name.
@@ -1605,7 +1603,7 @@
block_starts = [ ( interval.start - feature.start ) for interval in feature_intervals ]
blocks = zip( block_sizes, block_starts )
payload.append( [ ( feature.start + block[1], feature.start + block[1] + block[0] ) for block in blocks ] )
-
+
# Add filter data to payload.
for col in filter_cols:
if col == "Score":
https://bitbucket.org/galaxy/galaxy-central/commits/d2a624fd6dc2/
Changeset: d2a624fd6dc2
User: dannon
Date: 2013-08-30 05:17:36
Summary: dataprovider dataset cleanup, add missing bx.bbi import
Affected #: 1 file
diff -r 4b86e65ee645caa6b5923b05e759a06a9f06113f -r d2a624fd6dc2fecdc319848f3d35c2f4b66a389e lib/galaxy/datatypes/dataproviders/dataset.py
--- a/lib/galaxy/datatypes/dataproviders/dataset.py
+++ b/lib/galaxy/datatypes/dataproviders/dataset.py
@@ -6,19 +6,18 @@
(e.g. parsing genomic regions from their source)
"""
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( 'bx-python' )
-from bx import seq as bx_seq
-from bx import wiggle as bx_wig
-
-import exceptions
import base
import line
import column
import external
+from galaxy import eggs
+eggs.require( 'bx-python' )
+from bx import seq as bx_seq
+from bx import wiggle as bx_wig
+from bx import bbi as bx_bbi
+
_TODO = """
use bx as much as possible
gff3 hierarchies
https://bitbucket.org/galaxy/galaxy-central/commits/068acf051f9a/
Changeset: 068acf051f9a
User: dannon
Date: 2013-08-30 05:20:05
Summary: Variable confusion in dataproviders/dataset -- clarify and use correct indices var
Affected #: 1 file
diff -r d2a624fd6dc2fecdc319848f3d35c2f4b66a389e -r 068acf051f9acfb8058f2bc50b0361d9a59d8cdb lib/galaxy/datatypes/dataproviders/dataset.py
--- a/lib/galaxy/datatypes/dataproviders/dataset.py
+++ b/lib/galaxy/datatypes/dataproviders/dataset.py
@@ -145,10 +145,10 @@
:returns: list of column indeces for the named columns.
"""
region_column_names = ( 'chromCol', 'startCol', 'endCol' )
- region_indeces = [ self.get_metadata_column_index_by_name( name ) for name in region_column_names ]
- if check and not all( map( lambda i: i != None, indeces ) ):
- raise ValueError( "Could not determine proper column indeces for chrom, start, end: %s" %( str( indeces ) ) )
- return region_indeces
+ region_indices = [ self.get_metadata_column_index_by_name( name ) for name in region_column_names ]
+ if check and not all( map( lambda i: i != None, region_indices) ):
+ raise ValueError( "Could not determine proper column indices for chrom, start, end: %s" %( str( region_indices ) ) )
+ return region_indices
class ConvertedDatasetDataProvider( DatasetDataProvider ):
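As with the data manager assertion above, the broken branch referenced a name (indeces) that was never defined, so the validity check itself would crash with a NameError. A hedged, self-contained sketch of the fixed behaviour; the metadata lookup is faked with a plain dict (an assumption), whereas the real provider resolves the names via get_metadata_column_index_by_name().

    def get_region_column_indices(metadata, check=True):
        region_column_names = ('chromCol', 'startCol', 'endCol')
        # dict.get() stands in for the dataset metadata lookup.
        region_indices = [metadata.get(name) for name in region_column_names]
        if check and not all(i is not None for i in region_indices):
            raise ValueError("Could not determine proper column indices for chrom, start, end: %s"
                             % (str(region_indices)))
        return region_indices

    print(get_region_column_indices({'chromCol': 0, 'startCol': 1, 'endCol': 2}))  # [0, 1, 2]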
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.