commit/galaxy-central: 8 new changesets
8 new commits in galaxy-central: https://bitbucket.org/galaxy/galaxy-central/commits/d969c2604171/ Changeset: d969c2604171 Branch: search User: kellrott Date: 2013-06-07 18:56:47 Summary: Central Merge Affected #: 559 files diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 .hgignore --- a/.hgignore +++ b/.hgignore @@ -20,6 +20,7 @@ database/pbs database/tmp database/*.sqlite +database/openid_consumer_cache # Python bytecode *.pyc @@ -35,6 +36,11 @@ tool_shed_webapp.pid hgweb.config* +# Reports Runtime Files +reports_webapp.lock +reports_webapp.log +reports_webapp.pid + # Config files universe_wsgi.ini reports_wsgi.ini @@ -54,7 +60,7 @@ job_conf.xml data_manager_conf.xml shed_data_manager_conf.xml - +visualizations_conf.xml static/welcome.html.* static/welcome.html @@ -75,6 +81,7 @@ # Test output run_functional_tests.html +test/tool_shed/tmp/* # Project files *.kpf diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 .hgtags --- a/.hgtags +++ b/.hgtags @@ -1,3 +1,5 @@ a4113cc1cb5eaa68091c9a73375f00555b66dd11 release_2013.01.13 1c717491139269651bb59687563da9410b84c65d release_2013.02.08 75f09617abaadbc8cc732bb8ee519decaeb56ea7 release_2013.04.01 +2cc8d10988e03257dc7b97f8bb332c7df745d1dd security_2013.04.08 +524f246ca85395082719ae7a6ff72260d7ad5612 release_2013.06.03 diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 buildbot_setup.sh --- a/buildbot_setup.sh +++ b/buildbot_setup.sh @@ -93,26 +93,49 @@ JARS="/galaxy/software/jars" -for link in $LINKS; do - echo "Linking $link" - rm -f tool-data/`basename $link` - ln -sf $link tool-data -done - -if [ -d "$HYPHY" ]; then - echo "Linking $HYPHY" - rm -f tool-data/HYPHY - ln -sf $HYPHY tool-data/HYPHY +if [ ! $1 ]; then + type="standard" +elif [ $1 == "-ec2" ]; then + type="external-ec2" +else + type="unknown" fi -if [ -d "$JARS" ]; then - echo "Linking $JARS" - rm -f tool-data/shared/jars - ln -sf $JARS tool-data/shared/jars -fi +case $type in + external*) + echo "Running standalone buildbot setup..." + for sample in tool-data/*.sample; do + basename=${sample%.sample} + if [ ! -f $basename ]; then + echo "Copying $sample to $basename" + cp "$sample" "$basename" + fi + done + ;; + *) + echo "Running standard buildbot setup..." 
+ for link in $LINKS; do + echo "Linking $link" + rm -f tool-data/`basename $link` + ln -sf $link tool-data + done + + if [ -d "$HYPHY" ]; then + echo "Linking $HYPHY" + rm -f tool-data/HYPHY + ln -sf $HYPHY tool-data/HYPHY + fi + + if [ -d "$JARS" ]; then + echo "Linking $JARS" + rm -f tool-data/shared/jars + ln -sf $JARS tool-data/shared/jars + fi + ;; +esac for sample in $SAMPLES; do - file=`echo $sample | sed -e 's/\.sample$//'` + file=${sample%.sample} echo "Copying $sample to $file" cp $sample $file done diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 datatypes_conf.xml.sample --- a/datatypes_conf.xml.sample +++ b/datatypes_conf.xml.sample @@ -9,12 +9,7 @@ <datatype extension="fli" type="galaxy.datatypes.tabular:FeatureLocationIndex" display_in_upload="false"/><datatype extension="bam" type="galaxy.datatypes.binary:Bam" mimetype="application/octet-stream" display_in_upload="true"><converter file="bam_to_bai.xml" target_datatype="bai"/> - <converter file="bam_to_summary_tree_converter.xml" target_datatype="summary_tree"/> - <!-- - Caution: (a) this converter requires bedtools to be installed and (b) it is very memory intensive and - is not recommended for most laptops/desktops. - <converter file="bam_to_bigwig_converter.xml" target_datatype="bigwig"/> - --> + <converter file="bam_to_bigwig_converter.xml" target_datatype="bigwig"/><display file="ucsc/bam.xml" /><display file="ensembl/ensembl_bam.xml" /><display file="igv/bam.xml" /> @@ -22,10 +17,9 @@ </datatype><datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true"><converter file="bed_to_gff_converter.xml" target_datatype="gff"/> - <converter file="interval_to_coverage.xml" target_datatype="coverage"/><converter file="bed_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="bed_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - <converter file="bed_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/><converter file="bed_to_fli_converter.xml" target_datatype="fli"/><!-- <display file="ucsc/interval_as_bed.xml" /> --><display file="igb/bed.xml" /> @@ -51,7 +45,7 @@ <datatype extension="chrint" type="galaxy.datatypes.interval:ChromatinInteractions" display_in_upload="True"><converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - <converter file="interval_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><!-- MSI added Datatypes --><datatype extension="csv" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="true" /><!-- FIXME: csv is 'tabular'ized data, but not 'tab-delimited'; the class used here is intended for 'tab-delimited' --> @@ -93,7 +87,7 @@ <datatype extension="gff" type="galaxy.datatypes.interval:Gff" display_in_upload="true"><converter file="gff_to_bed_converter.xml" target_datatype="bed"/><converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/> - <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/><converter file="gff_to_fli_converter.xml" target_datatype="fli"/><display file="ensembl/ensembl_gff.xml" inherit="True"/><!-- 
<display file="gbrowse/gbrowse_gff.xml" inherit="True" /> --> @@ -103,7 +97,7 @@ <datatype extension="gmaj.zip" type="galaxy.datatypes.images:Gmaj" mimetype="application/zip"/><datatype extension="gtf" type="galaxy.datatypes.interval:Gtf" display_in_upload="true"><converter file="gff_to_interval_index_converter.xml" target_datatype="interval_index"/> - <converter file="gff_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><datatype extension="toolshed.gz" type="galaxy.datatypes.binary:Binary" mimetype="multipart/x-gzip" subclass="True" /><datatype extension="h5" type="galaxy.datatypes.binary:Binary" mimetype="application/octet-stream" subclass="True" /> @@ -115,7 +109,7 @@ <converter file="interval_to_bed12_converter.xml" target_datatype="bed12"/><converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> - <converter file="interval_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="interval_to_bigwig_converter.xml" target_datatype="bigwig"/><!-- <display file="ucsc/interval_as_bed.xml" inherit="True" /> --><display file="ensembl/ensembl_interval_as_bed.xml" inherit="True"/><display file="gbrowse/gbrowse_interval_as_bed.xml" inherit="True"/> @@ -156,10 +150,13 @@ <datatype extension="encodepeak" type="galaxy.datatypes.interval:ENCODEPeak" display_in_upload="True"><converter file="encodepeak_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/><converter file="encodepeak_to_bgzip_converter.xml" target_datatype="bgzip"/> - <converter file="encodepeak_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><datatype extension="pdf" type="galaxy.datatypes.images:Pdf" mimetype="application/pdf"/> - <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true" /> + <datatype extension="pileup" type="galaxy.datatypes.tabular:Pileup" display_in_upload="true"> + <converter file="interval_to_bgzip_converter.xml" target_datatype="bgzip"/> + <converter file="interval_to_tabix_converter.xml" target_datatype="tabix" depends_on="bgzip"/> + </datatype><datatype extension="png" type="galaxy.datatypes.images:Png" mimetype="image/png"/><datatype extension="qual" type="galaxy.datatypes.qualityscore:QualityScore" /><datatype extension="qualsolexa" type="galaxy.datatypes.qualityscore:QualityScoreSolexa" display_in_upload="true"/> @@ -169,7 +166,7 @@ <datatype extension="Roadmaps" type="galaxy.datatypes.assembly:Roadmaps" display_in_upload="false"/><datatype extension="sam" type="galaxy.datatypes.tabular:Sam" display_in_upload="true"><converter file="sam_to_bam.xml" target_datatype="bam"/> - <converter file="sam_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="sam_to_bigwig_converter.xml" target_datatype="bigwig"/></datatype><datatype extension="scf" type="galaxy.datatypes.binary:Scf" mimetype="application/octet-stream" display_in_upload="true"/><datatype extension="Sequences" type="galaxy.datatypes.assembly:Sequences" display_in_upload="false"/> @@ -187,7 +184,7 @@ <converter file="vcf_to_bgzip_converter.xml" target_datatype="bgzip"/><converter file="vcf_to_vcf_bgzip_converter.xml" target_datatype="vcf_bgzip"/><converter file="vcf_to_tabix_converter.xml" target_datatype="tabix" 
depends_on="bgzip"/> - <converter file="vcf_to_summary_tree_converter.xml" target_datatype="summary_tree"/> + <converter file="bed_gff_or_vcf_to_bigwig_converter.xml" target_datatype="bigwig"/><display file="ucsc/vcf.xml" /><display file="igv/vcf.xml" /><display file="rviewer/vcf.xml" inherit="True"/> @@ -200,7 +197,6 @@ <!-- <display file="gbrowse/gbrowse_wig.xml" /> --><display file="igb/wig.xml" /></datatype> - <datatype extension="summary_tree" type="galaxy.datatypes.binary:Binary" subclass="True" /><datatype extension="interval_index" type="galaxy.datatypes.binary:Binary" subclass="True" /><datatype extension="tabix" type="galaxy.datatypes.binary:Binary" subclass="True" /><datatype extension="bgzip" type="galaxy.datatypes.binary:Binary" subclass="True" /> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 eggs.ini --- a/eggs.ini +++ b/eggs.ini @@ -14,7 +14,6 @@ [eggs:platform] bx_python = 0.7.1 Cheetah = 2.2.2 -ctypes = 1.0.2 DRMAA_python = 0.2 MarkupSafe = 0.12 mercurial = 2.2.3 @@ -29,6 +28,7 @@ simplejson = 2.1.1 threadframe = 0.2 guppy = 0.1.8 +SQLAlchemy = 0.7.9 ; msgpack_python = 0.2.4 [eggs:noplatform] @@ -46,17 +46,17 @@ nose = 0.11.1 NoseHTML = 0.4.1 NoseTestDiff = 0.1 +Parsley = 1.1 Paste = 1.7.5.1 PasteDeploy = 1.5.0 pexpect = 2.4 python_openid = 2.2.5 python_daemon = 1.5.5 Routes = 1.12.3 -SQLAlchemy = 0.5.6 -sqlalchemy_migrate = 0.5.4 +sqlalchemy_migrate = 0.7.2 ssh = 1.7.14 SVGFig = 1.1.6 -Tempita = 0.1 +Tempita = 0.5.1 twill = 0.9 WebError = 0.8a WebHelpers = 0.2 @@ -75,7 +75,6 @@ MySQL_python = _5.1.41_static bx_python = _7b95ff194725 GeneTrack = _dev_48da9e998f0caf01c5be731e926f4b0481f658f0 -SQLAlchemy = _dev_r6498 pysam = _kanwei_b10f6e722e9a ; dependency source urls, necessary for scrambling. for an explanation, see diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 job_conf.xml.sample_advanced --- a/job_conf.xml.sample_advanced +++ b/job_conf.xml.sample_advanced @@ -7,7 +7,7 @@ <plugin id="local" type="runner" load="galaxy.jobs.runners.local:LocalJobRunner"/><plugin id="pbs" type="runner" load="galaxy.jobs.runners.pbs:PBSJobRunner" workers="2"/><plugin id="drmaa" type="runner" load="galaxy.jobs.runners.drmaa:DRMAAJobRunner"/> - <plugin id="lwr" type="runner" load="galaxy.jobs.runners.lwr.LwrJobRunner" /><!-- https://lwr.readthedocs.org --> + <plugin id="lwr" type="runner" load="galaxy.jobs.runners.lwr:LwrJobRunner" /><!-- https://lwr.readthedocs.org --><plugin id="cli" type="runner" load="galaxy.jobs.runners.cli:ShellJobRunner" /><plugin id="condor" type="runner" load="galaxy.jobs.runners.condor:CondorJobRunner" /></plugins> @@ -40,7 +40,6 @@ </destination><destination id="dynamic" runner="dynamic"><!-- A destination that represents a method in the dynamic runner. --> - <param id="type">python</param><param id="function">foo</param></destination><destination id="secure_lwr" runner="lwr"> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/fpconst.py --- a/lib/fpconst.py +++ /dev/null @@ -1,163 +0,0 @@ -"""Utilities for handling IEEE 754 floating point special values - -This python module implements constants and functions for working with -IEEE754 double-precision special values. It provides constants for -Not-a-Number (NaN), Positive Infinity (PosInf), and Negative Infinity -(NegInf), as well as functions to test for these values. - -The code is implemented in pure python by taking advantage of the -'struct' standard module. 
Care has been taken to generate proper -results on both big-endian and little-endian machines. Some efficiency -could be gained by translating the core routines into C. - -See <http://babbage.cs.qc.edu/courses/cs341/IEEE-754references.html> -for reference material on the IEEE 754 floating point standard. - -Further information on this package is available at -<http://www.analytics.washington.edu/statcomp/projects/rzope/fpconst/>. - -Author: Gregory R. Warnes <gregory_r_warnes@groton.pfizer.com> -Date:: 2003-04-08 -Copyright: (c) 2003, Pfizer, Inc. -""" - -__version__ = "0.7.0" -ident = "$Id: fpconst.py,v 1.12 2004/05/22 04:38:17 warnes Exp $" - -import struct, operator - -# check endianess -_big_endian = struct.pack('i',1)[0] != '\x01' - -# and define appropriate constants -if(_big_endian): - NaN = struct.unpack('d', '\x7F\xF8\x00\x00\x00\x00\x00\x00')[0] - PosInf = struct.unpack('d', '\x7F\xF0\x00\x00\x00\x00\x00\x00')[0] - NegInf = -PosInf -else: - NaN = struct.unpack('d', '\x00\x00\x00\x00\x00\x00\xf8\xff')[0] - PosInf = struct.unpack('d', '\x00\x00\x00\x00\x00\x00\xf0\x7f')[0] - NegInf = -PosInf - -def _double_as_bytes(dval): - "Use struct.unpack to decode a double precision float into eight bytes" - tmp = list(struct.unpack('8B',struct.pack('d', dval))) - if not _big_endian: - tmp.reverse() - return tmp - -## -## Functions to extract components of the IEEE 754 floating point format -## - -def _sign(dval): - "Extract the sign bit from a double-precision floating point value" - bb = _double_as_bytes(dval) - return bb[0] >> 7 & 0x01 - -def _exponent(dval): - """Extract the exponentent bits from a double-precision floating - point value. - - Note that for normalized values, the exponent bits have an offset - of 1023. As a consequence, the actual exponentent is obtained - by subtracting 1023 from the value returned by this function - """ - bb = _double_as_bytes(dval) - return (bb[0] << 4 | bb[1] >> 4) & 0x7ff - -def _mantissa(dval): - """Extract the _mantissa bits from a double-precision floating - point value.""" - - bb = _double_as_bytes(dval) - mantissa = bb[1] & 0x0f << 48 - mantissa += bb[2] << 40 - mantissa += bb[3] << 32 - mantissa += bb[4] - return mantissa - -def _zero_mantissa(dval): - """Determine whether the mantissa bits of the given double are all - zero.""" - bb = _double_as_bytes(dval) - return ((bb[1] & 0x0f) | reduce(operator.or_, bb[2:])) == 0 - -## -## Functions to test for IEEE 754 special values -## - -def isNaN(value): - "Determine if the argument is a IEEE 754 NaN (Not a Number) value." - return (_exponent(value)==0x7ff and not _zero_mantissa(value)) - -def isInf(value): - """Determine if the argument is an infinite IEEE 754 value (positive - or negative inifinity)""" - return (_exponent(value)==0x7ff and _zero_mantissa(value)) - -def isFinite(value): - """Determine if the argument is an finite IEEE 754 value (i.e., is - not NaN, positive or negative inifinity)""" - return (_exponent(value)!=0x7ff) - -def isPosInf(value): - "Determine if the argument is a IEEE 754 positive infinity value" - return (_sign(value)==0 and _exponent(value)==0x7ff and \ - _zero_mantissa(value)) - -def isNegInf(value): - "Determine if the argument is a IEEE 754 negative infinity value" - return (_sign(value)==1 and _exponent(value)==0x7ff and \ - _zero_mantissa(value)) - -## -## Functions to test public functions. 
-## - -def test_isNaN(): - assert( not isNaN(PosInf) ) - assert( not isNaN(NegInf) ) - assert( isNaN(NaN ) ) - assert( not isNaN( 1.0) ) - assert( not isNaN( -1.0) ) - -def test_isInf(): - assert( isInf(PosInf) ) - assert( isInf(NegInf) ) - assert( not isInf(NaN ) ) - assert( not isInf( 1.0) ) - assert( not isInf( -1.0) ) - -def test_isFinite(): - assert( not isFinite(PosInf) ) - assert( not isFinite(NegInf) ) - assert( not isFinite(NaN ) ) - assert( isFinite( 1.0) ) - assert( isFinite( -1.0) ) - -def test_isPosInf(): - assert( isPosInf(PosInf) ) - assert( not isPosInf(NegInf) ) - assert( not isPosInf(NaN ) ) - assert( not isPosInf( 1.0) ) - assert( not isPosInf( -1.0) ) - -def test_isNegInf(): - assert( not isNegInf(PosInf) ) - assert( isNegInf(NegInf) ) - assert( not isNegInf(NaN ) ) - assert( not isNegInf( 1.0) ) - assert( not isNegInf( -1.0) ) - -# overall test -def test(): - test_isNaN() - test_isInf() - test_isFinite() - test_isPosInf() - test_isNegInf() - -if __name__ == "__main__": - test() - diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/__init__.py --- a/lib/galaxy/__init__.py +++ b/lib/galaxy/__init__.py @@ -95,10 +95,15 @@ pkg_resources.Distribution._insert_on = pkg_resources.Distribution.insert_on pkg_resources.Distribution.insert_on = _insert_on -# patch to add the NullHandler class to logging -if sys.version_info[:2] < ( 2, 7 ): - import logging +# compat: BadZipFile introduced in Python 2.7 +import zipfile +if not hasattr( zipfile, 'BadZipFile' ): + zipfile.BadZipFile = zipfile.error + +# compat: patch to add the NullHandler class to logging +import logging +if not hasattr( logging, 'NullHandler' ): class NullHandler( logging.Handler ): def emit( self, record ): pass - logging.NullHandler = NullHandler + logging.NullHandler = NullHandler \ No newline at end of file diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -15,6 +15,7 @@ from galaxy.tags.tag_handler import GalaxyTagHandler from galaxy.visualization.genomes import Genomes from galaxy.visualization.data_providers.registry import DataProviderRegistry +from galaxy.visualization.registry import VisualizationsRegistry from galaxy.tools.imp_exp import load_history_imp_exp_tools from galaxy.tools.genome_index import load_genome_index_tools from galaxy.sample_tracking import external_service_types @@ -61,7 +62,8 @@ self.config.database_engine_options, database_query_profiling_proxy = self.config.database_query_profiling_proxy, object_store = self.object_store, - trace_logger=self.trace_logger ) + trace_logger=self.trace_logger, + use_pbkdf2=self.config.get_bool( 'use_pbkdf2', True ) ) # Manage installed tool shed repositories. self.installed_repository_manager = tool_shed.galaxy_install.InstalledRepositoryManager( self ) # Create an empty datatypes registry. @@ -90,7 +92,7 @@ # Load additional entries defined by self.config.shed_tool_data_table_config into tool data tables. self.tool_data_tables.load_from_config_file( config_filename=self.config.shed_tool_data_table_config, tool_data_path=self.tool_data_tables.tool_data_path, - from_shed_config=True ) + from_shed_config=False ) # Initialize the job management configuration self.job_config = jobs.JobConfiguration(self) # Initialize the tools, making sure the list of tool configs includes the reserved migrated_tools_conf.xml file. 
@@ -120,6 +122,9 @@ load_history_imp_exp_tools( self.toolbox ) # Load genome indexer tool. load_genome_index_tools( self.toolbox ) + # visualizations registry: associates resources with visualizations, controls how to render + self.visualizations_registry = ( VisualizationsRegistry( self.config.root, self.config.visualizations_conf_path ) + if self.config.visualizations_conf_path else None ) # Load security policy. self.security_agent = self.model.security_agent self.host_security_agent = galaxy.security.HostAgent( model=self.security_agent.model, permitted_actions=self.security_agent.permitted_actions ) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -65,6 +65,11 @@ else: tcf = 'tool_conf.xml' self.tool_configs = [ resolve_path( p, self.root ) for p in listify( tcf ) ] + self.shed_tool_data_path = kwargs.get( "shed_tool_data_path", None ) + if self.shed_tool_data_path: + self.shed_tool_data_path = resolve_path( self.shed_tool_data_path, self.root ) + else: + self.shed_tool_data_path = self.tool_data_path self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root ) self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root ) self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) ) @@ -86,7 +91,6 @@ self.galaxy_data_manager_data_path = kwargs.get( 'galaxy_data_manager_data_path', self.tool_data_path ) self.tool_secret = kwargs.get( "tool_secret", "" ) self.id_secret = kwargs.get( "id_secret", "USING THE DEFAULT IS NOT SECURE!" ) - self.set_metadata_externally = string_as_bool( kwargs.get( "set_metadata_externally", "False" ) ) self.retry_metadata_internally = string_as_bool( kwargs.get( "retry_metadata_internally", "True" ) ) self.use_remote_user = string_as_bool( kwargs.get( "use_remote_user", "False" ) ) self.remote_user_maildomain = kwargs.get( "remote_user_maildomain", None ) @@ -155,6 +159,10 @@ self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea,ucla" ).lower().split(",") self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "modencode,sgd_yeast,tair,wormbase,wormbase_ws120,wormbase_ws140,wormbase_ws170,wormbase_ws180,wormbase_ws190,wormbase_ws200,wormbase_ws204,wormbase_ws210,wormbase_ws220,wormbase_ws225" ).lower().split(",") self.brand = kwargs.get( 'brand', None ) + # Configuration for the message box directly below the masthead. + self.message_box_visible = kwargs.get( 'message_box_visible', False ) + self.message_box_content = kwargs.get( 'message_box_content', None ) + self.message_box_class = kwargs.get( 'message_box_class', 'info' ) self.support_url = kwargs.get( 'support_url', 'http://wiki.g2.bx.psu.edu/Support' ) self.wiki_url = kwargs.get( 'wiki_url', 'http://g2.trac.bx.psu.edu/' ) self.blog_url = kwargs.get( 'blog_url', None ) @@ -166,6 +174,7 @@ self.enable_whoosh_library_search = string_as_bool( kwargs.get( 'enable_whoosh_library_search', False ) ) self.whoosh_index_dir = resolve_path( kwargs.get( "whoosh_index_dir", "database/whoosh_indexes" ), self.root ) self.ftp_upload_dir = kwargs.get( 'ftp_upload_dir', None ) + self.ftp_upload_dir_identifier = kwargs.get( 'ftp_upload_dir_identifier', 'email' ) # attribute on user - email, username, id, etc... 
self.ftp_upload_site = kwargs.get( 'ftp_upload_site', None ) self.allow_library_path_paste = kwargs.get( 'allow_library_path_paste', False ) self.disable_library_comptypes = kwargs.get( 'disable_library_comptypes', '' ).lower().split( ',' ) @@ -271,6 +280,8 @@ self.fluent_log = string_as_bool( kwargs.get( 'fluent_log', False ) ) self.fluent_host = kwargs.get( 'fluent_host', 'localhost' ) self.fluent_port = int( kwargs.get( 'fluent_port', 24224 ) ) + # visualizations registry config path + self.visualizations_conf_path = kwargs.get( 'visualizations_conf_path', None ) @property def sentry_dsn_public( self ): diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/assembly.py --- a/lib/galaxy/datatypes/assembly.py +++ b/lib/galaxy/datatypes/assembly.py @@ -5,14 +5,14 @@ """ import data +import logging +import os +import re +import sys from galaxy.datatypes import sequence -import logging, os, sys, time, tempfile, shutil, string, glob, re -import galaxy.model -from galaxy.datatypes import metadata +from galaxy.datatypes.images import Html from galaxy.datatypes.metadata import MetadataElement -from galaxy import util -from galaxy.datatypes.images import Html -from sniff import * + log = logging.getLogger(__name__) @@ -174,7 +174,6 @@ gen_msg = '' try: efp = dataset.extra_files_path - flist = os.listdir(efp) log_path = os.path.join(efp,'Log') f = open(log_path,'r') log_content = f.read(1000) @@ -223,5 +222,5 @@ self.regenerate_primary_file(dataset) if __name__ == '__main__': - import doctest, sys + import doctest doctest.testmod(sys.modules[__name__]) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -2,18 +2,26 @@ Binary classes """ -import data, logging, binascii +import binascii +import data +import gzip +import logging +import os +import shutil +import struct +import subprocess +import tempfile +import zipfile + +from urllib import urlencode, quote_plus +from galaxy import eggs +eggs.require( "bx-python" ) + +from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE + from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * -from galaxy import eggs -import pkg_resources -pkg_resources.require( "bx-python" ) -from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE -from urllib import urlencode, quote_plus -import zipfile, gzip -import os, subprocess, tempfile -import struct log = logging.getLogger(__name__) @@ -94,6 +102,9 @@ class Bam( Binary ): """Class describing a BAM binary file""" file_ext = "bam" + track_type = "ReadTrack" + data_sources = { "data": "bai", "index": "bigwig" } + MetadataElement( name="bam_index", desc="BAM Index File", param=metadata.FileParameter, file_ext="bai", readonly=True, no_value=None, visible=False, optional=True ) def _get_samtools_version( self ): @@ -244,9 +255,7 @@ return dataset.peek except: return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) - def get_track_type( self ): - return "ReadTrack", { "data": "bai", "index": [ "bigwig", "summary_tree" ] } - + Binary.register_sniffable_binary_format("bam", "bam", Bam) class H5( Binary ): @@ -324,6 +333,9 @@ The supplemental info in the paper has the binary details: http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btq351v1 """ 
+ track_type = "LineTrack" + data_sources = { "data_standalone": "bigwig" } + def __init__( self, **kwd ): Binary.__init__( self, **kwd ) self._magic = 0x888FFC26 @@ -348,19 +360,18 @@ return dataset.peek except: return "Binary UCSC %s file (%s)" % ( self._name, data.nice_size( dataset.get_size() ) ) - def get_track_type( self ): - return "LineTrack", {"data_standalone": "bigwig"} - + Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig) class BigBed(BigWig): """BigBed support from UCSC.""" + + data_sources = { "data_standalone": "bigbed" } + def __init__( self, **kwd ): Binary.__init__( self, **kwd ) self._magic = 0x8789F2EB self._name = "BigBed" - def get_track_type( self ): - return "LineTrack", {"data_standalone": "bigbed"} Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/checkers.py --- a/lib/galaxy/datatypes/checkers.py +++ b/lib/galaxy/datatypes/checkers.py @@ -58,7 +58,7 @@ for chars in temp: for char in chars: chars_read += 1 - if ord( char ) > 128: + if util.is_binary( char ): is_binary = True break if chars_read > 100: diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/chrominfo.py --- a/lib/galaxy/datatypes/chrominfo.py +++ b/lib/galaxy/datatypes/chrominfo.py @@ -1,7 +1,3 @@ -import data -from galaxy import util -from galaxy.datatypes.sniff import * -from galaxy.web import url_for from tabular import Tabular from galaxy.datatypes import metadata from galaxy.datatypes.metadata import MetadataElement diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml --- a/lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml +++ b/lib/galaxy/datatypes/converters/bam_to_bigwig_converter.xml @@ -1,7 +1,14 @@ <tool id="CONVERTER_bam_to_bigwig_0" name="Convert BAM to BigWig" version="1.0.0" hidden="true"><!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --><command> - bedtools genomecov -bg -split -ibam $input -g $chromInfo | wigToBigWig stdin $chromInfo $output + bedtools genomecov -bg -split -ibam $input -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. 
+ > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output </command><inputs><param format="bam" name="input" type="data" label="Choose BAM file"/> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bam_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/bam_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_bam_to_summary_tree_0" name="Convert BAM to Summary Tree" version="1.0.0" hidden="true"> - <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python"> - sam_or_bam_to_summary_tree_converter.py --bam $input1 $input1.metadata.bam_index $output1 - </command> - <inputs> - <param format="bam" name="input1" type="data" label="Choose BAM file"/> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bed_gff_or_vcf_to_bigwig_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/bed_gff_or_vcf_to_bigwig_converter.xml @@ -0,0 +1,25 @@ +<tool id="CONVERTER_bed_gff_or_vcf_to_bigwig_0" name="Convert BED, GFF, or VCF to BigWig" version="1.0.0" hidden="true"> + <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command> + ## Remove comments and sort by chromosome. + grep -v '^#' $input | sort -k1,1 | + + ## Generate coverage bedgraph. + bedtools genomecov -bg -split -i stdin -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. 
+ > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output + </command> + <inputs> + <param format="bed,gff,vcf" name="input" type="data" label="Choose input file"/> + </inputs> + <outputs> + <data format="bigwig" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/bed_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_bed_to_summary_tree_0" name="Convert BED to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1</command> - <inputs> - <page> - <param format="bed" name="input1" type="data" label="Choose BED file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/encodepeak_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/encodepeak_to_summary_tree_converter.xml +++ /dev/null @@ -1,20 +0,0 @@ -<tool id="CONVERTER_encodepeak_to_summary_tree_0" name="Convert ENCODEPeak to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py - -c ${input1.metadata.chromCol} - -s ${input1.metadata.startCol} - -e ${input1.metadata.endCol} - $input1 $output1 - </command> - - <inputs> - <page> - <param format="ENCODEPeak" name="input1" type="data" label="Choose ENCODEPeak file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/gff_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/gff_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_gff_to_summary_tree_0" name="Convert GFF to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py $input1 $output1 --gff</command> - <inputs> - <page> - <param format="gff" name="input1" type="data" label="Choose GFF file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/interval_to_bigwig_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/interval_to_bigwig_converter.xml @@ -0,0 +1,33 @@ +<tool id="CONVERTER_interval_to_bigwig_0" name="Convert Genomic Intervals To Coverage"> + <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <!-- Used on the metadata edit page. --> + <command> + + ## Remove comments and sort by chromosome. + grep -v '^#' $input1 | sort -k${input1.metadata.chromCol},${input1.metadata.chromCol} | + + ## Create simple BED by cutting chrom, start, and end columns. + awk -v OFS=' ' '{print $${input1.metadata.chromCol},$${input1.metadata.startCol},$${input1.metadata.endCol} }' | + + ## Generate coverage bedgraph. 
+ bedtools genomecov -bg -split -i stdin -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. + > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output + + </command> + <inputs> + <page> + <param format="interval" name="input1" type="data" label="Choose intervals"/> + </page> + </inputs> + <outputs> + <data format="bigwig" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -""" -Convert from interval file to summary tree file. Default input file format is BED (0-based, half-open intervals). - -usage: %prog <options> in_file out_file - -c, --chr-col: chromosome column, default=1 - -s, --start-col: start column, default=2 - -e, --end-col: end column, default=3 - -t, --strand-col: strand column, default=6 - -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval -""" -from __future__ import division - -import sys, fileinput, optparse -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from galaxy.visualization.tracks.summary import * -from bx.intervals.io import * -from galaxy.datatypes.util.gff_util import * - -def main(): - # Read options, args. - parser = optparse.OptionParser() - parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col', default=1 ) - parser.add_option( '-s', '--start-col', type='int', dest='start_col', default=2 ) - parser.add_option( '-e', '--end-col', type='int', dest='end_col', default=3 ) - parser.add_option( '-t', '--strand-col', type='int', dest='strand_col', default=6 ) - parser.add_option( '-G', '--gff', dest="gff_format", action="store_true" ) - (options, args) = parser.parse_args() - input_fname, output_fname = args - - # Convert column indices to 0-based. - options.chrom_col -= 1 - options.start_col -= 1 - options.end_col -= 1 - options.strand_col -= 1 - - # Do conversion. - if options.gff_format: - reader_wrapper_class = GFFReaderWrapper - chr_col, start_col, end_col, strand_col = ( 0, 3, 4, 6 ) - else: - reader_wrapper_class = NiceReaderWrapper - chr_col, start_col, end_col, strand_col = ( options.chrom_col, options.start_col, options.end_col, options.strand_col ) - reader_wrapper = reader_wrapper_class( fileinput.FileInput( input_fname ), - chrom_col=chr_col, - start_col=start_col, - end_col=end_col, - strand_col=strand_col, - fix_strand=True ) - st = SummaryTree() - for feature in list( reader_wrapper ): - if isinstance( feature, GenomicInterval ): - # Tree expects BED coordinates. 
- if type( feature ) is GFFFeature: - convert_gff_coords_to_bed( feature ) - st.insert_range( feature.chrom, long( feature.start ), long( feature.end ) ) - - st.write( output_fname ) - -if __name__ == "__main__": - main() diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.xml +++ /dev/null @@ -1,20 +0,0 @@ -<tool id="CONVERTER_interval_to_summary_tree_0" name="Convert Interval to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">interval_to_summary_tree_converter.py - -c ${input1.metadata.chromCol} - -s ${input1.metadata.startCol} - -e ${input1.metadata.endCol} - $input1 $output1 - </command> - - <inputs> - <page> - <param format="interval" name="input1" type="data" label="Choose Interval file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py --- /dev/null +++ b/lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +""" +Convert from pileup file to interval index file. + +usage: %prog <options> in_file out_file +""" + +from __future__ import division + +import sys, fileinput, optparse +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from galaxy.visualization.tracks.summary import * +from galaxy.datatypes.util.gff_util import convert_gff_coords_to_bed +from bx.interval_index_file import Indexes + +def main(): + + # Read options, args. + parser = optparse.OptionParser() + (options, args) = parser.parse_args() + input_fname, output_fname = args + + # Do conversion. + index = Indexes() + offset = 0 + for line in open( input_fname, "r" ): + chrom, start = line.split()[ 0:2 ] + # Pileup format is 1-based. 
+ start = int( start ) - 1 + index.add( chrom, start, start + 1, offset ) + offset += len( line ) + + index.write( open(output_fname, "w") ) + +if __name__ == "__main__": + main() + \ No newline at end of file diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.xml @@ -0,0 +1,15 @@ +<tool id="CONVERTER_pileup_to_interval_index_0" name="Convert Pileup to Interval Index" version="1.0.0" hidden="true"> +<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> + <command interpreter="python">pileup_to_interval_index_converter.py $input $output + </command> + <inputs> + <page> + <param format="pileup" name="input" type="data" label="Choose Pileup file"/> + </page> + </inputs> + <outputs> + <data format="interval_index" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/sam_or_bam_to_summary_tree_converter.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division - -import sys, os, optparse -sys.stderr = open(os.devnull, 'w') # suppress stderr as cython produces warning on some systems: - # csamtools.so:6: RuntimeWarning: __builtin__.file size changed - -from galaxy import eggs -import pkg_resources - -if sys.version_info[:2] == (2, 4): - pkg_resources.require( "ctypes" ) -pkg_resources.require( "pysam" ) - -from pysam import csamtools -from galaxy.visualization.tracks.summary import * - -def main(): - parser = optparse.OptionParser() - parser.add_option( '-S', '--sam', action="store_true", dest="is_sam" ) - parser.add_option( '-B', '--bam', action="store_true", dest="is_bam" ) - options, args = parser.parse_args() - - if options.is_bam: - input_fname = args[0] - index_fname = args[1] - out_fname = args[2] - samfile = csamtools.Samfile( filename=input_fname, mode='rb', index_filename=index_fname ) - elif options.is_sam: - input_fname = args[0] - out_fname = args[1] - samfile = csamtools.Samfile( filename=input_fname, mode='r' ) - - st = SummaryTree() - for read in samfile.fetch(): - st.insert_range( samfile.getrname( read.rname ), read.pos, read.pos + read.rlen ) - - st.write(out_fname) - -if __name__ == "__main__": - main() diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/sam_to_bigwig_converter.xml --- /dev/null +++ b/lib/galaxy/datatypes/converters/sam_to_bigwig_converter.xml @@ -0,0 +1,20 @@ +<tool id="CONVERTER_sam_to_bigwig_0" name="Convert SAM to BigWig" version="1.0.0" hidden="true"> + <command> + samtools view -bh $input | bedtools genomecov -bg -split -ibam stdin -g $chromInfo + + ## Streaming the bedgraph file to wigToBigWig is fast but very memory intensive; hence, this + ## should only be used on systems with large RAM. + ## | wigToBigWig stdin $chromInfo $output + + ## This can be used anywhere. 
+ > temp.bg ; bedGraphToBigWig temp.bg $chromInfo $output + </command> + <inputs> + <param format="bam" name="input" type="data" label="Choose BAM file"/> + </inputs> + <outputs> + <data format="bigwig" name="output"/> + </outputs> + <help> + </help> +</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/sam_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/sam_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_sam_to_summary_tree_0" name="Convert SAM to Summary Tree" version="1.0.0" hidden="true"> -<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">sam_or_bam_to_summary_tree_converter.py --sam $input1 $output1</command> - <inputs> - <page> - <param format="sam" name="input1" type="data" label="Choose sam file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py --- a/lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -""" -Convert from VCF file to summary tree file. - -usage: %prog in_file out_file -""" -from __future__ import division - -import optparse -import galaxy_utils.sequence.vcf -from galaxy.visualization.tracks.summary import SummaryTree - -def main(): - # Read options, args. - parser = optparse.OptionParser() - (options, args) = parser.parse_args() - in_file, out_file = args - - # Do conversion. - st = SummaryTree() - for line in list( galaxy_utils.sequence.vcf.Reader( open( in_file ) ) ): - # VCF format provides a chrom and 1-based position for each variant. - # SummaryTree expects 0-based coordinates. 
- st.insert_range( line.chrom, long( line.pos-1 ), long( line.pos ) ) - - st.write(out_file) - -if __name__ == "__main__": - main() \ No newline at end of file diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.xml --- a/lib/galaxy/datatypes/converters/vcf_to_summary_tree_converter.xml +++ /dev/null @@ -1,14 +0,0 @@ -<tool id="CONVERTER_vcf_to_summary_tree_0" name="Convert VCF to Summary Tree" version="1.0.0" hidden="true"> - <!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> --> - <command interpreter="python">vcf_to_summary_tree_converter.py $input1 $output1</command> - <inputs> - <page> - <param format="vcf" name="input1" type="data" label="Choose VCF file"/> - </page> - </inputs> - <outputs> - <data format="summary_tree" name="output1"/> - </outputs> - <help> - </help> -</tool> diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/coverage.py --- a/lib/galaxy/datatypes/coverage.py +++ b/lib/galaxy/datatypes/coverage.py @@ -2,21 +2,14 @@ Coverage datatypes """ -import pkg_resources -pkg_resources.require( "bx-python" ) -import logging, os, sys, time, tempfile, shutil -import data -from galaxy import util -from galaxy.datatypes.sniff import * -from galaxy.web import url_for -from cgi import escape -import urllib -from bx.intervals.io import * +import logging +import math + +from galaxy import eggs from galaxy.datatypes import metadata from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes.tabular import Tabular -import math log = logging.getLogger(__name__) @@ -34,7 +27,7 @@ Assumes we have a numpy file. """ # Maybe if we import here people will still be able to use Galaxy when numpy kills it - pkg_resources.require("numpy>=1.2.1") + eggs.require("numpy>=1.2.1") #from numpy.lib import format import numpy diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -2,6 +2,7 @@ import metadata import mimetypes import os +import shutil import sys import tempfile import zipfile @@ -17,12 +18,6 @@ eggs.require( "Paste" ) import paste - -if sys.version_info[:2] < ( 2, 6 ): - zipfile.BadZipFile = zipfile.error -if sys.version_info[:2] < ( 2, 5 ): - zipfile.LargeZipFile = zipfile.error - log = logging.getLogger(__name__) tmpd = tempfile.mkdtemp() @@ -103,6 +98,12 @@ #A per datatype setting (inherited): max file size (in bytes) for setting optional metadata _max_optional_metadata_filesize = None + # Trackster track type. + track_type = None + + # Data sources. + data_sources = {} + def __init__(self, **kwd): """Initialize the datatype""" object.__init__(self, **kwd) @@ -545,21 +546,21 @@ return False - def merge( split_files, output_file): """ - TODO: Do we need to merge gzip files using gzjoin? cat seems to work, - but might be brittle. Need to revisit this. + Merge files with copy.copyfileobj() will not hit the + max argument limitation of cat. gz and bz2 files are also working. 
""" if not split_files: raise ValueError('Asked to merge zero files as %s' % output_file) elif len(split_files) == 1: - cmd = 'mv -f %s %s' % ( split_files[0], output_file ) + shutil.copyfileobj(open(split_files[0], 'rb'), open(output_file, 'wb')) else: - cmd = 'cat %s > %s' % ( ' '.join(split_files), output_file ) - result = os.system(cmd) - if result != 0: - raise Exception('Result %s from %s' % (result, cmd)) + fdst = open(output_file, 'wb') + for fsrc in split_files: + shutil.copyfileobj(open(fsrc, 'rb'), fdst) + fdst.close() + merge = staticmethod(merge) def get_visualizations( self, dataset ): @@ -567,7 +568,7 @@ Returns a list of visualizations for datatype. """ - if hasattr( self, 'get_track_type' ): + if self.track_type: return [ 'trackster', 'circster' ] return [] @@ -864,7 +865,7 @@ text = "%s file" % file_type else: try: - text = unicode( '\n'.join( lines ), 'utf-8' ) + text = util.unicodify( '\n'.join( lines ) ) except UnicodeDecodeError: text = "binary/unknown file" return text diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/display_applications/application.py --- a/lib/galaxy/datatypes/display_applications/application.py +++ b/lib/galaxy/datatypes/display_applications/application.py @@ -12,7 +12,7 @@ log = logging.getLogger( __name__ ) #Any basic functions that we want to provide as a basic part of parameter dict should be added to this dict -BASE_PARAMS = { 'qp': quote_plus, 'url_for':url_for } #url_for has route memory... +BASE_PARAMS = { 'qp': quote_plus, 'url_for':url_for } class DisplayApplicationLink( object ): @classmethod @@ -40,7 +40,7 @@ self.name = None def get_display_url( self, data, trans ): dataset_hash, user_hash = encode_dataset_user( trans, data, None ) - return url_for( controller='/dataset', + return url_for( controller='dataset', action="display_application", dataset_id=dataset_hash, user_id=user_hash, diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/display_applications/link_generator.py --- a/lib/galaxy/datatypes/display_applications/link_generator.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Classes to generate links for old-style display applications. - -Separating Transaction based elements of display applications from datatypes. -""" - -#FIXME: The code contained within this file is for old-style display applications, but -#this module namespace is intended to only handle the new-style display applications. - -import urllib - -# for the url_for hack -import pkg_resources -pkg_resources.require( "Routes" ) -import routes - -from galaxy import util -from galaxy.web import url_for -from galaxy.datatypes.interval import Interval, Gff, Wiggle, CustomTrack - -#TODO: Ideally, these classes would be instantiated in the trans (or some other semi-persistant fixture) -# Currently, these are instantiated per HDA which is not the best solution - -#TODO: these could be extended to handle file_function and parse/contain the builds.txt files - -#HACK: these duplicate functionality from the individual datatype classes themselves - -def get_display_app_link_generator( display_app_name ): - """Returns an instance of the proper link generator class - based on the display_app_name or DisplayAppLinkGenerator - if the display_app_name is unrecognized. 
- """ - if display_app_name == 'ucsc': - return UCSCDisplayAppLinkGenerator() - - elif display_app_name == 'gbrowse': - return GBrowseDisplayAppLinkGenerator() - - return DisplayAppLinkGenerator() - - -class DisplayAppLinkGenerator( object ): - """Base class for display application link generators. - - This class returns an empty list of links for all datatypes. - """ - def __init__( self ): - self.display_app_name = '' - - def no_links_available( self, dataset, app, base_url, url_for=url_for ): - """Called when no display application links are available - for this display app name and datatype combination. - """ - return [] - - def _link_function_from_datatype( self, datatype ): - """Dispatch to proper link generating function on datatype. - """ - return self.no_links_available - - def generate_links( self, trans, dataset ): - # here's the hack - which is expensive (time) - web_url_for = routes.URLGenerator( trans.webapp.mapper, trans.environ ) - - link_function = self._link_function_from_datatype( dataset.datatype ) - display_links = link_function( dataset, trans.app, trans.request.base, url_for=web_url_for ) - - return display_links - - -class UCSCDisplayAppLinkGenerator( DisplayAppLinkGenerator ): - """Class for generating links to display data in the - UCSC genome browser. - - This class returns links for the following datatypes and their subclasses: - Interval, Wiggle, Gff, CustomTrack - """ - def __init__( self ): - self.display_app_name = 'ucsc' - - def _link_function_from_datatype( self, datatype ): - """Dispatch to proper link generating function based on datatype. - """ - if( ( isinstance( datatype, Interval ) ) - or ( isinstance( datatype, Wiggle ) ) - or ( isinstance( datatype, Gff ) ) - or ( isinstance( datatype, CustomTrack ) ) ): - return self.ucsc_links - else: - return super( UCSCDisplayAppLinkGenerator, self )._link_function_from_datatype( datatype ) - - def ucsc_links( self, dataset, app, base_url, url_for=url_for ): - """Generate links to UCSC genome browser sites based on the dbkey - and content of dataset. - """ - # this is a refactor of Interval.ucsc_links, GFF.ucsc_links, Wiggle.ucsc_links, and CustomTrack.ucsc_links - #TODO: app vars can be moved into init (and base_url as well) - chrom, start, stop = dataset.datatype.get_estimated_display_viewport( dataset ) - if chrom is None: - return [] - ret_val = [] - for site_name, site_url in util.get_ucsc_by_build(dataset.dbkey): - if site_name in app.config.ucsc_display_sites: - internal_url = url_for( controller='dataset', dataset_id=dataset.id, - action='display_at', filename='%s_%s' % ( self.display_app_name, site_name ) ) - base_url = app.config.get( "display_at_callback", base_url ) - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" - % (base_url, url_for( controller='root' ), dataset.id, self.display_app_name) ) - redirect_url = urllib.quote_plus( "%sdb=%s&position=%s:%s-%s&hgt.customText=%%s" - % (site_url, dataset.dbkey, chrom, start, stop ) ) - - link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) - ret_val.append( ( site_name, link ) ) - - return ret_val - - -class GBrowseDisplayAppLinkGenerator( DisplayAppLinkGenerator ): - """Class for generating links to display data in the - GBrowse genome browser. 
- - This class returns links for the following datatypes and their subclasses: - Gff, Wiggle - """ - def __init__( self ): - self.display_app_name = 'gbrowse' - - def _link_function_from_datatype( self, datatype ): - """Dispatch to proper link generating function based on datatype. - """ - if( ( isinstance( datatype, Gff ) ) - or ( isinstance( datatype, Wiggle ) ) ): - return self.gbrowse_links - else: - return super( GBrowseDisplayAppLinkGenerator, self )._link_function_from_datatype( datatype ) - - def gbrowse_links( self, dataset, app, base_url, url_for=url_for ): - """Generate links to GBrowse genome browser sites based on the dbkey - and content of dataset. - """ - # when normalized for var names, Gff.gbrowse_links and Wiggle.gbrowse_links are the same - # also: almost identical to ucsc_links except for the 'chr' stripping, sites_by_build, config key - # could be refactored even more - chrom, start, stop = dataset.datatype.get_estimated_display_viewport( dataset ) - if chrom is None: - return [] - ret_val = [] - for site_name, site_url in util.get_gbrowse_sites_by_build( dataset.dbkey ): - if site_name in app.config.gbrowse_display_sites: - # strip chr from seqid - if chrom.startswith( 'chr' ) and len ( chrom ) > 3: - chrom = chrom[3:] - internal_url = url_for( controller='dataset', dataset_id=dataset.id, - action='display_at', filename='%s_%s' % ( self.display_app_name, site_name ) ) - redirect_url = urllib.quote_plus( "%s/?q=%s:%s..%s&eurl=%%s" % ( site_url, chrom, start, stop ) ) - base_url = app.config.get( "display_at_callback", base_url ) - display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" - % ( base_url, url_for( controller='root' ), dataset.id, self.display_app_name ) ) - link = '%s?redirect_url=%s&display_url=%s' % ( internal_url, redirect_url, display_url ) - ret_val.append( ( site_name, link ) ) - - return ret_val diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/display_applications/parameters.py --- a/lib/galaxy/datatypes/display_applications/parameters.py +++ b/lib/galaxy/datatypes/display_applications/parameters.py @@ -163,7 +163,7 @@ if self.parameter.strip_https and base_url[ : 5].lower() == 'https': base_url = "http%s" % base_url[ 5: ] return "%s%s" % ( base_url, - url_for( controller='/dataset', + url_for( controller='dataset', action="display_application", dataset_id=self._dataset_hash, user_id=self._user_hash, diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/interval.py --- a/lib/galaxy/datatypes/interval.py +++ b/lib/galaxy/datatypes/interval.py @@ -46,6 +46,8 @@ """Tab delimited data containing interval information""" file_ext = "interval" line_class = "region" + track_type = "FeatureTrack" + data_sources = { "data": "tabix", "index": "bigwig" } """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) @@ -242,7 +244,7 @@ # Accumulate links for valid sites ret_val = [] for site_name, site_url in valid_sites: - internal_url = url_for( controller='/dataset', dataset_id=dataset.id, + internal_url = url_for( controller='dataset', dataset_id=dataset.id, action='display_at', filename='ucsc_' + site_name ) display_url = urllib.quote_plus( "%s%s/display_as?id=%i&display_app=%s&authz_method=display_at" % (base_url, url_for( controller='root' ), dataset.id, type) ) @@ -328,17 +330,13 @@ def get_track_resolution( self, dataset, 
start, end): return None - - def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree"} class BedGraph( Interval ): """Tab delimited chrom/start/end/datavalue dataset""" file_ext = "bedgraph" - - def get_track_type( self ): - return "LineTrack", { "data": "bigwig", "index": "bigwig" } + track_type = "LineTrack" + data_sources = { "data": "bigwig", "index": "bigwig" } def as_ucsc_display_file( self, dataset, **kwd ): """ @@ -356,6 +354,8 @@ class Bed( Interval ): """Tab delimited data in BED format""" file_ext = "bed" + data_sources = { "data": "tabix", "index": "bigwig", "feature_search": "fli" } + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) @@ -510,9 +510,6 @@ else: return False return True except: return False - - def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree", "feature_search": "fli"} class BedStrict( Bed ): """Tab delimited data in strict BED format - no non-standard columns allowed""" @@ -572,6 +569,8 @@ """Tab delimited data in Gff format""" file_ext = "gff" column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Group' ] + data_sources = { "data": "interval_index", "index": "bigwig", "feature_search": "fli" } + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="columns", default=9, desc="Number of columns", readonly=True, visible=False ) @@ -783,10 +782,6 @@ return True except: return False - - def get_track_type( self ): - return "FeatureTrack", {"data": "interval_index", "index": "summary_tree", "feature_search": "fli"} - class Gff3( Gff ): """Tab delimited data in Gff3 format""" @@ -794,6 +789,7 @@ valid_gff3_strand = ['+', '-', '.', '?'] valid_gff3_phase = ['.', '0', '1', '2'] column_names = [ 'Seqid', 'Source', 'Type', 'Start', 'End', 'Score', 'Strand', 'Phase', 'Attributes' ] + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="column_types", default=['str','str','str','int','int','float','str','int','list'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False ) @@ -898,6 +894,7 @@ """Tab delimited data in Gtf format""" file_ext = "gtf" column_names = [ 'Seqname', 'Source', 'Feature', 'Start', 'End', 'Score', 'Strand', 'Frame', 'Attributes' ] + track_type = Interval.track_type """Add metadata elements""" MetadataElement( name="columns", default=9, desc="Number of columns", readonly=True, visible=False ) @@ -966,6 +963,8 @@ class Wiggle( Tabular, _RemoteCallMixin ): """Tab delimited data in wiggle format""" file_ext = "wig" + track_type = "LineTrack" + data_sources = { "data": "bigwig", "index": "bigwig" } MetadataElement( name="columns", default=3, desc="Number of columns", readonly=True, visible=False ) @@ -1146,9 +1145,6 @@ resolution = min( resolution, 100000 ) resolution = max( resolution, 1 ) return resolution - - def get_track_type( self ): - return "LineTrack", { "data": "bigwig", "index": "bigwig" } class CustomTrack ( Tabular ): """UCSC CustomTrack""" @@ -1292,6 +1288,7 @@ file_ext = "encodepeak" column_names = [ 'Chrom', 'Start', 'End', 'Name', 'Score', 'Strand', 'SignalValue', 'pValue', 'qValue', 'Peak' ] + data_sources = { "data": "tabix", "index": "bigwig" } """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) @@ -1303,15 +1300,14 @@ def sniff( self, 
filename ): return False - def get_track_type( self ): - return "FeatureTrack", {"data": "tabix", "index": "summary_tree"} - class ChromatinInteractions( Interval ): ''' Chromatin interactions obtained from 3C/5C/Hi-C experiments. ''' file_ext = "chrint" + track_type = "DiagonalHeatmapTrack" + data_sources = { "data": "tabix", "index": "bigwig" } column_names = [ 'Chrom1', 'Start1', 'End1', 'Chrom2', 'Start2', 'End2', 'Value' ] @@ -1328,11 +1324,6 @@ def sniff( self, filename ): return False - - def get_track_type( self ): - return "DiagonalHeatmapTrack", {"data": "tabix", "index": "summary_tree"} - - if __name__ == '__main__': import doctest, sys diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/metadata.py --- a/lib/galaxy/datatypes/metadata.py +++ b/lib/galaxy/datatypes/metadata.py @@ -1,22 +1,33 @@ -import sys, logging, copy, shutil, weakref, cPickle, tempfile, os +""" +Galaxy Metadata + +""" +from galaxy import eggs +eggs.require("simplejson") + + +import copy +import cPickle +import logging +import os +import shutil +import simplejson +import sys +import tempfile +import weakref + from os.path import abspath -from galaxy.util import string_as_bool, stringify_dictionary_keys, listify +import galaxy.model +from galaxy.util import listify, stringify_dictionary_keys, string_as_bool from galaxy.util.odict import odict from galaxy.web import form_builder -import galaxy.model from sqlalchemy.orm import object_session -import pkg_resources -pkg_resources.require("simplejson") -import simplejson - -log = logging.getLogger( __name__ ) +log = logging.getLogger(__name__) STATEMENTS = "__galaxy_statements__" #this is the name of the property in a Datatype class where new metadata spec element Statements are stored -DATABASE_CONNECTION_AVAILABLE = True #When False, certain metadata parameter types (see FileParameter) will behave differently - class Statement( object ): """ This class inserts its target into a list in the surrounding @@ -74,8 +85,8 @@ def __getattr__( self, name ): if name in self.spec: if name in self.parent._metadata: - return self.spec[name].wrap( self.parent._metadata[name] ) - return self.spec[name].wrap( self.spec[name].default ) + return self.spec[name].wrap( self.parent._metadata[name], object_session( self.parent ) ) + return self.spec[name].wrap( self.spec[name].default, object_session( self.parent ) ) if name in self.parent._metadata: return self.parent._metadata[name] def __setattr__( self, name, value ): @@ -202,7 +213,7 @@ self.validate( value ) return value - def wrap( self, value ): + def wrap( self, value, session ): """ Turns a value into its usable form. """ @@ -245,11 +256,11 @@ def get( self, name, default=None ): return self.__dict__.get(name, default) - def wrap( self, value ): + def wrap( self, value, session ): """ Turns a stored value into its usable form. """ - return self.param.wrap( value ) + return self.param.wrap( value, session ) def unwrap( self, value ): """ @@ -312,7 +323,7 @@ return ", ".join( map( str, value ) ) return MetadataParameter.get_html( self, value, context=context, other_values=other_values, values=values, **kwd ) - def wrap( self, value ): + def wrap( self, value, session ): value = self.marshal( value ) #do we really need this (wasteful)? - yes because we are not sure that all existing selects have been stored previously as lists. Also this will handle the case where defaults/no_values are specified and are single non-list values. 
if self.multiple: return value @@ -424,26 +435,16 @@ def get_html( self, value=None, context={}, other_values={}, **kwd ): return "<div>No display available for Metadata Files</div>" - def wrap( self, value ): + def wrap( self, value, session ): if value is None: return None if isinstance( value, galaxy.model.MetadataFile ) or isinstance( value, MetadataTempFile ): return value - if DATABASE_CONNECTION_AVAILABLE: - try: - # FIXME: this query requires a monkey patch in assignmapper.py since - # MetadataParameters do not have a handle to the sqlalchemy session - return galaxy.model.MetadataFile.get( value ) - except: - #value was not a valid id - return None - else: - mf = galaxy.model.MetadataFile() - mf.id = value #we assume this is a valid id, since we cannot check it - return mf + mf = session.query( galaxy.model.MetadataFile ).get( value ) + return mf def make_copy( self, value, target_context, source_context ): - value = self.wrap( value ) + value = self.wrap( value, object_session( target_context.parent ) ) if value: new_value = galaxy.model.MetadataFile( dataset = target_context.parent, name = self.spec.name ) object_session( target_context.parent ).add( new_value ) @@ -485,13 +486,13 @@ return value def new_file( self, dataset = None, **kwds ): - if DATABASE_CONNECTION_AVAILABLE: + if object_session( dataset ): mf = galaxy.model.MetadataFile( name = self.spec.name, dataset = dataset, **kwds ) object_session( dataset ).add( mf ) object_session( dataset ).flush() #flush to assign id return mf else: - #we need to make a tmp file that is accessable to the head node, + #we need to make a tmp file that is accessable to the head node, #we will be copying its contents into the MetadataFile objects filename after restoring from JSON #we do not include 'dataset' in the kwds passed, as from_JSON_value() will handle this for us return MetadataTempFile( **kwds ) diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/registry.py --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -163,7 +163,7 @@ # Use default mime type as per datatype spec mimetype = self.datatypes_by_extension[ extension ].get_mime() self.mimetypes_by_extension[ extension ] = mimetype - if hasattr( datatype_class, "get_track_type" ): + if datatype_class.track_type: self.available_tracks.append( extension ) if display_in_upload: self.upload_file_formats.append( extension ) @@ -379,6 +379,36 @@ if not included: self.sniff_order.append(datatype) append_to_sniff_order() + + def get_datatype_class_by_name( self, name ): + """ + Return the datatype class where the datatype's `type` attribute + (as defined in the datatype_conf.xml file) contains `name`. 
+ """ + #TODO: too roundabout - would be better to generate this once as a map and store in this object + found_class = None + for ext, datatype_obj in self.datatypes_by_extension.items(): + datatype_obj_class = datatype_obj.__class__ + datatype_obj_class_str = str( datatype_obj_class ) + #print datatype_obj_class_str + if name in datatype_obj_class_str: + return datatype_obj_class + return None + # these seem to be connected to the dynamic classes being generated in this file, lines 157-158 + # they appear when a one of the three are used in inheritance with subclass="True" + #TODO: a possible solution is to def a fn in datatypes __init__ for creating the dynamic classes + + #remap = { + # 'galaxy.datatypes.registry.Tabular' : galaxy.datatypes.tabular.Tabular, + # 'galaxy.datatypes.registry.Text' : galaxy.datatypes.data.Text, + # 'galaxy.datatypes.registry.Binary' : galaxy.datatypes.binary.Binary + #} + #datatype_str = str( datatype ) + #if datatype_str in remap: + # datatype = remap[ datatype_str ] + # + #return datatype + def get_available_tracks(self): return self.available_tracks def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ): @@ -397,7 +427,7 @@ except KeyError: builder = data.Text() return builder - def change_datatype(self, data, ext, set_meta = True ): + def change_datatype(self, data, ext): data.extension = ext # call init_meta and copy metadata from itself. The datatype # being converted *to* will handle any metadata copying and @@ -405,10 +435,6 @@ if data.has_data(): data.set_size() data.init_meta( copy_from=data ) - if set_meta: - #metadata is being set internally - data.set_meta( overwrite = False ) - data.set_peek() return data def old_change_datatype(self, data, ext): """Creates and returns a new datatype based on an existing data and an extension""" diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/datatypes/tabular.py --- a/lib/galaxy/datatypes/tabular.py +++ b/lib/galaxy/datatypes/tabular.py @@ -265,7 +265,7 @@ while cursor and ck_data[-1] != '\n': ck_data += cursor cursor = f.read(1) - return to_json_string({'ck_data': ck_data, 'ck_index': ck_index+1}) + return to_json_string( { 'ck_data': util.unicodify( ck_data ), 'ck_index': ck_index + 1 } ) def display_data(self, trans, dataset, preview=False, filename=None, to_ext=None, chunk=None): preview = util.string_as_bool( preview ) @@ -328,7 +328,6 @@ """ Returns a list of visualizations for datatype. """ - # Can visualize tabular data as scatterplot if there are 2+ numerical # columns. 
num_numerical_cols = 0 @@ -358,6 +357,9 @@ class Sam( Tabular ): file_ext = 'sam' + track_type = "ReadTrack" + data_sources = { "data": "bam", "index": "bigwig" } + def __init__(self, **kwd): """Initialize taxonomy datatype""" Tabular.__init__( self, **kwd ) @@ -467,17 +469,16 @@ raise Exception('Result %s from %s' % (result, cmd)) merge = staticmethod(merge) - def get_track_type( self ): - return "ReadTrack", {"data": "bam", "index": "summary_tree"} - class Pileup( Tabular ): """Tab delimited data in pileup (6- or 10-column) format""" file_ext = "pileup" line_class = "genomic coordinate" + data_sources = { "data": "tabix" } """Add metadata elements""" MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) MetadataElement( name="startCol", default=2, desc="Start column", param=metadata.ColumnParameter ) + MetadataElement( name="endCol", default=2, desc="End column", param=metadata.ColumnParameter ) MetadataElement( name="baseCol", default=3, desc="Reference base column", param=metadata.ColumnParameter ) def init_meta( self, dataset, copy_from=None ): @@ -525,8 +526,7 @@ return True except: return False - - + class ElandMulti( Tabular ): file_ext = 'elandmulti' @@ -535,23 +535,39 @@ class Vcf( Tabular ): """ Variant Call Format for describing SNPs and other simple genome variations. """ + track_type = "VariantTrack" + data_sources = { "data": "tabix", "index": "bigwig" } file_ext = 'vcf' column_names = [ 'Chrom', 'Pos', 'ID', 'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'data' ] MetadataElement( name="columns", default=10, desc="Number of columns", readonly=True, visible=False ) MetadataElement( name="column_types", default=['str','int','str','str','str','int','str','list','str','str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False ) - MetadataElement( name="viz_filter_cols", desc="Score column for visualization", default=[5], param=metadata.ColumnParameter, multiple=True ) + MetadataElement( name="viz_filter_cols", desc="Score column for visualization", default=[5], param=metadata.ColumnParameter, multiple=True, visible=False ) + MetadataElement( name="sample_names", default=[], desc="Sample names", readonly=True, visible=False, optional=True, no_value=[] ) def sniff( self, filename ): headers = get_headers( filename, '\n', count=1 ) return headers[0][0].startswith("##fileformat=VCF") + def display_peek( self, dataset ): """Returns formated html of peek""" return Tabular.make_html_table( self, dataset, column_names=self.column_names ) - def get_track_type( self ): - return "VcfTrack", {"data": "tabix", "index": "summary_tree"} + def set_meta( self, dataset, **kwd ): + Tabular.set_meta( self, dataset, **kwd ) + source = open( dataset.file_name ) + + # Skip comments. + line = None + for line in source: + if not line.startswith( '##' ): + break + + if line and line.startswith( '#' ): + # Found header line, get sample names. 
+ dataset.metadata.sample_names = line.split()[ 9: ] + class Eland( Tabular ): """Support for the export.txt.gz file used by Illumina's ELANDv2e aligner""" diff -r 8c4d07e3581dfe2ceb52d38e570a2d63d149a9cd -r d969c260417179cf48dc0bc7a4ea233224064264 lib/galaxy/eggs/__init__.py --- a/lib/galaxy/eggs/__init__.py +++ b/lib/galaxy/eggs/__init__.py @@ -387,7 +387,6 @@ "guppy": lambda: self.config.get( "app:main", "use_memdump" ), "python_openid": lambda: self.config.get( "app:main", "enable_openid" ), "python_daemon": lambda: sys.version_info[:2] >= ( 2, 5 ), - "ctypes": lambda: ( "drmaa" in self.config.get( "app:main", "start_job_runners" ).split(",") ) and sys.version_info[:2] == ( 2, 4 ), "pysam": lambda: check_pysam() }.get( egg_name, lambda: True )() except: This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/575dedd2d0ba/ Changeset: 575dedd2d0ba Branch: search User: kellrott Date: 2013-06-07 19:04:42 Summary: Fixing the tag searching for histories and history datasets Affected #: 2 files diff -r d969c260417179cf48dc0bc7a4ea233224064264 -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -32,7 +32,9 @@ import parsley from galaxy.model import HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset -from galaxy.model import StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation +from galaxy.model import (StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, +HistoryDatasetAssociationTagAssociation, +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation) from galaxy.model import ToolVersion from sqlalchemy import and_ @@ -269,12 +271,30 @@ #History Dataset Searching ################## +def history_dataset_handle_tag(view, left, operator, right): + if operator == "=": + view.do_query = True + #aliasing the tag association table, so multiple links to different tags can be formed during a single query + tag_table = aliased(HistoryDatasetAssociationTagAssociation) + + view.query = view.query.filter( + HistoryDatasetAssociation.id == tag_table.history_dataset_association_id + ) + tmp = right.split(":") + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) + if len(tmp) > 1: + view.query = view.query.filter( tag_table.user_value == tmp[1] ) + else: + raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) + class HistoryDatasetView(ViewQueryBaseClass): DOMAIN = "history_dataset" FIELDS = { 'name' : ViewField('name', sqlalchemy_field=HistoryDatasetAssociation.name), - 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True) + 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True), + 'tag' : ViewField("tag", handler=history_dataset_handle_tag) + } def search(self, trans): @@ -289,13 +309,14 @@ def history_handle_tag(view, left, operator, right): if operator == "=": view.do_query = True + tag_table = aliased(HistoryTagAssociation) view.query = view.query.filter( - History.id == HistoryTagAssociation.history_id + History.id == tag_table.history_id ) tmp = right.split(":") - view.query = view.query.filter( HistoryTagAssociation.user_tname == tmp[0] ) + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) if len(tmp) > 1: - view.query = view.query.filter( HistoryTagAssociation.user_value == tmp[1] 
) + view.query = view.query.filter( tag_table.user_value == tmp[1] ) else: raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) diff -r d969c260417179cf48dc0bc7a4ea233224064264 -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 lib/galaxy/webapps/galaxy/api/search.py --- a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -30,7 +30,6 @@ current_user_roles = trans.get_current_user_roles() try: results = query.process(trans) - print results except Exception, e: return {'error' : str(e)} for item in results: https://bitbucket.org/galaxy/galaxy-central/commits/a5b83353f9ee/ Changeset: a5b83353f9ee Branch: search User: Kyle Ellrott Date: 2013-06-17 19:59:32 Summary: galaxy-central merge Affected #: 66 files diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db job_conf.xml.sample_advanced --- a/job_conf.xml.sample_advanced +++ b/job_conf.xml.sample_advanced @@ -54,7 +54,15 @@ <param id="shell_hostname">foo.example.org</param><param id="Job_Execution_Time">24:00:00</param></destination> - <destination id="condor" runner="condor"/> + <destination id="condor" runner="condor"> + <!-- With no params, jobs are submitted to the 'vanilla' universe with: + notification = NEVER + getenv = true + Additional/override query ClassAd params can be specified with + <param> tags. + --> + <param id="request_cpus">8</param> + </destination></destinations><tools><!-- Tools can be configured to use specific destinations or handlers, diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -64,6 +64,9 @@ tcf = kwargs[ 'tool_config_files' ] else: tcf = 'tool_conf.xml' + self.tool_filters = listify( kwargs.get( "tool_filters", [] ) ) + self.tool_label_filters = listify( kwargs.get( "tool_label_filters", [] ) ) + self.tool_section_filters = listify( kwargs.get( "tool_section_filters", [] ) ) self.tool_configs = [ resolve_path( p, self.root ) for p in listify( tcf ) ] self.shed_tool_data_path = kwargs.get( "shed_tool_data_path", None ) if self.shed_tool_data_path: @@ -73,17 +76,21 @@ self.tool_data_table_config_path = resolve_path( kwargs.get( 'tool_data_table_config_path', 'tool_data_table_conf.xml' ), self.root ) self.shed_tool_data_table_config = resolve_path( kwargs.get( 'shed_tool_data_table_config', 'shed_tool_data_table_conf.xml' ), self.root ) self.enable_tool_shed_check = string_as_bool( kwargs.get( 'enable_tool_shed_check', False ) ) + self.hours_between_check = kwargs.get( 'hours_between_check', 12 ) try: - self.hours_between_check = kwargs.get( 'hours_between_check', 12 ) - if isinstance( self.hours_between_check, float ): + hbc_test = int( self.hours_between_check ) + self.hours_between_check = hbc_test + if self.hours_between_check < 1 or self.hours_between_check > 24: + self.hours_between_check = 12 + except: + try: # Float values are supported for functional tests. 
+ hbc_test = float( self.hours_between_check ) + self.hours_between_check = hbc_test if self.hours_between_check < 0.001 or self.hours_between_check > 24.0: self.hours_between_check = 12.0 - else: - if self.hours_between_check < 1 or self.hours_between_check > 24: - self.hours_between_check = 12 - except: - self.hours_between_check = 12 + except: + self.hours_between_check = 12 self.update_integrated_tool_panel = kwargs.get( "update_integrated_tool_panel", True ) self.enable_data_manager_user_view = string_as_bool( kwargs.get( "enable_data_manager_user_view", "False" ) ) self.data_manager_config_file = resolve_path( kwargs.get('data_manager_config_file', 'data_manager_conf.xml' ), self.root ) @@ -159,6 +166,7 @@ self.ucsc_display_sites = kwargs.get( 'ucsc_display_sites', "main,test,archaea,ucla" ).lower().split(",") self.gbrowse_display_sites = kwargs.get( 'gbrowse_display_sites', "modencode,sgd_yeast,tair,wormbase,wormbase_ws120,wormbase_ws140,wormbase_ws170,wormbase_ws180,wormbase_ws190,wormbase_ws200,wormbase_ws204,wormbase_ws210,wormbase_ws220,wormbase_ws225" ).lower().split(",") self.brand = kwargs.get( 'brand', None ) + self.welcome_url = kwargs.get( 'welcome_url', '/static/welcome.html' ) # Configuration for the message box directly below the masthead. self.message_box_visible = kwargs.get( 'message_box_visible', False ) self.message_box_content = kwargs.get( 'message_box_content', None ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -22,6 +22,7 @@ from galaxy.datatypes.metadata import MetadataElement from galaxy.datatypes import metadata from galaxy.datatypes.sniff import * +import dataproviders log = logging.getLogger(__name__) @@ -74,6 +75,7 @@ trans.response.headers["Content-Disposition"] = 'attachment; filename="Galaxy%s-[%s].%s"' % (dataset.hid, fname, to_ext) return open( dataset.file_name ) + class Ab1( Binary ): """Class describing an ab1 binary sequence file""" file_ext = "ab1" @@ -93,12 +95,15 @@ Binary.register_unsniffable_binary_ext("ab1") + class GenericAsn1Binary( Binary ): """Class for generic ASN.1 binary format""" file_ext = "asn1-binary" Binary.register_unsniffable_binary_ext("asn1-binary") + +@dataproviders.decorators.has_dataproviders class Bam( Binary ): """Class describing a BAM binary file""" file_ext = "bam" @@ -255,9 +260,92 @@ return dataset.peek except: return "Binary bam alignments file (%s)" % ( data.nice_size( dataset.get_size() ) ) + + # ------------- Dataproviders + # pipe through samtools view + #ALSO: (as Sam) + # bam does not use '#' to indicate comments/headers - we need to strip out those headers from the std. providers + #TODO:?? seems like there should be an easier way to do/inherit this - metadata.comment_char? + #TODO: incorporate samtools options to control output: regions first, then flags, etc. 
+ @dataproviders.decorators.dataprovider_factory( 'line' ) + def line_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.line.FilteredLineDataProvider( samtools_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'regex-line' ) + def regex_line_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.line.RegexLineDataProvider( samtools_source, **settings ) + @dataproviders.decorators.dataprovider_factory( 'column' ) + def column_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.column.ColumnarDataProvider( samtools_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'map' ) + def map_dataprovider( self, dataset, **settings ): + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + settings[ 'comment_char' ] = '@' + return dataproviders.column.MapDataProvider( samtools_source, **settings ) + + # these can't be used directly - may need BamColumn, BamMap (Bam metadata -> column/map) + # OR - see genomic_region_dataprovider + #@dataproviders.decorators.dataprovider_factory( 'dataset-column' ) + #def dataset_column_dataprovider( self, dataset, **settings ): + # settings[ 'comment_char' ] = '@' + # return super( Sam, self ).dataset_column_dataprovider( dataset, **settings ) + + #@dataproviders.decorators.dataprovider_factory( 'dataset-map' ) + #def dataset_map_dataprovider( self, dataset, **settings ): + # settings[ 'comment_char' ] = '@' + # return super( Sam, self ).dataset_map_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'header' ) + def header_dataprovider( self, dataset, **settings ): + # in this case we can use an option of samtools view to provide just what we need (w/o regex) + samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset, '-H' ) + return dataproviders.line.RegexLineDataProvider( samtools_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'id-seq-qual' ) + def id_seq_qual_dataprovider( self, dataset, **settings ): + settings[ 'indeces' ] = [ 0, 9, 10 ] + settings[ 'column_types' ] = [ 'str', 'str', 'str' ] + settings[ 'column_names' ] = [ 'id', 'seq', 'qual' ] + return self.map_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'genomic-region' ) + def genomic_region_dataprovider( self, dataset, **settings ): + # GenomicRegionDataProvider currently requires a dataset as source - may not be necc. + #TODO:?? consider (at least) the possible use of a kwarg: metadata_source (def. to source.dataset), + # or remove altogether... 
+ #samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) + #return dataproviders.dataset.GenomicRegionDataProvider( samtools_source, metadata_source=dataset, + # 2, 3, 3, **settings ) + + # instead, set manually and use in-class column gen + settings[ 'indeces' ] = [ 2, 3, 3 ] + settings[ 'column_types' ] = [ 'str', 'int', 'int' ] + return self.column_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'genomic-region-map' ) + def genomic_region_map_dataprovider( self, dataset, **settings ): + settings[ 'indeces' ] = [ 2, 3, 3 ] + settings[ 'column_types' ] = [ 'str', 'int', 'int' ] + settings[ 'column_names' ] = [ 'chrom', 'start', 'end' ] + return self.map_dataprovider( dataset, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'samtools' ) + def samtools_dataprovider( self, dataset, **settings ): + """Generic samtools interface - all options available through settings.""" + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.dataset.SamtoolsDataProvider( dataset_source, **settings ) + Binary.register_sniffable_binary_format("bam", "bam", Bam) + class H5( Binary ): """Class describing an HDF5 file""" file_ext = "h5" @@ -277,6 +365,7 @@ Binary.register_unsniffable_binary_ext("h5") + class Scf( Binary ): """Class describing an scf binary sequence file""" file_ext = "scf" @@ -296,6 +385,7 @@ Binary.register_unsniffable_binary_ext("scf") + class Sff( Binary ): """ Standard Flowgram Format (SFF) """ file_ext = "sff" @@ -327,6 +417,7 @@ Binary.register_sniffable_binary_format("sff", "sff", Sff) + class BigWig(Binary): """ Accessing binary BigWig files from UCSC. @@ -363,6 +454,7 @@ Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig) + class BigBed(BigWig): """BigBed support from UCSC.""" @@ -375,6 +467,7 @@ Binary.register_sniffable_binary_format("bigbed", "bigbed", BigBed) + class TwoBit (Binary): """Class describing a TwoBit format nucleotide file""" @@ -399,3 +492,5 @@ return dataset.peek except: return "Binary TwoBit format nucleotide file (%s)" % (data.nice_size(dataset.get_size())) + +Binary.register_sniffable_binary_format("twobit", "twobit", TwoBit) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -14,6 +14,8 @@ from galaxy.util.odict import odict from galaxy.util.sanitize_html import sanitize_html +import dataproviders + from galaxy import eggs eggs.require( "Paste" ) import paste @@ -56,6 +58,7 @@ cls.metadata_spec.update( base.metadata_spec ) #add contents of metadata spec of base class to cls metadata.Statement.process( cls ) +@dataproviders.decorators.has_dataproviders class Data( object ): """ Base class for all datatypes. Implements basic interfaces as well @@ -545,7 +548,13 @@ def has_resolution(self): return False - + def matches_any( self, target_datatypes ): + """ + Check if this datatype is of any of the target_datatypes or is + a subtype thereof. + """ + datatype_classes = tuple( [ datatype.__class__ for datatype in target_datatypes ] ) + return isinstance( self, datatype_classes ) def merge( split_files, output_file): """ Merge files with copy.copyfileobj() will not hit the @@ -572,6 +581,40 @@ return [ 'trackster', 'circster' ] return [] + # ------------- Dataproviders + def has_dataprovider( self, data_format ): + """ + Returns True if `data_format` is available in `dataproviders`. 
+ """ + return ( data_format in self.dataproviders ) + + def dataprovider( self, dataset, data_format, **settings ): + """ + Base dataprovider factory for all datatypes that returns the proper provider + for the given `data_format` or raises a `NoProviderAvailable`. + """ + #TODO:?? is this handling super class providers? + if self.has_dataprovider( data_format ): + return self.dataproviders[ data_format ]( self, dataset, **settings ) + raise dataproviders.exceptions.NoProviderAvailable( self, data_format ) + + @dataproviders.decorators.dataprovider_factory( 'base' ) + def base_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.base.DataProvider( dataset_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'chunk' ) + def chunk_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.chunk.ChunkDataProvider( dataset_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'chunk64' ) + def chunk64_dataprovider( self, dataset, **settings ): + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.chunk.Base64ChunkDataProvider( dataset_source, **settings ) + + +@dataproviders.decorators.has_dataproviders class Text( Data ): file_ext = 'txt' line_class = 'line' @@ -741,10 +784,31 @@ f.close() split = classmethod(split) + # ------------- Dataproviders + @dataproviders.decorators.dataprovider_factory( 'line' ) + def line_dataprovider( self, dataset, **settings ): + """ + Returns an iterator over the dataset's lines (that have been `strip`ed) + optionally excluding blank lines and lines that start with a comment character. + """ + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.line.FilteredLineDataProvider( dataset_source, **settings ) + + @dataproviders.decorators.dataprovider_factory( 'regex-line' ) + def regex_line_dataprovider( self, dataset, **settings ): + """ + Returns an iterator over the dataset's lines + optionally including/excluding lines that match one or more regex filters. + """ + dataset_source = dataproviders.dataset.DatasetDataProvider( dataset ) + return dataproviders.line.RegexLineDataProvider( dataset_source, **settings ) + + class GenericAsn1( Text ): """Class for generic ASN.1 text format""" file_ext = 'asn1' + class LineCount( Text ): """ Dataset contains a single line with a single integer that denotes the @@ -752,6 +816,7 @@ """ pass + class Newick( Text ): """New Hampshire/Newick Format""" file_ext = "nhx" diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/__init__.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/__init__.py @@ -0,0 +1,28 @@ + +#TODO: ---- This is a work in progress ---- +""" +Dataproviders are iterators with context managers that provide data to some +consumer datum by datum. + +As well as subclassing and overriding to get the proper data, Dataproviders +can be piped from one to the other. +..example:: + +.. note:: be careful to NOT pipe providers into subclasses of those providers. + Subclasses provide all the functionality of their superclasses, + so there's generally no need. + +.. note:: be careful to when using piped providers that accept the same keywords + in their __init__ functions (such as limit or offset) to pass those + keywords to the proper (often final) provider. 
These errors that result + can be hard to diagnose. +""" +import decorators +import exceptions + +import base +import chunk +import line +import column +import external +import dataset diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/base.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/base.py @@ -0,0 +1,260 @@ +""" +Base class(es) for all DataProviders. +""" +# there's a blurry line between functionality here and functionality in datatypes module +# attempting to keep parsing to a minimum here and focus on chopping/pagination/reformat(/filtering-maybe?) +# and using as much pre-computed info/metadata from the datatypes module as possible +# also, this shouldn't be a replacement/re-implementation of the tool layer +# (which provides traceability/versioning/reproducibility) + +from collections import deque +import exceptions + +_TODO = """ +hooks into datatypes (define providers inside datatype modules) as factories +capture tell() when provider is done + def stop( self ): self.endpoint = source.tell(); raise StopIteration() +implement __len__ sensibly where it can be (would be good to have where we're giving some progress - '100 of 300') + seems like sniffed files would have this info +unit tests +add datum entry/exit point methods: possibly decode, encode + or create a class that pipes source through - how would decode work then? + +icorporate existing visualization/dataproviders +some of the sources (esp. in datasets) don't need to be re-created + +YAGNI: InterleavingMultiSourceDataProvider, CombiningMultiSourceDataProvider +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- base classes +class DataProvider( object ): + """ + Base class for all data providers. Data providers: + (a) have a source (which must be another file-like object) + (b) implement both the iterator and context manager interfaces + (c) do not allow write methods + (but otherwise implement the other file object interface methods) + """ + def __init__( self, source, **kwargs ): + """ + :param source: the source that this iterator will loop over. + (Should implement the iterable interface and ideally have the + context manager interface as well) + """ + self.source = self.validate_source( source ) + + def validate_source( self, source ): + """ + Is this a valid source for this provider? + + :raises InvalidDataProviderSource: if the source is considered invalid. + + Meant to be overridden in subclasses. + """ + if not source or not hasattr( source, '__iter__' ): + # that's by no means a thorough check + raise exceptions.InvalidDataProviderSource( source ) + return source + + #TODO: (this might cause problems later...) + #TODO: some providers (such as chunk's seek and read) rely on this... remove + def __getattr__( self, name ): + if name == 'source': + # if we're inside this fn, source hasn't been set - provide some safety just for this attr + return None + # otherwise, try to get the attr from the source - allows us to get things like provider.encoding, etc. 
+ if hasattr( self.source, name ): + return getattr( self.source, name ) + # raise the proper error + return self.__getattribute__( name ) + + # write methods should not be allowed + def truncate( self, size ): + raise NotImplementedError( 'Write methods are purposely disabled' ) + def write( self, string ): + raise NotImplementedError( 'Write methods are purposely disabled' ) + def writelines( self, sequence ): + raise NotImplementedError( 'Write methods are purposely disabled' ) + + #TODO: route read methods through next? + #def readline( self ): + # return self.next() + def readlines( self ): + return [ line for line in self ] + + # iterator interface + def __iter__( self ): + # it's generators all the way up, Timmy + with self as source: + for datum in self.source: + yield datum + def next( self ): + return self.source.next() + + # context manager interface + def __enter__( self ): + # make the source's context manager interface optional + if hasattr( self.source, '__enter__' ): + self.source.__enter__() + return self + def __exit__( self, *args ): + # make the source's context manager interface optional, call on source if there + if hasattr( self.source, '__exit__' ): + self.source.__exit__( *args ) + # alternately, call close() + elif hasattr( self.source, 'close' ): + self.source.close() + + def __str__( self ): + """ + String representation for easier debugging. + + Will call `__str__` on it's source so this will display piped dataproviders. + """ + # we need to protect against recursion (in __getattr__) if self.source hasn't been set + source_str = str( self.source ) if hasattr( self, 'source' ) else '' + return '%s(%s)' %( self.__class__.__name__, str( source_str ) ) + + +class FilteredDataProvider( DataProvider ): + """ + Passes each datum through a filter function and yields it if that function + returns a non-`None` value. + + Also maintains counters: + - `num_data_read`: how many data have been consumed from the source. + - `num_valid_data_read`: how many data have been returned from `filter`. + - `num_data_returned`: how many data has this provider yielded. + """ + def __init__( self, source, filter_fn=None, **kwargs ): + """ + :param filter_fn: a lambda or function that will be passed a datum and + return either the (optionally modified) datum or None. + """ + super( FilteredDataProvider, self ).__init__( source, **kwargs ) + self.filter_fn = filter_fn + # count how many data we got from the source + self.num_data_read = 0 + # how many valid data have we gotten from the source + # IOW, data that's passed the filter and been either provided OR have been skipped due to offset + self.num_valid_data_read = 0 + # how many lines have been provided/output + self.num_data_returned = 0 + + def __iter__( self ): + parent_gen = super( FilteredDataProvider, self ).__iter__() + for datum in parent_gen: + self.num_data_read += 1 + datum = self.filter( datum ) + if datum != None: + self.num_valid_data_read += 1 + self.num_data_returned += 1 + yield datum + + #TODO: may want to squash this into DataProvider + def filter( self, datum ): + """ + When given a datum from the provider's source, return None if the datum + 'does not pass' the filter or is invalid. Return the datum if it's valid. + + :param datum: the datum to check for validity. + :returns: the datum, a modified datum, or None + + Meant to be overridden. 
+ """ + if self.filter_fn: + return self.filter_fn( datum ) + # also can be overriden entirely + return datum + + +class LimitedOffsetDataProvider( FilteredDataProvider ): + """ + A provider that uses the counters from FilteredDataProvider to limit the + number of data and/or skip `offset` number of data before providing. + + Useful for grabbing sections from a source (e.g. pagination). + """ + #TODO: may want to squash this into DataProvider + def __init__( self, source, offset=0, limit=None, **kwargs ): + """ + :param offset: the number of data to skip before providing. + :param limit: the final number of data to provide. + """ + super( LimitedOffsetDataProvider, self ).__init__( source, **kwargs ) + + # how many valid data to skip before we start outputing data - must be positive + # (diff to support neg. indeces - must be pos.) + self.offset = max( offset, 0 ) + + # how many valid data to return - must be positive (None indicates no limit) + self.limit = limit + if self.limit != None: + self.limit = max( self.limit, 0 ) + + def __iter__( self ): + """ + Iterate over the source until `num_valid_data_read` is greater than + `offset`, begin providing datat, and stop when `num_data_returned` + is greater than `offset`. + """ + parent_gen = super( LimitedOffsetDataProvider, self ).__iter__() + for datum in parent_gen: + + if self.limit != None and self.num_data_returned > self.limit: + break + + if self.num_valid_data_read > self.offset: + yield datum + else: + # wot a cheezy way of doing this... + self.num_data_returned -= 1 + + #TODO: skipping lines is inefficient - somehow cache file position/line_num pair and allow provider + # to seek to a pos/line and then begin providing lines + # the important catch here is that we need to have accurate pos/line pairs + # in order to preserve the functionality of limit and offset + #if file_seek and len( file_seek ) == 2: + # seek_pos, new_line_num = file_seek + # self.seek_and_set_curr_line( seek_pos, new_line_num ) + + #def seek_and_set_curr_line( self, file_seek, new_curr_line_num ): + # self.seek( file_seek, os.SEEK_SET ) + # self.curr_line_num = new_curr_line_num + + +class MultiSourceDataProvider( DataProvider ): + """ + A provider that iterates over a list of given sources and provides data + from one after another. + + An iterator over iterators. + """ + def __init__( self, source_list, **kwargs ): + """ + :param source_list: an iterator of iterables + """ + self.source_list = deque( source_list ) + + def __iter__( self ): + """ + Iterate over the source_list, then iterate over the data in each source. + + Skip a given source in `source_list` if it is `None` or invalid. + """ + for source in self.source_list: + # just skip falsy sources + if not source: + continue + try: + self.source = self.validate_source( source ) + except exceptions.InvalidDataProviderSource, invalid_source: + continue + + parent_gen = super( MultiSourceDataProvider, self ).__iter__() + for datum in parent_gen: + yield datum diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/chunk.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/chunk.py @@ -0,0 +1,80 @@ +""" +Chunk (N number of bytes at M offset to a source's beginning) provider. + +Primarily for file sources but usable by any iterator that has both +seek and read( N ). 
+""" +import os +import base64 + +import base +import exceptions + +_TODO = """ +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- +class ChunkDataProvider( base.DataProvider ): + """ + Data provider that yields chunks of data from it's file. + + Note: this version does not account for lines and works with Binary datatypes. + """ + MAX_CHUNK_SIZE = 2**16 + DEFAULT_CHUNK_SIZE = MAX_CHUNK_SIZE + + #TODO: subclass from LimitedOffsetDataProvider? + # see web/framework/base.iterate_file, util/__init__.file_reader, and datatypes.tabular + def __init__( self, source, chunk_index=0, chunk_size=DEFAULT_CHUNK_SIZE, **kwargs ): + """ + :param chunk_index: if a source can be divided into N number of + `chunk_size` sections, this is the index of which section to + return. + :param chunk_size: how large are the desired chunks to return + (gen. in bytes). + """ + super( ChunkDataProvider, self ).__init__( source, **kwargs ) + self.chunk_size = chunk_size + self.chunk_pos = chunk_index * self.chunk_size + + def validate_source( self, source ): + """ + Does the given source have both the methods `seek` and `read`? + :raises InvalidDataProviderSource: if not. + """ + source = super( ChunkDataProvider, self ).validate_source( source ) + if( ( not hasattr( source, 'seek' ) ) + or ( not hasattr( source, 'read' ) ) ): + raise exceptions.InvalidDataProviderSource( source ) + return source + + def __iter__( self ): + # not reeeally an iterator per se + self.__enter__() + self.source.seek( self.chunk_pos, os.SEEK_SET ) + chunk = self.encode( self.source.read( self.chunk_size ) ) + yield chunk + self.__exit__() + + def encode( self, chunk ): + """ + Called on the chunk before returning. + + Overrride to modify, encode, or decode chunks. + """ + return chunk + + +class Base64ChunkDataProvider( ChunkDataProvider ): + """ + Data provider that yields chunks of base64 encoded data from it's file. + """ + def encode( self, chunk ): + """ + Return chunks encoded in base 64. + """ + return base64.b64encode( chunk ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/column.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/column.py @@ -0,0 +1,242 @@ +""" +Providers that provide lists of lists generally where each line of a source +is further subdivided into multiple data (e.g. columns from a line). +""" + +import line + +_TODO = """ +move ColumnarDataProvider parsers to more sensible location + +TransposedColumnarDataProvider: provides each column as a single array + - see existing visualizations/dataprovider/basic.ColumnDataProvider +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- base classes +class ColumnarDataProvider( line.RegexLineDataProvider ): + """ + Data provider that provide a list of columns from the lines of it's source. + + Columns are returned in the order given in indeces, so this provider can + re-arrange columns. + + If any desired index is outside the actual number of columns + in the source, this provider will None-pad the output and you are guaranteed + the same number of columns as the number of indeces asked for (even if they + are filled with None). 
+ """ + def __init__( self, source, indeces=None, + column_count=None, column_types=None, parsers=None, parse_columns=True, + deliminator='\t', **kwargs ): + """ + :param indeces: a list of indeces of columns to gather from each row + Optional: will default to `None`. + If `None`, this provider will return all rows (even when a + particular row contains more/less than others). + If a row/line does not contain an element at a given index, the + provider will-return/fill-with a `None` value as the element. + :type indeces: list or None + + :param column_count: an alternate means of defining indeces, use an int + here to effectively provide the first N columns. + Optional: will default to `None`. + :type column_count: int + + :param column_types: a list of string names of types that the + provider will use to look up an appropriate parser for the column. + (e.g. 'int', 'float', 'str', 'bool') + Optional: will default to parsing all columns as strings. + :type column_types: list of strings + + :param parsers: a dictionary keyed with column type strings + and with values that are functions to use when parsing those + types. + Optional: will default to using the function `_get_default_parsers`. + :type parsers: dictionary + + :param parse_columns: attempt to parse columns? + Optional: defaults to `True`. + :type parse_columns: bool + + :param deliminator: character(s) used to split each row/line of the source. + Optional: defaults to the tab character. + :type deliminator: str + + .. note: that the subclass constructors are passed kwargs - so they're + params (limit, offset, etc.) are also applicable here. + """ + #TODO: other columnar formats: csv, etc. + super( ColumnarDataProvider, self ).__init__( source, **kwargs ) + + #IMPLICIT: if no indeces, column_count, or column_types passed: return all columns + self.selected_column_indeces = indeces + self.column_count = column_count + self.column_types = column_types or [] + # if no column count given, try to infer from indeces or column_types + if not self.column_count: + if self.selected_column_indeces: + self.column_count = len( self.selected_column_indeces ) + elif self.column_types: + self.column_count = len( self.column_types ) + # if no indeces given, infer from column_count + if not self.selected_column_indeces and self.column_count: + self.selected_column_indeces = list( xrange( self.column_count ) ) + + self.deliminator = deliminator + + # how/whether to parse each column value + self.parsers = {} + if parse_columns: + self.parsers = self._get_default_parsers() + # overwrite with user desired parsers + self.parsers.update( parsers or {} ) + + def _get_default_parsers( self ): + """ + Return parser dictionary keyed for each columnar type + (as defined in datatypes). + + .. note: primitives only by default (str, int, float, boolean, None). + Other (more complex) types are retrieved as strings. + :returns: a dictionary of the form: + `{ <parser type name> : <function used to parse type> }` + """ + #TODO: move to module level (or datatypes, util) + return { + # str is default and not needed here + 'int' : int, + 'float' : float, + 'bool' : bool, + + # unfortunately, 'list' is used in dataset metadata both for + # query style maps (9th col gff) AND comma-sep strings. + # (disabled for now) + #'list' : lambda v: v.split( ',' ), + #'csv' : lambda v: v.split( ',' ), + ## i don't like how urlparses does sub-lists... 
+ #'querystr' : lambda v: dict([ ( p.split( '=', 1 ) if '=' in p else ( p, True ) ) + # for p in v.split( ';', 1 ) ]) + + #'scifloat': #floating point which may be in scientific notation + + # always with the 1 base, biologists? + #'int1' : ( lambda i: int( i ) - 1 ), + + #'gffval': string or '.' for None + #'gffint': # int or '.' for None + #'gffphase': # 0, 1, 2, or '.' for None + #'gffstrand': # -, +, ?, or '.' for None, etc. + } + + def _parse_value( self, val, type ): + """ + Attempt to parse and return the given value based on the given type. + + :param val: the column value to parse (often a string) + :param type: the string type 'name' used to find the appropriate parser + :returns: the parsed value + or `value` if no `type` found in `parsers` + or `None` if there was a parser error (ValueError) + """ + if type == 'str' or type == None: return val + try: + return self.parsers[ type ]( val ) + except KeyError, err: + # no parser - return as string + pass + except ValueError, err: + # bad value - return None + return None + return val + + def _get_column_type( self, index ): + """ + Get the column type for the parser from `self.column_types` or `None` + if the type is unavailable. + :param index: the column index + :returns: string name of type (e.g. 'float', 'int', etc.) + """ + try: + return self.column_types[ index ] + except IndexError, ind_err: + return None + + def _parse_column_at_index( self, columns, parser_index, index ): + """ + Get the column type for the parser from `self.column_types` or `None` + if the type is unavailable. + """ + try: + return self._parse_value( columns[ index ], self._get_column_type( parser_index ) ) + # if a selected index is not within columns, return None + except IndexError, index_err: + return None + + def _parse_columns_from_line( self, line ): + """ + Returns a list of the desired, parsed columns. + :param line: the line to parse + :type line: str + """ + #TODO: too much going on in this loop - the above should all be precomputed AMAP... + all_columns = line.split( self.deliminator ) + # if no indeces were passed to init, return all columns + selected_indeces = self.selected_column_indeces or list( xrange( len( all_columns ) ) ) + parsed_columns = [] + for parser_index, column_index in enumerate( selected_indeces ): + parsed_columns.append( self._parse_column_at_index( all_columns, parser_index, column_index ) ) + return parsed_columns + + def __iter__( self ): + parent_gen = super( ColumnarDataProvider, self ).__iter__() + for line in parent_gen: + columns = self._parse_columns_from_line( line ) + yield columns + + #TODO: implement column filters here and not below - flatten hierarchy + +class FilteredByColumnDataProvider( ColumnarDataProvider ): + """ + Data provider that provide a list of columns from the lines of it's source + _only_ if they pass a given filter function. + + e.g. column #3 is type int and > N + """ + # TODO: how to do this and still have limit and offset work? + def __init__( self, source, **kwargs ): + raise NotImplementedError() + super( FilteredByColumnDataProvider, self ).__init__( source, **kwargs ) + + +class MapDataProvider( ColumnarDataProvider ): + """ + Data provider that column_names and columns from the source's contents + into a dictionary. + + A combination use of both `column_names` and `indeces` allows 'picking' + key/value pairs from the source. + + .. note: that the subclass constructors are passed kwargs - so they're + params (limit, offset, etc.) are also applicable here. 
+ """ + def __init__( self, source, column_names=None, **kwargs ): + """ + :param column_names: an ordered list of strings that will be used as the keys + for each column in the returned dictionaries. + The number of key, value pairs each returned dictionary has will + be as short as the number of column names provided. + :type column_names: + """ + #TODO: allow passing in a map instead of name->index { 'name1': index1, ... } + super( MapDataProvider, self ).__init__( source, **kwargs ) + self.column_names = column_names or [] + + def __iter__( self ): + parent_gen = super( MapDataProvider, self ).__iter__() + for column_values in parent_gen: + map = dict( zip( self.column_names, column_values ) ) + yield map diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/dataset.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/dataset.py @@ -0,0 +1,671 @@ +""" +Dataproviders that use either: + - the file contents and/or metadata from a Galaxy DatasetInstance as + their source. + - or provide data in some way relevant to bioinformatic data + (e.g. parsing genomic regions from their source) +""" + +import pkg_resources +pkg_resources.require( 'bx-python' ) +from bx import seq as bx_seq +from bx import wiggle as bx_wig + +import galaxy.model +import galaxy.datatypes +import galaxy.datatypes.data + +#TODO: can't import these due to circular ref in model/registry +#import galaxy.datatypes.binary +#import galaxy.datatypes.tabular + +import exceptions +import base +import line +import column +import external + +_TODO = """ +use bx as much as possible +the use of DatasetInstance seems to create some import problems +gff3 hierarchies +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- base for using a Glx dataset +class DatasetDataProvider( base.DataProvider ): + """ + Class that uses the file contents and/or metadata from a Galaxy DatasetInstance + as it's source. + + DatasetDataProvider can be seen as the intersection between a datatype's + metadata and a dataset's file contents. It (so far) mainly provides helper + and conv. methods for using dataset metadata to set up and control how + the data is provided. + """ + def __init__( self, dataset, **kwargs ): + """ + :param dataset: the Galaxy dataset whose file will be the source + :type dataset: model.DatasetInstance + + :raises exceptions.InvalidDataProviderSource: if not a DatsetInstance + """ + if not isinstance( dataset, galaxy.model.DatasetInstance ): + raise exceptions.InvalidDataProviderSource( "Data provider can only be used with a DatasetInstance" ) + self.dataset = dataset + # this dataset file is obviously the source + #TODO: this might be a good place to interface with the object_store... + super( DatasetDataProvider, self ).__init__( open( dataset.file_name, 'rb' ) ) + + #TODO: this is a bit of a mess + @classmethod + def get_column_metadata_from_dataset( cls, dataset ): + """ + Convenience class method to get column metadata from a dataset. + :returns: a dictionary of `column_count`, `column_types`, and `column_names` + if they're available, setting each to `None` if not. 
+ """ + # re-map keys to fit ColumnarProvider.__init__ kwargs + params = {} + params[ 'column_count' ] = dataset.metadata.columns + params[ 'column_types' ] = dataset.metadata.column_types + params[ 'column_names' ] = dataset.metadata.column_names or getattr( dataset.datatype, 'column_names', None ) + return params + + def get_metadata_column_types( self, indeces=None ): + """ + Return the list of `column_types` for this dataset or `None` if unavailable. + :param indeces: the indeces for the columns of which to return the types. + Optional: defaults to None (return all types) + :type indeces: list of ints + """ + metadata_column_types = ( self.dataset.metadata.column_types + or getattr( self.dataset.datatype, 'column_types', None ) + or None ) + if not metadata_column_types: + return metadata_column_types + if indeces: + column_types = [] + for index in indeces: + column_type = metadata_column_types[ index ] if index < len( metadata_column_types ) else None + column_types.append( column_type ) + return column_types + return metadata_column_types + + def get_metadata_column_names( self, indeces=None ): + """ + Return the list of `column_names` for this dataset or `None` if unavailable. + :param indeces: the indeces for the columns of which to return the names. + Optional: defaults to None (return all names) + :type indeces: list of ints + """ + metadata_column_names = ( self.dataset.metadata.column_names + or getattr( self.dataset.datatype, 'column_names', None ) + or None ) + if not metadata_column_names: + return metadata_column_names + if indeces: + column_names = [] + for index in indeces: + column_type = metadata_column_names[ index ] if index < len( metadata_column_names ) else None + column_names.append( column_type ) + return column_names + return metadata_column_names + + #TODO: merge the next two + def get_indeces_by_column_names( self, list_of_column_names ): + """ + Return the list of column indeces when given a list of column_names. + :param list_of_column_names: the names of the columns of which to get indeces. + :type list_of_column_names: list of strs + :raises KeyError: if column_names are not found + :raises ValueError: if an entry in list_of_column_names is not in column_names + """ + metadata_column_names = ( self.dataset.metadata.column_names + or getattr( self.dataset.datatype, 'column_names', None ) + or None ) + if not metadata_column_names: + raise KeyError( 'No column_names found for ' + + 'datatype: %s, dataset: %s' %( str( self.dataset.datatype ), str( self.dataset ) ) ) + indeces = [] + for column_name in list_of_column_names: + indeces.append( metadata_column_names.index( column_name ) ) + return indeces + + def get_metadata_column_index_by_name( self, name ): + """ + Return the 1-base index of a sources column with the given `name`. + """ + # metadata columns are 1-based indeces + column = getattr( self.dataset.metadata, name ) + return ( column - 1 ) if isinstance( column, int ) else None + + def get_genomic_region_indeces( self, check=False ): + """ + Return a list of column indeces for 'chromCol', 'startCol', 'endCol' from + a source representing a genomic region. + + :param check: if True will raise a ValueError if any were not found. + :type check: bool + :raises ValueError: if check is `True` and one or more indeces were not found. + :returns: list of column indeces for the named columns. 
+ """ + region_column_names = ( 'chromCol', 'startCol', 'endCol' ) + region_indeces = [ self.get_metadata_column_index_by_name( name ) for name in region_column_names ] + if check and not all( map( lambda i: i != None, indeces ) ): + raise ValueError( "Could not determine proper column indeces for chrom, start, end: %s" %( str( indeces ) ) ) + return region_indeces + + +class ConvertedDatasetDataProvider( DatasetDataProvider ): + """ + Class that uses the file contents of a dataset after conversion to a different + format. + """ + def __init__( self, dataset, **kwargs ): + raise NotImplementedError( 'Abstract class' ) + self.original_dataset = dataset + self.converted_dataset = self.convert_dataset( dataset, **kwargs ) + super( ConvertedDatasetDataProvider, self ).__init__( self.converted_dataset, **kwargs ) + #NOTE: now self.converted_dataset == self.dataset + + def convert_dataset( self, dataset, **kwargs ): + """ + Convert the given dataset in some way. + """ + return dataset + + +# ----------------------------------------------------------------------------- uses metadata for settings +class DatasetColumnarDataProvider( column.ColumnarDataProvider ): + """ + Data provider that uses a DatasetDataProvider as it's source and the + dataset's metadata to buuild settings for the ColumnarDataProvider it's + inherited from. + """ + def __init__( self, dataset, **kwargs ): + """ + All kwargs are inherited from ColumnarDataProvider. + .. seealso:: column.ColumnarDataProvider + + If no kwargs are given, this class will attempt to get those kwargs + from the dataset source's metadata. + If any kwarg is given, it will override and be used in place of + any metadata available. + """ + dataset_source = DatasetDataProvider( dataset ) + if not kwargs.get( 'column_types', None ): + indeces = kwargs.get( 'indeces', None ) + kwargs[ 'column_types' ] = dataset_source.get_metadata_column_types( indeces=indeces ) + super( DatasetColumnarDataProvider, self ).__init__( dataset_source, **kwargs ) + + +class DatasetMapDataProvider( column.MapDataProvider ): + """ + Data provider that uses a DatasetDataProvider as it's source and the + dataset's metadata to buuild settings for the MapDataProvider it's + inherited from. + """ + def __init__( self, dataset, **kwargs ): + """ + All kwargs are inherited from MapDataProvider. + .. seealso:: column.MapDataProvider + + If no kwargs are given, this class will attempt to get those kwargs + from the dataset source's metadata. + If any kwarg is given, it will override and be used in place of + any metadata available. 
+ + The relationship between column_names and indeces is more complex: + +-----------------+-------------------------------+-----------------------+ + | | Indeces given | Indeces NOT given | + +=================+===============================+=======================+ + | Names given | pull indeces, rename w/ names | pull by name | + +=================+-------------------------------+-----------------------+ + | Names NOT given | pull indeces, name w/ meta | pull all, name w/meta | + +=================+-------------------------------+-----------------------+ + """ + dataset_source = DatasetDataProvider( dataset ) + + #TODO: getting too complicated - simplify at some lvl, somehow + # if no column_types given, get column_types from indeces (or all if indeces == None) + indeces = kwargs.get( 'indeces', None ) + column_names = kwargs.get( 'column_names', None ) + + #if indeces and column_names: + # # pull using indeces and re-name with given names - no need to alter (does as super would) + # pass + + if not indeces and column_names: + # pull columns by name + indeces = kwargs[ 'indeces' ] = dataset_source.get_indeces_by_column_names( column_names ) + + elif indeces and not column_names: + # pull using indeces, name with meta + column_names = kwargs[ 'column_names' ] = dataset_source.get_metadata_column_names( indeces=indeces ) + + elif not indeces and not column_names: + # pull all indeces and name using metadata + column_names = kwargs[ 'column_names' ] = dataset_source.get_metadata_column_names( indeces=indeces ) + + # if no column_types given, use metadata column_types + if not kwargs.get( 'column_types', None ): + kwargs[ 'column_types' ] = dataset_source.get_metadata_column_types( indeces=indeces ) + + super( DatasetMapDataProvider, self ).__init__( dataset_source, **kwargs ) + + +# ----------------------------------------------------------------------------- provides a bio-relevant datum +class GenomicRegionDataProvider( column.ColumnarDataProvider ): + """ + Data provider that parses chromosome, start, and end data from a file + using the datasets metadata settings. + + Is a ColumnarDataProvider that uses a DatasetDataProvider as it's source. + + If `named_columns` is true, will return dictionaries with the keys + 'chrom', 'start', 'end'. + """ + # dictionary keys when named_columns=True + COLUMN_NAMES = [ 'chrom', 'start', 'end' ] + + def __init__( self, dataset, chrom_column=None, start_column=None, end_column=None, named_columns=False, **kwargs ): + """ + :param dataset: the Galaxy dataset whose file will be the source + :type dataset: model.DatasetInstance + + :param chrom_column: optionally specify the chrom column index + :type chrom_column: int + :param start_column: optionally specify the start column index + :type start_column: int + :param end_column: optionally specify the end column index + :type end_column: int + + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', or 'end'. + Optional: defaults to False + :type named_columns: bool + """ + #TODO: allow passing in a string format e.g. 
"{chrom}:{start}-{end}" + dataset_source = DatasetDataProvider( dataset ) + + if chrom_column == None: + chrom_column = dataset_source.get_metadata_column_index_by_name( 'chromCol' ) + if start_column == None: + start_column = dataset_source.get_metadata_column_index_by_name( 'startCol' ) + if end_column == None: + end_column = dataset_source.get_metadata_column_index_by_name( 'endCol' ) + indeces = [ chrom_column, start_column, end_column ] + if not all( map( lambda i: i != None, indeces ) ): + raise ValueError( "Could not determine proper column indeces for" + + " chrom, start, end: %s" %( str( indeces ) ) ) + kwargs.update({ 'indeces' : indeces }) + + if not kwargs.get( 'column_types', None ): + kwargs.update({ 'column_types' : dataset_source.get_metadata_column_types( indeces=indeces ) }) + + self.named_columns = named_columns + if self.named_columns: + self.column_names = self.COLUMN_NAMES + + super( GenomicRegionDataProvider, self ).__init__( dataset_source, **kwargs ) + + def __iter__( self ): + parent_gen = super( GenomicRegionDataProvider, self ).__iter__() + for column_values in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, column_values ) ) + else: + yield column_values + + +#TODO: this optionally provides the same data as the above and makes GenomicRegionDataProvider redundant +# GenomicRegionDataProvider is a better name, tho +class IntervalDataProvider( column.ColumnarDataProvider ): + """ + Data provider that parses chromosome, start, and end data (as well as strand + and name if set in the metadata) using the dataset's metadata settings. + + If `named_columns` is true, will return dictionaries with the keys + 'chrom', 'start', 'end' (and 'strand' and 'name' if available). + """ + COLUMN_NAMES = [ 'chrom', 'start', 'end', 'strand', 'name' ] + + def __init__( self, dataset, chrom_column=None, start_column=None, end_column=None, + strand_column=None, name_column=None, named_columns=False, **kwargs ): + """ + :param dataset: the Galaxy dataset whose file will be the source + :type dataset: model.DatasetInstance + + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', 'end', 'strand', or 'name'. + Optional: defaults to False + :type named_columns: bool + """ + #TODO: allow passing in a string format e.g. 
"{chrom}:{start}-{end}" + dataset_source = DatasetDataProvider( dataset ) + + # get genomic indeces and add strand and name + if chrom_column == None: + chrom_column = dataset_source.get_metadata_column_index_by_name( 'chromCol' ) + if start_column == None: + start_column = dataset_source.get_metadata_column_index_by_name( 'startCol' ) + if end_column == None: + end_column = dataset_source.get_metadata_column_index_by_name( 'endCol' ) + if strand_column == None: + strand_column = dataset_source.get_metadata_column_index_by_name( 'strandCol' ) + if name_column == None: + name_column = dataset_source.get_metadata_column_index_by_name( 'nameCol' ) + indeces = [ chrom_column, start_column, end_column, strand_column, name_column ] + kwargs.update({ 'indeces' : indeces }) + + if not kwargs.get( 'column_types', None ): + kwargs.update({ 'column_types' : dataset_source.get_metadata_column_types( indeces=indeces ) }) + + self.named_columns = named_columns + if self.named_columns: + self.column_names = self.COLUMN_NAMES + + super( IntervalDataProvider, self ).__init__( dataset_source, **kwargs ) + + def __iter__( self ): + parent_gen = super( IntervalDataProvider, self ).__iter__() + for column_values in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, column_values ) ) + else: + yield column_values + + +#TODO: ideally with these next two - you'd allow pulling some region from the sequence +# WITHOUT reading the entire seq into memory - possibly apply some version of limit/offset +class FastaDataProvider( base.FilteredDataProvider ): + """ + Class that returns fasta format data in a list of maps of the form: + { + id: <fasta header id>, + sequence: <joined lines of nucleotide/amino data> + } + """ + def __init__( self, source, ids=None, **kwargs ): + """ + :param ids: optionally return only ids (and sequences) that are in this list. + Optional: defaults to None (provide all ids) + :type ids: list or None + """ + source = bx_seq.fasta.FastaReader( source ) + #TODO: validate is a fasta + super( FastaDataProvider, self ).__init__( source, **kwargs ) + self.ids = ids + # how to do ids? + + def __iter__( self ): + parent_gen = super( FastaDataProvider, self ).__iter__() + for fasta_record in parent_gen: + yield { + 'id' : fasta_record.name, + 'seq' : fasta_record.text + } + + +class TwoBitFastaDataProvider( DatasetDataProvider ): + """ + Class that returns fasta format data in a list of maps of the form: + { + id: <fasta header id>, + sequence: <joined lines of nucleotide/amino data> + } + """ + def __init__( self, source, ids=None, **kwargs ): + """ + :param ids: optionally return only ids (and sequences) that are in this list. + Optional: defaults to None (provide all ids) + :type ids: list or None + """ + source = bx_seq.twobit.TwoBitFile( source ) + #TODO: validate is a 2bit + super( FastaDataProvider, self ).__init__( source, **kwargs ) + # could do in order provided with twobit + self.ids = ids or self.source.keys() + + def __iter__( self ): + for id_ in self.ids: + yield { + 'id' : id_, + 'seq' : self.source[ name ] + } + + +#TODO: +class WiggleDataProvider( base.LimitedOffsetDataProvider ): + """ + Class that returns chrom, pos, data from a wiggle source. + """ + COLUMN_NAMES = [ 'chrom', 'pos', 'value' ] + + def __init__( self, source, named_columns=False, column_names=None, **kwargs ): + """ + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', 'end', 'strand', or 'name'. 
+ Optional: defaults to False + :type named_columns: bool + + :param column_names: an ordered list of strings that will be used as the keys + for each column in the returned dictionaries. + The number of key, value pairs each returned dictionary has will + be as short as the number of column names provided. + :type column_names: + """ + #TODO: validate is a wig + # still good to maintain a ref to the raw source bc Reader won't + self.raw_source = source + self.parser = bx_wig.Reader( source ) + super( WiggleDataProvider, self ).__init__( self.parser, **kwargs ) + + self.named_columns = named_columns + self.column_names = column_names or self.COLUMN_NAMES + + def __iter__( self ): + parent_gen = super( WiggleDataProvider, self ).__iter__() + for three_tuple in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, three_tuple ) ) + else: + # list is not strictly necessary - but consistent + yield list( three_tuple ) + + +class BigWigDataProvider( base.LimitedOffsetDataProvider ): + """ + Class that returns chrom, pos, data from a wiggle source. + """ + COLUMN_NAMES = [ 'chrom', 'pos', 'value' ] + + def __init__( self, source, chrom, start, end, named_columns=False, column_names=None, **kwargs ): + """ + :param chrom: which chromosome within the bigbed file to extract data for + :type chrom: str + :param start: the start of the region from which to extract data + :type start: int + :param end: the end of the region from which to extract data + :type end: int + + :param named_columns: optionally return dictionaries keying each column + with 'chrom', 'start', 'end', 'strand', or 'name'. + Optional: defaults to False + :type named_columns: bool + + :param column_names: an ordered list of strings that will be used as the keys + for each column in the returned dictionaries. + The number of key, value pairs each returned dictionary has will + be as short as the number of column names provided. + :type column_names: + """ + raise NotImplementedError( 'Work in progress' ) + #TODO: validate is a wig + # still good to maintain a ref to the raw source bc Reader won't + self.raw_source = source + self.parser = bx_bbi.bigwig_file.BigWigFile( source ) + super( BigWigDataProvider, self ).__init__( self.parser, **kwargs ) + + self.named_columns = named_columns + self.column_names = column_names or self.COLUMN_NAMES + + def __iter__( self ): + parent_gen = super( BigWigDataProvider, self ).__iter__() + for three_tuple in parent_gen: + if self.named_columns: + yield dict( zip( self.column_names, three_tuple ) ) + else: + # list is not strictly necessary - but consistent + yield list( three_tuple ) + + +# ----------------------------------------------------------------------------- binary, external conversion or tool +class DatasetSubprocessDataProvider( external.SubprocessDataProvider ): + """ + Create a source from running a subprocess on a dataset's file. + + Uses a subprocess as it's source and has a dataset (gen. as an input file + for the process). + """ + #TODO: below should be a subclass of this and not RegexSubprocess + def __init__( self, dataset, *args, **kwargs ): + """ + :param args: the list of strings used to build commands. + :type args: variadic function args + """ + raise NotImplementedError( 'Abstract class' ) + super( DatasetSubprocessDataProvider, self ).__init__( *args, **kwargs ) + self.dataset = dataset + + +class SamtoolsDataProvider( line.RegexLineDataProvider ): + """ + Data provider that uses samtools on a Sam or Bam file as it's source. 
+ + This can be piped through other providers (column, map, genome region, etc.). + + .. note:: that only the samtools 'view' command is currently implemented. + """ + FLAGS_WO_ARGS = 'bhHSu1xXcB' + FLAGS_W_ARGS = 'fFqlrs' + VALID_FLAGS = FLAGS_WO_ARGS + FLAGS_W_ARGS + + def __init__( self, dataset, options_string='', options_dict=None, regions=None, **kwargs ): + """ + :param options_string: samtools options in string form (flags separated + by spaces) + Optional: defaults to '' + :type options_string: str + :param options_dict: dictionary of samtools options + Optional: defaults to None + :type options_dict: dict or None + :param regions: list of samtools regions strings + Optional: defaults to None + :type regions: list of str or None + """ + #TODO: into validate_source + + #TODO: have to import these here due to circular ref in model/datatypes + import galaxy.datatypes.binary + import galaxy.datatypes.tabular + if( not( isinstance( dataset.datatype, galaxy.datatypes.tabular.Sam ) + or isinstance( dataset.datatype, galaxy.datatypes.binary.Bam ) ) ): + raise exceptions.InvalidDataProviderSource( + 'dataset must be a Sam or Bam datatype: %s' %( str( dataset.datatype ) ) ) + self.dataset = dataset + + options_dict = options_dict or {} + # ensure regions are strings + regions = [ str( r ) for r in regions ] if regions else [] + + #TODO: view only for now + #TODO: not properly using overriding super's validate_opts, command here + subcommand = 'view' + #TODO:?? do we need a path to samtools? + subproc_args = self.build_command_list( subcommand, options_string, options_dict, regions ) +#TODO: the composition/inheritance here doesn't make a lot sense + subproc_provider = external.SubprocessDataProvider( *subproc_args ) + super( SamtoolsDataProvider, self ).__init__( subproc_provider, **kwargs ) + + def build_command_list( self, subcommand, options_string, options_dict, regions ): + """ + Convert all init args to list form. + """ + command = [ 'samtools', subcommand ] + # add options and switches, input file, regions list (if any) + command.extend( self.to_options_list( options_string, options_dict ) ) + command.append( self.dataset.file_name ) + command.extend( regions ) + return command + + def to_options_list( self, options_string, options_dict ): + """ + Convert both options_string and options_dict to list form + while filtering out non-'valid' options. + """ + opt_list = [] + + # strip out any user supplied bash switch formating -> string of option chars + # then compress to single option string of unique, VALID flags with prefixed bash switch char '-' + options_string = options_string.strip( '- ' ) + validated_flag_list = set([ flag for flag in options_string if flag in self.FLAGS_WO_ARGS ]) + + # if sam add -S + if( ( isinstance( self.dataset.datatype, galaxy.datatypes.tabular.Sam ) + and ( 'S' not in validated_flag_list ) ) ): + validated_flag_list.append( 'S' ) + + if validated_flag_list: + opt_list.append( '-' + ''.join( validated_flag_list ) ) + + for flag, arg in options_dict.items(): + if flag in self.FLAGS_W_ARGS: + opt_list.extend([ '-' + flag, str( arg ) ]) + + return opt_list + + @classmethod + def extract_options_from_dict( cls, dictionary ): + """ + Separrates valid samtools key/value pair options from a dictionary and + returns both as a 2-tuple. + """ + # handy for extracting options from kwargs - but otherwise... 
+ #TODO: could be abstracted to util.extract( dict, valid_keys_list ) + options_dict = {} + new_kwargs = {} + for key, value in dictionary.items(): + if key in cls.FLAGS_W_ARGS: + options_dict[ key ] = value + else: + new_kwargs[ key ] = value + return options_dict, new_kwargs + + +class BcftoolsDataProvider( line.RegexLineDataProvider ): + """ + Data provider that uses an bcftools on a bcf (or vcf?) file as it's source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + def __init__( self, dataset, **kwargs ): + #TODO: as samtools + raise NotImplementedError() + super( BCFDataProvider, self ).__init__( dataset, **kwargs ) + + +class BGzipTabixDataProvider( base.DataProvider ): + """ + Data provider that uses an g(un)zip on a file as it's source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + def __init__( self, dataset, **kwargs ): + #TODO: as samtools - need more info on output format + raise NotImplementedError() + super( BGzipTabixDataProvider, self ).__init__( dataset, **kwargs ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/decorators.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/decorators.py @@ -0,0 +1,107 @@ +""" +DataProvider related decorators. +""" + +# I'd like to decorate the factory methods that give data_providers by the name they can be accessed from. e.g.: +#@provides( 'id_seq' ) # where 'id_seq' is some 'data_format' string/alias +#def get_id_seq_provider( dataset, **settings ): + +# then in some central dispatch (maybe data.Data), have it look up the proper method by the data_format string + +# also it would be good to have this decorator maintain a list of available providers (for a datatype) + +# i don't particularly want to cut up method names ( get_([\w_]*)_provider ) +#!/usr/bin/env python + +# adapted from: http://stackoverflow.com +# /questions/14095616/python-can-i-programmatically-decorate-class-methods-from-a-class-instance + +from functools import wraps +#from types import MethodType +import copy + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- +_DATAPROVIDER_CLASS_MAP_KEY = 'dataproviders' +_DATAPROVIDER_METHOD_NAME_KEY = '_dataprovider_name' + +# ----------------------------------------------------------------------------- +def has_dataproviders( cls ): + """ + Wraps a class (generally a Datatype), finds methods within that have been + decorated with `@dataprovider` and adds them, by their name, to a map + in the class. + + This allows a class to maintain a name -> method map, effectively + 'registering' dataprovider factory methods. + + .. example:: + @has_dataproviders + class MyDtype( data.Data ): + + @dataprovider_factory( 'bler' ) + def provide_some_bler( self, dataset, **settings ): + '''blerblerbler''' + dataset_source = providers.DatasetDataProvider( dataset ) + # ... chain other, intermidiate providers here + return providers.BlerDataProvider( dataset_source, **settings ) + + # use the base method in data.Data + provider = dataset.datatype.dataprovider( dataset, 'bler', + my_setting='blah', ... ) + # OR directly from the map + provider = dataset.datatype.dataproviders[ 'bler' ]( dataset, + my_setting='blah', ... ) + """ + #log.debug( 'has_dataproviders:', cls ) + # init the class dataproviders map if necc. 
+ if not hasattr( cls, _DATAPROVIDER_CLASS_MAP_KEY ): + setattr( cls, _DATAPROVIDER_CLASS_MAP_KEY, {} ) + else: + # need to deepcopy or subclasses will modify super.dataproviders as well + existing_dataproviders = getattr( cls, _DATAPROVIDER_CLASS_MAP_KEY ) + copied_dataproviders = copy.deepcopy( existing_dataproviders ) + setattr( cls, _DATAPROVIDER_CLASS_MAP_KEY, copied_dataproviders ) + + dataproviders = getattr( cls, _DATAPROVIDER_CLASS_MAP_KEY ) + + # scan for methods with dataprovider names and add them to the map + # note: this has a 'cascading' effect + # where it's possible to override a super's provider with a sub's + for attr_key, attr_value in cls.__dict__.iteritems(): + #log.debug( '\t key:', attr_key ) + # can't use isinstance( attr_value, MethodType ) bc of wrapping + if( ( callable( attr_value ) ) + and ( not attr_key.startswith( "__" ) ) + and ( getattr( attr_value, _DATAPROVIDER_METHOD_NAME_KEY, None ) ) ): + #log.debug( '\t\t is a dataprovider', attr_key ) + name = getattr( attr_value, _DATAPROVIDER_METHOD_NAME_KEY ) + dataproviders[ name ] = attr_value + + #log.debug( 'dataproviders:' ) + #for name, fn in cls.dataproviders.items(): + # log.debug( '\t ', name, '->', fn.__name__, fn ) + # log.debug( '\t\t ', fn.__doc__ ) + return cls + +def dataprovider_factory( name ): + """ + Wraps a class method and marks it as a dataprovider factory. + + :param name: what name/key to register the factory under in `cls.dataproviders` + :param type: any hashable var + """ + #log.debug( 'dataprovider:', name ) + def named_dataprovider_factory( func ): + #log.debug( 'named_dataprovider_factory:', name, '->', func.__name__ ) + setattr( func, _DATAPROVIDER_METHOD_NAME_KEY, name ) + #log.debug( '\t setting:', getattr( func, _DATAPROVIDER_METHOD_NAME_KEY ) ) + @wraps( func ) + def wrapped_dataprovider_factory( self, *args, **kwargs ): + #log.debug( 'wrapped_dataprovider_factory', name, self, args, kwargs ) + return func( self, *args, **kwargs ) + return wrapped_dataprovider_factory + return named_dataprovider_factory diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/exceptions.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/exceptions.py @@ -0,0 +1,33 @@ +""" +DataProvider related exceptions. +""" + +class InvalidDataProviderSource( TypeError ): + """ + Raised when a unusable source is passed to a provider. + """ + def __init__( self, source=None, msg='' ): + msg = msg or 'Invalid source for provider: %s' %( source ) + super( InvalidDataProviderSource, self ).__init__( msg ) + + +class NoProviderAvailable( TypeError ): + """ + Raised when no provider is found for the given `format_requested`. + + :param factory_source: the item that the provider was requested from + :param format_requested: the format_requested (a hashable key to access + `factory_source.datatypes` with) + + Both params are attached to this class and accessible to the try-catch + receiver. + + Meant to be used within a class that builds dataproviders (e.g. 
a Datatype) + """ + def __init__( self, factory_source, format_requested=None, msg='' ): + self.factory_source = factory_source + self.format_requested = format_requested + msg = msg or 'No provider available in factory_source "%s" for format requested' %( str( factory_source ) ) + if self.format_requested: + msg += ': "%s"' %( self.format_requested ) + super( NoProviderAvailable, self ).__init__( msg ) diff -r 575dedd2d0ba10f7cbfc4efa9e6aecafd79ef039 -r a5b83353f9eef6734fba8f85f90a7210a70866db lib/galaxy/datatypes/dataproviders/external.py --- /dev/null +++ b/lib/galaxy/datatypes/dataproviders/external.py @@ -0,0 +1,165 @@ +""" +Data providers that iterate over a source that is not in memory +or not in a file. +""" + +import subprocess +import urllib, urllib2 +import gzip + +import base +import line + +_TODO = """ +YAGNI: ftp, image, cryptos, sockets +job queue +admin: admin server log rgx/stats, ps aux +""" + +import logging +log = logging.getLogger( __name__ ) + + +# ----------------------------------------------------------------------------- server subprocess / external prog +class SubprocessDataProvider( base.DataProvider ): + """ + Data provider that uses the output from an intermediate program and + subprocess as it's data source. + """ + #TODO: need better ways of checking returncode, stderr for errors and raising + def __init__( self, *args, **kwargs ): + """ + :param args: the list of strings used to build commands. + :type args: variadic function args + """ + self.exit_code = None + command_list = args + self.popen = self.subprocess( *command_list, **kwargs ) + #TODO:?? not communicate()? + super( SubprocessDataProvider, self ).__init__( self.popen.stdout ) + self.exit_code = self.popen.poll() + + #NOTE: there's little protection here v. sending a ';' and a dangerous command here + # but...we're all adults here, right? ...RIGHT?! + def subprocess( self, *command_list, **kwargs ): + """ + :param args: the list of strings used as commands. + :type args: variadic function args + """ + try: + # how expensive is this? + popen = subprocess.Popen( command_list, stderr=subprocess.PIPE, stdout=subprocess.PIPE ) + log.info( 'opened subrocess (%s), PID: %s' %( str( command_list ), str( popen.pid ) ) ) + #log.debug( 'stderr:\n%s\n' %( popen.stderr.read() ) ) + + except OSError, os_err: + command_str = ' '.join( self.command ) + raise OSError( ' '.join([ str( os_err ), ':', command_str ]) ) + + return popen + + def __exit__( self, *args ): + # poll the subrocess for an exit code + self.exit_code = self.popen.poll() + log.info( '%s.__exit__, exit_code: %s' %( str( self ), str( self.exit_code ) ) ) + return super( SubprocessDataProvider, self ).__exit__( *args ) + + def __str__( self ): + # provide the pid and current return code + source_str = '' + if hasattr( self, 'popen' ): + source_str = '%s:%s' %( str( self.popen.pid ), str( self.popen.poll() ) ) + return '%s(%s)' %( self.__class__.__name__, str( source_str ) ) + + +class RegexSubprocessDataProvider( line.RegexLineDataProvider ): + """ + RegexLineDataProvider that uses a SubprocessDataProvider as it's data source. + """ + # this is a conv. class and not really all that necc... + def __init__( self, *args, **kwargs ): + # using subprocess as proxy data source in filtered line prov. 
+ subproc_provider = SubprocessDataProvider( *args ) + super( RegexSubprocessDataProvider, self ).__init__( subproc_provider, **kwargs ) + + +# ----------------------------------------------------------------------------- other apis +class URLDataProvider( base.DataProvider ): + """ + Data provider that uses the contents of a URL for it's data source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + VALID_METHODS = ( 'GET', 'POST' ) + + def __init__( self, url, method='GET', data=None, **kwargs ): + """ + :param url: the base URL to open. + :param method: the HTTP method to use. + Optional: defaults to 'GET' + :param data: any data to pass (either in query for 'GET' + or as post data with 'POST') + :type data: dict + """ + self.url = url + self.method = method + + self.data = data or {} + encoded_data = urllib.urlencode( self.data ) + + if method == 'GET': + self.url += '?%s' %( encoded_data ) + opened = urllib2.urlopen( url ) + elif method == 'POST': + opened = urllib2.urlopen( url, encoded_data ) + else: + raise ValueError( 'Not a valid method: %s' %( method ) ) + + super( URLDataProvider, self ).__init__( opened, **kwargs ) + #NOTE: the request object is now accessible as self.source + + def __enter__( self ): + pass + + def __exit__( self, *args ): + self.source.close() + + +# ----------------------------------------------------------------------------- generic compression +class GzipDataProvider( base.DataProvider ): + """ + Data provider that uses g(un)zip on a file as it's source. + + This can be piped through other providers (column, map, genome region, etc.). + """ + def __init__( self, source, **kwargs ): + unzipped = gzip.GzipFile( source, 'rb' ) + super( GzipDataProvider, self ).__init__( unzipped, **kwargs ) + #NOTE: the GzipFile is now accessible in self.source + + +# ----------------------------------------------------------------------------- intermediate tempfile +class TempfileDataProvider( base.DataProvider ): + """ + Writes the data from the given source to a temp file, allowing + it to be used as a source where a file_name is needed (e.g. as a parameter + to a command line tool: samtools view -t <this_provider.source.file_name>) + """ + def __init__( self, source, **kwargs ): + #TODO: + raise NotImplementedError() + # write the file here + self.create_file + super( TempfileDataProvider, self ).__init__( self.tmp_file, **kwargs ) + + def create_file( self ): + self.tmp_file = tempfile.NamedTemporaryFile() + return self.tmp_file + + def write_to_file( self ): + parent_gen = super( TempfileDataProvider, self ).__iter__() + #??? + with open( self.tmp_file, 'w' ) as open_file: + for datum in parent_gen: + open_file.write( datum + '\n' ) + This diff is so big that we needed to truncate the remainder. https://bitbucket.org/galaxy/galaxy-central/commits/f48f86a23282/ Changeset: f48f86a23282 Branch: search User: Kyle Ellrott Date: 2013-06-17 21:00:32 Summary: Adding the ability to search the job table Affected #: 4 files diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -157,7 +157,10 @@ return total -class Job( object ): +class Job( object, APIItem ): + api_collection_visible_keys = [ 'id' ] + api_element_visible_keys = [ 'id' ] + """ A job represents a request to run a tool given input datasets, tool parameters, and output datasets. 
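For orientation, here is a minimal sketch of the dictionary the new Job.get_api_value() (added in the hunk below) produces; the key names are taken from the diff, while the tool id, parameter/dataset names, and numeric ids are hypothetical:

# Rough shape of Job.get_api_value() as of this changeset (values are made up).
job_dict = {
    'id': 42,                      # from api_collection/element_visible_keys
    'tool_name': 'cat1',           # actually the job's tool_id string
    'params': {
        'dbkey': '"hg19"',         # JobParameter values are JSON-encoded strings
        'input1': {'hda_id': 7},   # in this revision input datasets are folded into params
    },
    'outputs': {
        'out_file1': {'hda_id': 8},
    },
}
# The nested 'hda_id'/'ldda_id' values are why encode_all_ids() gains a
# recursive=True flag in this same changeset: without it, only the top-level
# 'id' would be security-encoded before the search API returns the row.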
@@ -360,6 +363,28 @@ dataset.blurb = 'deleted' dataset.peek = 'Job deleted' dataset.info = 'Job output deleted by user before job completed' + def get_api_value( self, view='collection' ): + rval = super( Job, self ).get_api_value( view=view ) + rval['tool_name'] = self.tool_id + param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) + for i in self.input_datasets: + if i.dataset is not None: + param_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.input_library_datasets: + if i.dataset is not None: + param_dict[i.name] = {"ldda_id" : i.dataset.id} + rval['params'] = param_dict + + output_dict = {} + for i in self.output_datasets: + if i.dataset is not None: + output_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.output_library_datasets: + if i.dataset is not None: + output_dict[i.name] = {"ldda_id" : i.dataset.id} + rval['outputs'] = output_dict + + return rval class Task( object ): """ diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -31,11 +31,10 @@ eggs.require("Parsley") import parsley -from galaxy.model import HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset -from galaxy.model import (StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, -HistoryDatasetAssociationTagAssociation, -ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation) -from galaxy.model import ToolVersion +from galaxy.model import (HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, +History, Library, LibraryFolder, LibraryDataset,StoredWorkflowTagAssociation, +StoredWorkflow, HistoryTagAssociation,HistoryDatasetAssociationTagAssociation, +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, ToolVersion) from sqlalchemy import and_ from sqlalchemy.orm import aliased @@ -383,6 +382,22 @@ def search(self, trans): self.query = trans.sa_session.query( StoredWorkflow ) + + +################## +#Job Searching +################## + +class JobView(ViewQueryBaseClass): + DOMAIN = "job" + FIELDS = { + } + + def search(self, trans): + self.query = trans.sa_session.query( Job ) + + + """ The view mapping takes a user's name for a table and maps it to a View class that will handle queries @@ -398,7 +413,8 @@ 'hda' : HistoryDatasetView, 'history' : HistoryView, 'workflow' : WorkflowView, - 'tool' : ToolView + 'tool' : ToolView, + 'job' : JobView, } """ diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/web/base/controller.py --- a/lib/galaxy/web/base/controller.py +++ b/lib/galaxy/web/base/controller.py @@ -146,7 +146,7 @@ def get_role( self, trans, id, check_ownership=False, check_accessible=False, deleted=None ): return self.get_object( trans, id, 'Role', check_ownership=False, check_accessible=False, deleted=deleted ) - def encode_all_ids( self, trans, rval ): + def encode_all_ids( self, trans, rval, recursive=False ): """ Encodes all integer values in the dict rval whose keys are 'id' or end with '_id' @@ -160,6 +160,9 @@ rval[k] = trans.security.encode_id( v ) except: pass # probably already encoded + else: + if recursive and type(v) == dict: + rval[k] = self.encode_all_ids(trans, v, recursive) return rval Root = BaseController diff -r a5b83353f9eef6734fba8f85f90a7210a70866db -r f48f86a2328245ddf48230609c1e53125db00740 lib/galaxy/webapps/galaxy/api/search.py --- 
a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -46,5 +46,5 @@ append = True if append: row = query.item_to_api_value(item) - out.append( self.encode_all_ids( trans, row) ) + out.append( self.encode_all_ids( trans, row, True) ) return { 'results' : out } https://bitbucket.org/galaxy/galaxy-central/commits/0f52fea874ae/ Changeset: 0f52fea874ae Branch: search User: Kyle Ellrott Date: 2013-06-17 21:56:36 Summary: Adding input/output hda selection filters to job selection view. Affected #: 2 files diff -r f48f86a2328245ddf48230609c1e53125db00740 -r 0f52fea874ae82b9cec08be1e0dd7ce5e44bcef3 lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -367,14 +367,20 @@ rval = super( Job, self ).get_api_value( view=view ) rval['tool_name'] = self.tool_id param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) + rval['params'] = param_dict + + input_dict = {} for i in self.input_datasets: if i.dataset is not None: - param_dict[i.name] = {"hda_id" : i.dataset.id} + input_dict[i.name] = {"hda_id" : i.dataset.id} for i in self.input_library_datasets: if i.dataset is not None: - param_dict[i.name] = {"ldda_id" : i.dataset.id} - rval['params'] = param_dict - + input_dict[i.name] = {"ldda_id" : i.dataset.id} + for k in input_dict: + if k in param_dict: + del param_dict[k] + rval['inputs'] = input_dict + output_dict = {} for i in self.output_datasets: if i.dataset is not None: @@ -383,7 +389,7 @@ if i.dataset is not None: output_dict[i.name] = {"ldda_id" : i.dataset.id} rval['outputs'] = output_dict - + return rval class Task( object ): diff -r f48f86a2328245ddf48230609c1e53125db00740 -r 0f52fea874ae82b9cec08be1e0dd7ce5e44bcef3 lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -34,8 +34,10 @@ from galaxy.model import (HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset,StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation,HistoryDatasetAssociationTagAssociation, -ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, ToolVersion) +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, JobParameter, +JobToInputDatasetAssociation, JobToOutputDatasetAssociation, ToolVersion) +from galaxy.util.json import to_json_string from sqlalchemy import and_ from sqlalchemy.orm import aliased @@ -388,9 +390,51 @@ #Job Searching ################## + + +def job_param_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobParameter ) + param_name = re.sub(r'^param.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.value == to_json_string(right) + ) + ) + +def job_input_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToInputDatasetAssociation ) + param_name = re.sub(r'^input_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + +def job_output_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToOutputDatasetAssociation ) + param_name = re.sub(r'^output_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + + class JobView(ViewQueryBaseClass): DOMAIN = "job" FIELDS = { + 'param' : ViewField('param', 
handler=job_param_filter), + 'input_hda' : ViewField('input_hda', handler=job_input_hda_filter, id_decode=True), + 'output_hda' : ViewField('output_hda', handler=job_output_hda_filter, id_decode=True) } def search(self, trans): https://bitbucket.org/galaxy/galaxy-central/commits/d0e2ed998c9d/ Changeset: d0e2ed998c9d Branch: search User: Kyle Ellrott Date: 2013-06-18 00:56:53 Summary: Checking user_id for output for job info from search engine Affected #: 1 file diff -r 0f52fea874ae82b9cec08be1e0dd7ce5e44bcef3 -r d0e2ed998c9d2fdbdab22467aaa994edc0eadaba lib/galaxy/webapps/galaxy/api/search.py --- a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -40,8 +40,10 @@ if type( item ) in ( trans.app.model.LibraryFolder, trans.app.model.LibraryDatasetDatasetAssociation, trans.app.model.LibraryDataset ): if (trans.app.security_agent.can_access_library_item( trans.get_current_user_roles(), item, trans.user ) ): append = True - if not append: - if hasattr(item, 'dataset'): + elif type( item ) in trans.app.model.Job: + if item.used_id == trans.user or trans.user_is_admin(): + append = True + elif hasattr(item, 'dataset'): if trans.app.security_agent.can_access_dataset( current_user_roles, item.dataset ): append = True if append: https://bitbucket.org/galaxy/galaxy-central/commits/fd3a82d33bb6/ Changeset: fd3a82d33bb6 Branch: search User: Kyle Ellrott Date: 2013-06-18 01:17:55 Summary: Adding tool_name filter to Job view search engine Affected #: 1 file diff -r d0e2ed998c9d2fdbdab22467aaa994edc0eadaba -r fd3a82d33bb6896ba6395a5e83add5c6ac7f7fbf lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -432,6 +432,7 @@ class JobView(ViewQueryBaseClass): DOMAIN = "job" FIELDS = { + 'tool_name' : ViewField('tool_name', sqlalchemy_field=Job.tool_id), 'param' : ViewField('param', handler=job_param_filter), 'input_hda' : ViewField('input_hda', handler=job_input_hda_filter, id_decode=True), 'output_hda' : ViewField('output_hda', handler=job_output_hda_filter, id_decode=True) https://bitbucket.org/galaxy/galaxy-central/commits/170dd4c157b8/ Changeset: 170dd4c157b8 User: dannon Date: 2013-06-18 23:16:56 Summary: Merged in kellrott/galaxy-central/search (pull request #182) Add ability to search Job data via the search api Affected #: 4 files diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/model/__init__.py --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -157,7 +157,10 @@ return total -class Job( object ): +class Job( object, APIItem ): + api_collection_visible_keys = [ 'id' ] + api_element_visible_keys = [ 'id' ] + """ A job represents a request to run a tool given input datasets, tool parameters, and output datasets. 
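Taken together, the fields registered on JobView above (tool_name, param, input_hda, output_hda) let clients of the search API controller (lib/galaxy/webapps/galaxy/api/search.py) filter the job table. A rough sketch of such a request follows; the SQL-like grammar is handled by the Parsley parser in lib/galaxy/model/search.py and its exact form is assumed here, as are the 'query' payload key, the tool id, and the encoded dataset id:

# Hypothetical queries against the new 'job' search domain; the field names
# match JobView.FIELDS, everything else here is illustrative.
payload = {'query': "select * from job where tool_name = 'cat1'"}
# or, to find jobs that consumed a particular HDA (input_hda is declared with
# id_decode=True, so the security-encoded id from the API can be passed as-is):
payload = {'query': "select * from job where input_hda = 'f2db41e1fa331b3e'"}
# Each matching row is serialized with Job.get_api_value() and run through
# encode_all_ids(trans, row, True) before being returned under 'results'.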
@@ -360,6 +363,34 @@ dataset.blurb = 'deleted' dataset.peek = 'Job deleted' dataset.info = 'Job output deleted by user before job completed' + def get_api_value( self, view='collection' ): + rval = super( Job, self ).get_api_value( view=view ) + rval['tool_name'] = self.tool_id + param_dict = dict( [ ( p.name, p.value ) for p in self.parameters ] ) + rval['params'] = param_dict + + input_dict = {} + for i in self.input_datasets: + if i.dataset is not None: + input_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.input_library_datasets: + if i.dataset is not None: + input_dict[i.name] = {"ldda_id" : i.dataset.id} + for k in input_dict: + if k in param_dict: + del param_dict[k] + rval['inputs'] = input_dict + + output_dict = {} + for i in self.output_datasets: + if i.dataset is not None: + output_dict[i.name] = {"hda_id" : i.dataset.id} + for i in self.output_library_datasets: + if i.dataset is not None: + output_dict[i.name] = {"ldda_id" : i.dataset.id} + rval['outputs'] = output_dict + + return rval class Task( object ): """ diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/model/search.py --- a/lib/galaxy/model/search.py +++ b/lib/galaxy/model/search.py @@ -31,10 +31,13 @@ eggs.require("Parsley") import parsley -from galaxy.model import HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, History, Library, LibraryFolder, LibraryDataset -from galaxy.model import StoredWorkflowTagAssociation, StoredWorkflow, HistoryTagAssociation, ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation -from galaxy.model import ToolVersion +from galaxy.model import (HistoryDatasetAssociation, LibraryDatasetDatasetAssociation, +History, Library, LibraryFolder, LibraryDataset,StoredWorkflowTagAssociation, +StoredWorkflow, HistoryTagAssociation,HistoryDatasetAssociationTagAssociation, +ExtendedMetadata, ExtendedMetadataIndex, HistoryAnnotationAssociation, Job, JobParameter, +JobToInputDatasetAssociation, JobToOutputDatasetAssociation, ToolVersion) +from galaxy.util.json import to_json_string from sqlalchemy import and_ from sqlalchemy.orm import aliased @@ -269,12 +272,30 @@ #History Dataset Searching ################## +def history_dataset_handle_tag(view, left, operator, right): + if operator == "=": + view.do_query = True + #aliasing the tag association table, so multiple links to different tags can be formed during a single query + tag_table = aliased(HistoryDatasetAssociationTagAssociation) + + view.query = view.query.filter( + HistoryDatasetAssociation.id == tag_table.history_dataset_association_id + ) + tmp = right.split(":") + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) + if len(tmp) > 1: + view.query = view.query.filter( tag_table.user_value == tmp[1] ) + else: + raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) + class HistoryDatasetView(ViewQueryBaseClass): DOMAIN = "history_dataset" FIELDS = { 'name' : ViewField('name', sqlalchemy_field=HistoryDatasetAssociation.name), - 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True) + 'id' : ViewField('id',sqlalchemy_field=HistoryDatasetAssociation.id, id_decode=True), + 'tag' : ViewField("tag", handler=history_dataset_handle_tag) + } def search(self, trans): @@ -289,13 +310,14 @@ def history_handle_tag(view, left, operator, right): if operator == "=": view.do_query = True + tag_table = aliased(HistoryTagAssociation) view.query = view.query.filter( - History.id == HistoryTagAssociation.history_id + 
History.id == tag_table.history_id ) tmp = right.split(":") - view.query = view.query.filter( HistoryTagAssociation.user_tname == tmp[0] ) + view.query = view.query.filter( tag_table.user_tname == tmp[0] ) if len(tmp) > 1: - view.query = view.query.filter( HistoryTagAssociation.user_value == tmp[1] ) + view.query = view.query.filter( tag_table.user_value == tmp[1] ) else: raise GalaxyParseError("Invalid comparison operator: %s" % (operator)) @@ -362,6 +384,65 @@ def search(self, trans): self.query = trans.sa_session.query( StoredWorkflow ) + + +################## +#Job Searching +################## + + + +def job_param_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobParameter ) + param_name = re.sub(r'^param.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.value == to_json_string(right) + ) + ) + +def job_input_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToInputDatasetAssociation ) + param_name = re.sub(r'^input_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + +def job_output_hda_filter(view, left, operator, right): + view.do_query = True + alias = aliased( JobToOutputDatasetAssociation ) + param_name = re.sub(r'^output_hda.', '', left) + view.query = view.query.filter( + and_( + Job.id == alias.job_id, + alias.name == param_name, + alias.dataset_id == right + ) + ) + + +class JobView(ViewQueryBaseClass): + DOMAIN = "job" + FIELDS = { + 'tool_name' : ViewField('tool_name', sqlalchemy_field=Job.tool_id), + 'param' : ViewField('param', handler=job_param_filter), + 'input_hda' : ViewField('input_hda', handler=job_input_hda_filter, id_decode=True), + 'output_hda' : ViewField('output_hda', handler=job_output_hda_filter, id_decode=True) + } + + def search(self, trans): + self.query = trans.sa_session.query( Job ) + + + """ The view mapping takes a user's name for a table and maps it to a View class that will handle queries @@ -377,7 +458,8 @@ 'hda' : HistoryDatasetView, 'history' : HistoryView, 'workflow' : WorkflowView, - 'tool' : ToolView + 'tool' : ToolView, + 'job' : JobView, } """ diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/web/base/controller.py --- a/lib/galaxy/web/base/controller.py +++ b/lib/galaxy/web/base/controller.py @@ -146,7 +146,7 @@ def get_role( self, trans, id, check_ownership=False, check_accessible=False, deleted=None ): return self.get_object( trans, id, 'Role', check_ownership=False, check_accessible=False, deleted=deleted ) - def encode_all_ids( self, trans, rval ): + def encode_all_ids( self, trans, rval, recursive=False ): """ Encodes all integer values in the dict rval whose keys are 'id' or end with '_id' @@ -160,6 +160,9 @@ rval[k] = trans.security.encode_id( v ) except: pass # probably already encoded + else: + if recursive and type(v) == dict: + rval[k] = self.encode_all_ids(trans, v, recursive) return rval Root = BaseController diff -r 9702f88d6cd812032b0af856307b2e04f8608ff4 -r 170dd4c157b8b5e010804ba4a1ef3b5da08fa49d lib/galaxy/webapps/galaxy/api/search.py --- a/lib/galaxy/webapps/galaxy/api/search.py +++ b/lib/galaxy/webapps/galaxy/api/search.py @@ -40,11 +40,13 @@ if type( item ) in ( trans.app.model.LibraryFolder, trans.app.model.LibraryDatasetDatasetAssociation, trans.app.model.LibraryDataset ): if (trans.app.security_agent.can_access_library_item( 
trans.get_current_user_roles(), item, trans.user ) ): append = True - if not append: - if hasattr(item, 'dataset'): + elif type( item ) in trans.app.model.Job: + if item.used_id == trans.user or trans.user_is_admin(): + append = True + elif hasattr(item, 'dataset'): if trans.app.security_agent.can_access_dataset( current_user_roles, item.dataset ): append = True if append: row = query.item_to_api_value(item) - out.append( self.encode_all_ids( trans, row) ) + out.append( self.encode_all_ids( trans, row, True) ) return { 'results' : out } Repository URL: https://bitbucket.org/galaxy/galaxy-central/ -- This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.