Hello List, I have a small problem with my own local Galaxy instance. I am trying to set up some workflows for NGS. Everything is working fine so far, but the process of setting the metadata on a file takes a lot of time. Currently I have created a workflow: fastq file from Library --> fastx_groomer (convert from Illumina to Sanger format) --> mapping with bwa. The grooming and mapping run fine, but after mapping, set_metadata.py takes longer than the mapping of the 2 GB fastq file itself. The testing server is a Dell R710 with 2x 6-core CPUs and 72 GB of memory. Below is my config for Galaxy. Does anybody have an idea what's going wrong with my setup? Many thanks, Matthias # # Galaxy is configured by default to be useable in a single-user development # environment. To tune the application for a multi-user production # environment, see the documentation at: # # http://bitbucket.org/galaxy/galaxy-central/wiki/Config/ProductionServer # # Throughout this sample configuration file, except where stated otherwise, # uncommented values override the default if left unset, whereas commented # values are set to the default value. # Examples of many of these options are explained in more detail in the wiki: # # Config hackers are encouraged to check there before asking for help. # ---- HTTP Server ---------------------------------------------------------- # Configuration of the internal HTTP server. [server:main] # The internal HTTP server to use. Currently only Paste is provided. This # option is required. use = egg:Paste#http # The port on which to listen. port = 8081 # The address on which to listen. By default, only listen to localhost (Galaxy # will not be accessible over the network). Use '0.0.0.0' to listen on all # available network interfaces. host = 0.0.0.0 # Use a threadpool for the web server instead of creating a thread for each # request. use_threadpool = True # Number of threads in the web server thread pool. 
threadpool_workers = 8 # ---- Filters -------------------------------------------------------------- # Filters sit between Galaxy and the HTTP server. # These filters are disabled by default. They can be enabled with # 'filter-with' in the [app:main] section below. # Define the gzip filter. [filter:gzip] use = egg:Paste#gzip # Define the proxy-prefix filter. [filter:proxy-prefix] use = egg:PasteDeploy#prefix prefix = /galaxy # ---- Galaxy --------------------------------------------------------------- # Configuration of the Galaxy application. [app:main] # -- Application and filtering # The factory for the WSGI application. This should not be changed. paste.app_factory = galaxy.web.buildapp:app_factory # If not running behind a proxy server, you may want to enable gzip compression # to decrease the size of data transferred over the network. If using a proxy # server, please enable gzip compression there instead. #filter-with = gzip # If running behind a proxy server and Galaxy is served from a subdirectory, # enable the proxy-prefix filter and set the prefix in the # [filter:proxy-prefix] section above. #filter-with = proxy-prefix # If proxy-prefix is enabled and you're running more than one Galaxy instance # behind one hostname, you will want to set this to the same path as the prefix # in the filter above. This value becomes the "path" attribute set in the # cookie so the cookies from each instance will not clobber each other. #cookie_path = None # -- Database # By default, Galaxy uses a SQLite database at 'database/universe.sqlite'. You # may use a SQLAlchemy connection string to specify an external database # instead. This string takes many options which are explained in detail in the # config file documentation. 
database_connection = mysql://xxx:xxx@localhost/galaxy?unix_socket=/data/mysql/mysql.sock # If the server logs errors about not having enough database pool connections, # you will want to increase these values, or consider running more Galaxy # processes. #database_engine_option_pool_size = 5 #database_engine_option_max_overflow = 10 # If using MySQL and the server logs the error "MySQL server has gone away", # you will want to set this to some positive value (7200 should work). database_engine_option_pool_recycle = 7200 # If large database query results are causing memory or response time issues in # the Galaxy process, leave the result on the server instead. This option is # only available for PostgreSQL and is highly recommended. #database_engine_option_server_side_cursors = False # Create only one connection to the database per thread, to reduce the # connection overhead. Recommended when not using SQLite: #database_engine_option_strategy = threadlocal # -- Files and directories # Dataset files are stored in this directory. file_path = /galaxytemp/data # Temporary files are stored in this directory. new_file_path = /galaxytemp/tmp # Tool config file, defines what tools are available in Galaxy. #tool_config_file = tool_conf.xml # Path to the directory containing the tools defined in the config. #tool_path = tools # Directory where data used by tools is located, see the samples in that # directory and the wiki for help: # http://bitbucket.org/galaxy/galaxy-central/wiki/DataIntegration #tool_data_path = tool-data # Datatypes config file, defines what data (file) types are available in # Galaxy. #datatypes_config_file = datatypes_conf.xml # -- Mail and notification # Galaxy sends mail for various things: Subscribing users to the mailing list # if they request it, emailing password resets, notification from the Galaxy # Sample Tracking system, and reporting dataset errors. To do this, it needs # to send mail through an SMTP server, which you may define here. 
#smtp_server = None # On the user registration form, users may choose to join the mailing list. # This is the address of the list they'll be subscribed to. #mailing_join_addr = galaxy-user-join@bx.psu.edu # Datasets in an error state include a link to report the error. Those reports # will be sent to this address. Error reports are disabled if no address is set. #error_email_to = None # -- Display sites # Galaxy can display data at various external browsers. These options specify # which browsers should be available. URLs and builds available at these # browsers are defined in the specified files. # UCSC browsers: tool-data/shared/ucsc/ucsc_build_sites.txt #ucsc_display_sites = main,test,archaea,ucla # GBrowse servers: tool-data/shared/gbrowse/gbrowse_build_sites.txt #gbrowse_display_sites = wormbase,tair,modencode_worm,modencode_fly # GeneTrack servers: tool-data/shared/genetrack/genetrack_sites.txt #genetrack_display_sites = main,test # -- UI Localization # Append "/{brand}" to the "Galaxy" text in the masthead. #brand = None # The URL linked by the "Galaxy/brand" text. #logo_url = / # The URL linked by the "Galaxy Wiki" link in the "Help" menu. #wiki_url = http://bitbucket.org/galaxy/galaxy-central/wiki # The URL linked by the "Email comments..." link in the "Help" menu. #bugs_email = None # The URL linked by the "How to Cite..." link in the "Help" menu. #citation_url = http://bitbucket.org/galaxy/galaxy-central/wiki/Citations # Serve static content, which must be enabled if you're not serving it via a # proxy server. These options should be self-explanatory and so are not # documented individually. You can use these paths (or ones in the proxy # server) to point to your own styles. 
static_enabled = True static_cache_time = 360 static_dir = %(here)s/static/ static_images_dir = %(here)s/static/images static_favicon_dir = %(here)s/static/favicon.ico static_scripts_dir = %(here)s/static/scripts/ static_style_dir = %(here)s/static/june_2007_style/blue # -- Logging and Debugging # Verbosity of console log messages. Acceptable values can be found here: # http://docs.python.org/library/logging.html#logging-levels #log_level = DEBUG # Print database operations to the server log (warning, quite verbose!). #database_engine_option_echo = False # Print database pool operations to the server log (warning, quite verbose!). #database_engine_option_echo_pool = False # Turn on logging of application events and some user events to the database. #log_events = True # Turn on logging of user actions to the database. Actions currently logged are # grid views, tool searches, and use of "recently" used tools menu. The # log_events and log_actions functionality will eventually be merged. #log_actions = True # Debug enables access to various config options useful for development and # debugging: use_lint, use_profile, use_printdebug and use_interactive. It # also causes the files used by PBS/SGE (submission script, output, and error) # to remain on disk after the job is complete. Debug mode is disabled if # commented, but is uncommented by default in the sample config. debug = True # Check for WSGI compliance. #use_lint = False # Run the Python profiler on each request. #use_profile = False # Intercept print statements and show them on the returned page. #use_printdebug = True # Enable live debugging in your browser. This should NEVER be enabled on a # public site. Enabled in the sample config for development. use_interactive = True # Write thread status periodically to 'heartbeat.log', (careful, uses disk # space rapidly!). Useful to determine why your processes may be consuming a # lot of CPU. 
#use_heartbeat = False # Enable the memory debugging interface (careful, negatively impacts server # performance). #use_memdump = False # -- Data Libraries # These library upload options are described in much more detail in the wiki: # http://bitbucket.org/galaxy/galaxy-central/wiki/DataLibraries/UploadingFiles # Add an option to the library upload form which allows administrators to # upload a directory of files. library_import_dir = /data/ # Add an option to the library upload form which allows authorized # non-administrators to upload a directory of files. The configured directory # must contain sub-directories named the same as the non-admin user's Galaxy # login ( email ). The non-admin user is restricted to uploading files or # sub-directories of files contained in their directory. #user_library_import_dir = None # Add an option to the admin library upload tool allowing admins to paste # filesystem paths to files and directories in a box, and these paths will be # added to a library. Set to True to enable. Please note the security # implication that this will give Galaxy Admins access to anything your Galaxy # user has access to. allow_library_path_paste = True # -- Users and Security # Galaxy encodes various internal values when these values will be output in # some format (for example, in a URL or cookie). You should set a key to be # used by the algorithm that encodes and decodes these values. It can be any # string. If left unchanged, anyone could construct a cookie that would grant # them access to others' sessions. #id_secret = USING THE DEFAULT IS NOT SECURE! # User authentication can be delegated to an upstream proxy server (usually # Apache). The upstream proxy should set a REMOTE_USER header in the request. # Enabling remote user disables regular logins. 
For more information, see: # http://bitbucket.org/galaxy/galaxy-central/wiki/Config/ApacheProxy #use_remote_user = False # If use_remote_user is enabled and your external authentication # method just returns bare usernames, set a default mail domain to be appended # to usernames, to become your Galaxy usernames (email addresses). #remote_user_maildomain = None # If use_remote_user is enabled, you can set this to a URL that will log your # users out. #remote_user_logout_href = None # Administrative users - set this to a comma-separated list of valid Galaxy # users (email addresses). These users will have access to the Admin section # of the server, and will have access to create users, groups, roles, # libraries, and more. For more information, see: # http://bitbucket.org/galaxy/galaxy-central/wiki/Admin/AdminInterface admin_users = xxxx@biotec.tu-dresden.de,xxxx@biotec.tu-dresden.de # Force everyone to log in (disable anonymous access). #require_login = False # Allow unregistered users to create new accounts (otherwise, they will have to # be created by an admin). #allow_user_creation = True # Allow administrators to delete accounts. #allow_user_deletion = False # By default, users' data will be public, but setting this to True will cause # it to be private. Does not affect existing users and data, only ones created # after this option is set. Users may still change their default back to # public. new_user_dataset_access_role_default_private = True # -- Beta features # Enable Galaxy's built-in visualization module, Trackster. #enable_tracks = False # Enable Galaxy Pages. Pages are custom webpages that include embedded Galaxy items, # such as datasets, histories, workflows, and visualizations; pages are useful for # documenting and sharing multiple analyses or workflows. Pages are created using a # WYSIWYG editor that is very similar to a word processor. #enable_pages = False # Enable the (experimental! beta!) Web API. Documentation forthcoming. 
#enable_api = False # -- Job Execution # If running multiple Galaxy processes, one can be designated as the job # runner. For more information, see: # http://bitbucket.org/galaxy/galaxy-central/wiki/Config/WebApplicationScaling enable_job_running = True # Should jobs be tracked through the database, rather than in memory? # Necessary if you're running the load balanced setup. track_jobs_in_database = True # Enable job recovery (if Galaxy is restarted while cluster jobs are running, # it can "recover" them when it starts). This is not safe to use if you are # running more than one Galaxy server using the same database. enable_job_recovery = True # Set metadata on job outputs in a separate process (or if using a # cluster, on the cluster). Thanks to Python's Global Interpreter Lock and the # hefty expense that setting metadata incurs, your Galaxy process may become # unresponsive when this operation occurs internally. set_metadata_externally = True # Although it is fairly reliable, setting metadata can occasionally fail. In # these instances, you can choose to retry setting it internally or leave it in # a failed state (since retrying internally may cause the Galaxy process to be # unresponsive). If this option is set to False, the user will be given the # option to retry externally, or set metadata manually (when possible). #retry_metadata_internally = True # Number of concurrent jobs to run (local job runner) local_job_queue_workers = 7 # Jobs can be killed after a certain amount of execution time. Format is in # hh:mm:ss. Currently only implemented for PBS. #job_walltime = None # Clustering Galaxy is not a straightforward process and requires some # pre-configuration. See the wiki before attempting to set any of these # options: # http://bitbucket.org/galaxy/galaxy-central/wiki/Config/Cluster # Comma-separated list of job runners to start. local is always started. 
If # left commented, no jobs will be run on the cluster, even if a cluster URL is # explicitly defined in the [galaxy:tool_runners] section below. The runners # currently available are 'pbs' and 'drmaa'. #start_job_runners = None # The URL for the default runner to use when a tool doesn't explicitly define a # runner below. #default_cluster_job_runner = local:/// # The cluster runners have their own thread pools used to prepare and finish # jobs (so that these sometimes lengthy operations do not block normal queue # operation). The value here is the number of worker threads available to each # started runner. #cluster_job_queue_workers = 3 # These options are only used when using file staging with PBS. #pbs_application_server = #pbs_stage_path = #pbs_dataset_server = # ---- Tool Job Runners ----------------------------------------------------- # Individual per-tool job runner overrides. If not listed here, a tool will # run with the runner defined with default_cluster_job_runner. [galaxy:tool_runners] biomart = local:/// encode_db1 = local:/// hbvar = local:/// microbial_import1 = local:/// ucsc_table_direct1 = local:/// ucsc_table_direct_archaea1 = local:/// ucsc_table_direct_test1 = local:/// upload1 = local:/// # ---- Galaxy Message Queue ------------------------------------------------- # Galaxy uses AMQ protocol to receive messages from external sources like # bar code scanners. Galaxy has been tested against RabbitMQ AMQP implementation. # For Galaxy to receive messages from a message queue the RabbitMQ server has # to be set up with a user account and other parameters listed below. The 'host' # and 'port' fields should point to where the RabbitMQ server is running. 
[galaxy_amqp] #host = 127.0.0.1 #port = 5672 #userid = galaxy #password = galaxy #virtual_host = galaxy_messaging_engine #queue = galaxy_queue #exchange = galaxy_exchange #routing_key = bar_code_scanner -- Matthias Gierth System Administrator Applied Bioinformatics Group (ABG) TU Dresden, Biotec Tatzberg 47-51 01307 Dresden Phone: +49 (0)351 463 40020 Fax: +49 (0)351 463 40087 Email: matthias.gierth@biotec.tu-dresden.de