1 new commit in galaxy-central:
https://bitbucket.org/galaxy/galaxy-central/commits/be3b0358acc9/ Changeset: be3b0358acc9 User: dannon Date: 2013-08-30 05:39:52 Summary: Strip trailing whitespace (and windows line endings) from all python files in lib Affected #: 256 files
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/app.py --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -186,6 +186,6 @@ def configure_fluent_log( self ): if self.config.fluent_log: from galaxy.util.log.fluent_log import FluentTraceLogger - self.trace_logger = FluentTraceLogger( 'galaxy', self.config.fluent_host, self.config.fluent_port ) + self.trace_logger = FluentTraceLogger( 'galaxy', self.config.fluent_host, self.config.fluent_port ) else: self.trace_logger = None
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/config.py --- a/lib/galaxy/config.py +++ b/lib/galaxy/config.py @@ -282,7 +282,7 @@ self.biostar_url = kwargs.get( 'biostar_url', None ) self.biostar_key_name = kwargs.get( 'biostar_key_name', None ) self.biostar_key = kwargs.get( 'biostar_key', None ) - # Experimental: This will not be enabled by default and will hide + # Experimental: This will not be enabled by default and will hide # nonproduction code. # The api_folders refers to whether the API exposes the /folders section. self.api_folders = string_as_bool( kwargs.get( 'api_folders', False ) ) @@ -302,7 +302,7 @@ @property def sentry_dsn_public( self ): """ - Sentry URL with private key removed for use in client side scripts, + Sentry URL with private key removed for use in client side scripts, sentry server will need to be configured to accept events """ if self.sentry_dsn: @@ -436,8 +436,8 @@ """ # Get root logger root = logging.getLogger() - # PasteScript will have already configured the logger if the - # 'loggers' section was found in the config file, otherwise we do + # PasteScript will have already configured the logger if the + # 'loggers' section was found in the config file, otherwise we do # some simple setup using the 'log_*' values from the config. if not config.global_conf_parser.has_section( "loggers" ): format = config.get( "log_format", "%(name)s %(levelname)s %(asctime)s %(message)s" )
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/assembly.py --- a/lib/galaxy/datatypes/assembly.py +++ b/lib/galaxy/datatypes/assembly.py @@ -168,7 +168,7 @@
def regenerate_primary_file(self,dataset): """ - cannot do this until we are setting metadata + cannot do this until we are setting metadata """ log.debug( "Velvet log info %s" % 'JJ regenerate_primary_file') gen_msg = ''
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/binary.py --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -203,7 +203,7 @@ stderr = open( stderr_name ).read().strip() if stderr: if exit_code != 0: - shutil.rmtree( tmp_dir) #clean up + shutil.rmtree( tmp_dir) #clean up raise Exception, "Error Grooming BAM file contents: %s" % stderr else: print stderr @@ -231,7 +231,7 @@ stderr = open( stderr_name ).read().strip() if stderr: if exit_code != 0: - os.unlink( stderr_name ) #clean up + os.unlink( stderr_name ) #clean up raise Exception, "Error Setting BAM Metadata: %s" % stderr else: print stderr @@ -240,7 +240,7 @@ os.unlink( stderr_name ) def sniff( self, filename ): # BAM is compressed in the BGZF format, and must not be uncompressed in Galaxy. - # The first 4 bytes of any bam file is 'BAM\1', and the file is binary. + # The first 4 bytes of any bam file is 'BAM\1', and the file is binary. try: header = gzip.open( filename ).read(4) if binascii.b2a_hex( header ) == binascii.hexlify( 'BAM\1' ): @@ -250,7 +250,7 @@ return False def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: - dataset.peek = "Binary bam alignments file" + dataset.peek = "Binary bam alignments file" dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' @@ -278,7 +278,7 @@ samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) settings[ 'comment_char' ] = '@' return dataproviders.line.RegexLineDataProvider( samtools_source, **settings ) - + @dataproviders.decorators.dataprovider_factory( 'column', dataproviders.column.ColumnarDataProvider.settings ) def column_dataprovider( self, dataset, **settings ): samtools_source = dataproviders.dataset.SamtoolsDataProvider( dataset ) @@ -352,7 +352,7 @@
def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: - dataset.peek = "Binary h5 file" + dataset.peek = "Binary h5 file" dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' @@ -372,7 +372,7 @@
def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: - dataset.peek = "Binary scf sequence file" + dataset.peek = "Binary scf sequence file" dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' @@ -404,7 +404,7 @@ return False def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: - dataset.peek = "Binary sff file" + dataset.peek = "Binary sff file" dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' @@ -451,7 +451,7 @@ return dataset.peek except: return "Binary UCSC %s file (%s)" % ( self._name, data.nice_size( dataset.get_size() ) ) - + Binary.register_sniffable_binary_format("bigwig", "bigwig", BigWig)
@@ -470,9 +470,9 @@
class TwoBit (Binary): """Class describing a TwoBit format nucleotide file""" - + file_ext = "twobit" - + def sniff(self, filename): try: # All twobit files start with a 16-byte header. If the file is smaller than 16 bytes, it's obviously not a valid twobit file.
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py --- a/lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py +++ b/lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py @@ -23,13 +23,13 @@ if not line: raise StopIteration() if line.isspace(): - continue + continue if line[0] == "#": continue if line[0].isalpha(): if line.startswith( "track" ) or line.startswith( "browser" ): continue - + feature = line.strip().split() chrom = feature[0] chrom_start = int(feature[1]) @@ -37,19 +37,19 @@ score = float(feature[3]) return chrom, chrom_start, chrom_end, None, score def main(): - + input_fname = sys.argv[1] out_fname = sys.argv[2] - + reader = BedGraphReader( open( input_fname ) ) - + # Fill array from reader d = array_tree_dict_from_reader( reader, {}, block_size = BLOCK_SIZE ) - + for array_tree in d.itervalues(): array_tree.root.build_summary() - + FileArrayTreeDict.dict_to_file( d, open( out_fname, "w" ) )
-if __name__ == "__main__": +if __name__ == "__main__": main() \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/bgzip.py --- a/lib/galaxy/datatypes/converters/bgzip.py +++ b/lib/galaxy/datatypes/converters/bgzip.py @@ -19,13 +19,13 @@ parser.add_option( '-P', '--preset', dest='preset' ) (options, args) = parser.parse_args() input_fname, output_fname = args - + tmpfile = tempfile.NamedTemporaryFile() sort_params = None - + if options.chrom_col and options.start_col and options.end_col: - sort_params = ["sort", - "-k%(i)s,%(i)s" % { 'i': options.chrom_col }, + sort_params = ["sort", + "-k%(i)s,%(i)s" % { 'i': options.chrom_col }, "-k%(i)i,%(i)in" % { 'i': options.start_col }, "-k%(i)i,%(i)in" % { 'i': options.end_col } ] @@ -40,9 +40,8 @@ after_sort = subprocess.Popen(sort_params, stdin=grepped.stdout, stderr=subprocess.PIPE, stdout=tmpfile ) grepped.stdout.close() output, err = after_sort.communicate() - + ctabix.tabix_compress(tmpfile.name, output_fname, force=True) - -if __name__ == "__main__": + +if __name__ == "__main__": main() - \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/fasta_to_len.py --- a/lib/galaxy/datatypes/converters/fasta_to_len.py +++ b/lib/galaxy/datatypes/converters/fasta_to_len.py @@ -10,7 +10,7 @@ assert sys.version_info[:2] >= ( 2, 4 )
def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ): - + infile = fasta_file out = open( out_file, 'w') keep_first_char = int( keep_first_char )
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/fastq_to_fqtoc.py --- a/lib/galaxy/datatypes/converters/fastq_to_fqtoc.py +++ b/lib/galaxy/datatypes/converters/fastq_to_fqtoc.py @@ -38,11 +38,11 @@ chunk_end = in_file.tell() out_file.write('{"start":"%s","end":"%s","sequences":"%s"},' % (chunk_begin, chunk_end, sequences)) chunk_begin = chunk_end - + chunk_end = in_file.tell() out_file.write('{"start":"%s","end":"%s","sequences":"%s"}' % (chunk_begin, chunk_end, (current_line % lines_per_chunk) / 4)) out_file.write(']}\n') -
-if __name__ == "__main__": + +if __name__ == "__main__": main()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/fastqsolexa_to_fasta_converter.py --- a/lib/galaxy/datatypes/converters/fastqsolexa_to_fasta_converter.py +++ b/lib/galaxy/datatypes/converters/fastqsolexa_to_fasta_converter.py @@ -7,7 +7,7 @@ 1st line: @title_of_seq 2nd line: nucleotides 3rd line: +title_of_qualityscore (might be skipped) -4th line: quality scores +4th line: quality scores (in three forms: a. digits, b. ASCII codes, the first char as the coding base, c. ASCII codes without the first char.)
Usage: @@ -52,4 +52,4 @@
outfile.close()
-if __name__ == "__main__": __main__() \ No newline at end of file +if __name__ == "__main__": __main__() \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/fastqsolexa_to_qual_converter.py --- a/lib/galaxy/datatypes/converters/fastqsolexa_to_qual_converter.py +++ b/lib/galaxy/datatypes/converters/fastqsolexa_to_qual_converter.py @@ -7,7 +7,7 @@ 1st line: @title_of_seq 2nd line: nucleotides 3rd line: +title_of_qualityscore (might be skipped) -4th line: quality scores +4th line: quality scores (in three forms: a. digits, b. ASCII codes, the first char as the coding base, c. ASCII codes without the first char.)
Usage: @@ -30,7 +30,7 @@ seq_title_startswith = '' default_coding_value = 64 fastq_block_lines = 0 - + for i, line in enumerate( file( infile_name ) ): line = line.rstrip() if not line or line.startswith( '#' ): @@ -52,7 +52,7 @@ if not qual_title_startswith: qual_title_startswith = line_startswith if line_startswith != qual_title_startswith: - stop_err( 'Invalid fastqsolexa format at line %d: %s.' % ( i + 1, line ) ) + stop_err( 'Invalid fastqsolexa format at line %d: %s.' % ( i + 1, line ) ) quality_title = line[1:] if quality_title and read_title != quality_title: stop_err( 'Invalid fastqsolexa format at line %d: sequence title "%s" differes from score title "%s".' % ( i + 1, read_title, quality_title ) ) @@ -67,15 +67,15 @@ # peek: ascii or digits? val = line.split()[0]
- try: + try: check = int( val ) fastq_integer = True except: fastq_integer = False - + if fastq_integer: # digits qual = line - else: + else: # ascii quality_score_length = len( line ) if quality_score_length == read_length + 1: @@ -89,8 +89,7 @@ score = ord( char ) - quality_score_startswith # 64 qual = "%s%s " % ( qual, str( score ) ) outfile_score.write( '%s\n' % qual ) - + outfile_score.close()
-if __name__ == "__main__": __main__() - \ No newline at end of file +if __name__ == "__main__": __main__()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/gff_to_interval_index_converter.py --- a/lib/galaxy/datatypes/converters/gff_to_interval_index_converter.py +++ b/lib/galaxy/datatypes/converters/gff_to_interval_index_converter.py @@ -18,23 +18,22 @@ def main(): # Arguments input_fname, out_fname = sys.argv[1:] - + # Do conversion. index = Indexes() offset = 0 reader_wrapper = GFFReaderWrapper( fileinput.FileInput( input_fname ), fix_strand=True ) - for feature in list( reader_wrapper ): + for feature in list( reader_wrapper ): # Add feature; index expects BED coordinates. if isinstance( feature, GenomicInterval ): convert_gff_coords_to_bed( feature ) index.add( feature.chrom, feature.start, feature.end, offset ) - + # Always increment offset, even if feature is not an interval and hence # not included in the index. offset += feature.raw_size
index.write( open(out_fname, "w") ) - -if __name__ == "__main__": + +if __name__ == "__main__": main() - \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/interval_to_bed_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_bed_converter.py +++ b/lib/galaxy/datatypes/converters/interval_to_bed_converter.py @@ -1,62 +1,62 @@ -#!/usr/bin/env python -#Dan Blankenberg - -import sys -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -import bx.intervals.io - -assert sys.version_info[:2] >= ( 2, 4 ) - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def __main__(): - output_name = sys.argv[1] - input_name = sys.argv[2] - try: - chromCol = int( sys.argv[3] ) - 1 - except: - stop_err( "'%s' is an invalid chrom column, correct the column settings before attempting to convert the data format." % str( sys.argv[3] ) ) - try: - startCol = int( sys.argv[4] ) - 1 - except: - stop_err( "'%s' is an invalid start column, correct the column settings before attempting to convert the data format." % str( sys.argv[4] ) ) - try: - endCol = int( sys.argv[5] ) - 1 - except: - stop_err( "'%s' is an invalid end column, correct the column settings before attempting to convert the data format." % str( sys.argv[5] ) ) - try: - strandCol = int( sys.argv[6] ) - 1 - except: - strandCol = -1 - try: - nameCol = int( sys.argv[7] ) - 1 - except: - nameCol = -1 - skipped_lines = 0 - first_skipped_line = 0 - out = open( output_name,'w' ) - count = 0 - for count, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( input_name, 'r' ), chrom_col=chromCol, start_col=startCol, end_col=endCol, strand_col=strandCol, fix_strand=True, return_header=False, return_comments=False ) ): - try: - if nameCol >= 0: - name = region.fields[nameCol] - else: - raise IndexError - except: - name = "region_%i" % count - try: - - out.write( "%s\t%i\t%i\t%s\t%i\t%s\n" % ( region.chrom, region.start, region.end, name, 0, region.strand ) ) - except: - skipped_lines += 1 - if not first_skipped_line: - first_skipped_line = count + 1 - out.close() - print "%i regions converted to BED." % ( count + 1 - skipped_lines ) - if skipped_lines > 0: - print "Skipped %d blank or invalid lines starting with line # %d." % ( skipped_lines, first_skipped_line ) - -if __name__ == "__main__": __main__() +#!/usr/bin/env python +#Dan Blankenberg + +import sys +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +import bx.intervals.io + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def __main__(): + output_name = sys.argv[1] + input_name = sys.argv[2] + try: + chromCol = int( sys.argv[3] ) - 1 + except: + stop_err( "'%s' is an invalid chrom column, correct the column settings before attempting to convert the data format." % str( sys.argv[3] ) ) + try: + startCol = int( sys.argv[4] ) - 1 + except: + stop_err( "'%s' is an invalid start column, correct the column settings before attempting to convert the data format." % str( sys.argv[4] ) ) + try: + endCol = int( sys.argv[5] ) - 1 + except: + stop_err( "'%s' is an invalid end column, correct the column settings before attempting to convert the data format." % str( sys.argv[5] ) ) + try: + strandCol = int( sys.argv[6] ) - 1 + except: + strandCol = -1 + try: + nameCol = int( sys.argv[7] ) - 1 + except: + nameCol = -1 + skipped_lines = 0 + first_skipped_line = 0 + out = open( output_name,'w' ) + count = 0 + for count, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( input_name, 'r' ), chrom_col=chromCol, start_col=startCol, end_col=endCol, strand_col=strandCol, fix_strand=True, return_header=False, return_comments=False ) ): + try: + if nameCol >= 0: + name = region.fields[nameCol] + else: + raise IndexError + except: + name = "region_%i" % count + try: + + out.write( "%s\t%i\t%i\t%s\t%i\t%s\n" % ( region.chrom, region.start, region.end, name, 0, region.strand ) ) + except: + skipped_lines += 1 + if not first_skipped_line: + first_skipped_line = count + 1 + out.close() + print "%i regions converted to BED." % ( count + 1 - skipped_lines ) + if skipped_lines > 0: + print "Skipped %d blank or invalid lines starting with line # %d." % ( skipped_lines, first_skipped_line ) + +if __name__ == "__main__": __main__()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/interval_to_bedstrict_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_bedstrict_converter.py +++ b/lib/galaxy/datatypes/converters/interval_to_bedstrict_converter.py @@ -64,7 +64,7 @@ force_num_columns = int( sys.argv[9] ) except: force_num_columns = None - + skipped_lines = 0 first_skipped_line = None out = open( output_name,'w' ) @@ -88,32 +88,32 @@ break #name (fields[3]) can be anything, no verification needed if len( fields ) > 4: - float( fields[4] ) #score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). + float( fields[4] ) #score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). if len( fields ) > 5: - assert fields[5] in [ '+', '-' ], 'Invalid strand' #strand - Defines the strand - either '+' or '-'. + assert fields[5] in [ '+', '-' ], 'Invalid strand' #strand - Defines the strand - either '+' or '-'. if len( fields ) > 6: - int( fields[6] ) #thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays). + int( fields[6] ) #thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays). if len( fields ) > 7: - int( fields[7] ) #thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays). - if len( fields ) > 8: + int( fields[7] ) #thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays). + if len( fields ) > 8: if fields[8] != '0': #itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to "On", this RBG value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser. fields2 = fields[8].split( ',' ) assert len( fields2 ) == 3, 'RGB value must be 0 or have length of 3' for field in fields2: int( field ) #rgb values are integers if len( fields ) > 9: - int( fields[9] ) #blockCount - The number of blocks (exons) in the BED line. + int( fields[9] ) #blockCount - The number of blocks (exons) in the BED line. if len( fields ) > 10: - if fields[10] != ',': #blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount. + if fields[10] != ',': #blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount. fields2 = fields[10].rstrip( "," ).split( "," ) #remove trailing comma and split on comma - for field in fields2: + for field in fields2: int( field ) if len( fields ) > 11: - if fields[11] != ',': #blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount. + if fields[11] != ',': #blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount. fields2 = fields[11].rstrip( "," ).split( "," ) #remove trailing comma and split on comma for field in fields2: int( field ) - except: + except: strict_bed = False break if force_num_columns is not None and len( fields ) != force_num_columns: @@ -122,7 +122,7 @@ else: strict_bed = False out.close() - + if not strict_bed: skipped_lines = 0 first_skipped_line = None
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/interval_to_coverage.py --- a/lib/galaxy/datatypes/converters/interval_to_coverage.py +++ b/lib/galaxy/datatypes/converters/interval_to_coverage.py @@ -50,12 +50,12 @@ forward = forward_covs[partition] reverse = reverse_covs[partition] if forward+reverse > 0: - coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]), + coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]), forward=forward, reverse=reverse) partitions = [] forward_covs = [] reverse_covs = [] - + start_index = bisect(partitions, record.start) forward = int(record.strand == "+") reverse = int(record.strand == "-") @@ -74,43 +74,43 @@ partitions.insert(end_index, record.end) forward_covs.insert(end_index, forward_covs[end_index-1] - forward ) reverse_covs.insert(end_index, reverse_covs[end_index-1] - reverse ) - + if partitions: for partition in xrange(0, start_index): forward = forward_covs[partition] reverse = reverse_covs[partition] if forward+reverse > 0: - coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]), + coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]), forward=forward, reverse=reverse) partitions = partitions[start_index:] forward_covs = forward_covs[start_index:] reverse_covs = reverse_covs[start_index:] - + lastchrom = chrom - + # Finish the last chromosome if partitions: for partition in xrange(0, len(partitions)-1): forward = forward_covs[partition] reverse = reverse_covs[partition] if forward+reverse > 0: - coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]), + coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]), forward=forward, reverse=reverse) - + class CoverageWriter( object ): def __init__( self, out_stream=None, chromCol=0, positionCol=1, forwardCol=2, reverseCol=3 ): self.out_stream = out_stream self.reverseCol = reverseCol self.nlines = 0 - positions = {str(chromCol):'%(chrom)s', - str(positionCol):'%(position)d', - str(forwardCol):'%(forward)d', + positions = {str(chromCol):'%(chrom)s', + str(positionCol):'%(position)d', + str(forwardCol):'%(forward)d', str(reverseCol):'%(reverse)d'} - if reverseCol < 0: + if reverseCol < 0: self.template = "%(0)s\t%(1)s\t%(2)s\n" % positions else: self.template = "%(0)s\t%(1)s\t%(2)s\t%(3)s\n" % positions - + def write(self, **kwargs ): if self.reverseCol < 0: kwargs['forward'] += kwargs['reverse'] posgen = kwargs['position'] @@ -121,12 +121,12 @@ def close(self): self.out_stream.flush() self.out_stream.close() - + if __name__ == "__main__": options, args = doc_optparse.parse( __doc__ ) try: chr_col_1, start_col_1, end_col_1, strand_col_1 = [int(x)-1 for x in options.cols1.split(',')] - chr_col_2, position_col_2, forward_col_2, reverse_col_2 = [int(x)-1 for x in options.cols2.split(',')] + chr_col_2, position_col_2, forward_col_2, reverse_col_2 = [int(x)-1 for x in options.cols2.split(',')] in_fname, out_fname = args except: doc_optparse.exception() @@ -141,7 +141,7 @@ chromCol = chr_col_2, positionCol = position_col_2, forwardCol = forward_col_2, reverseCol = reverse_col_2, ) temp_file.seek(0) - interval = io.NiceReaderWrapper( temp_file, + interval = io.NiceReaderWrapper( temp_file, chrom_col=chr_col_1, start_col=start_col_1, end_col=end_col_1,
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/interval_to_fli.py --- a/lib/galaxy/datatypes/converters/interval_to_fli.py +++ b/lib/galaxy/datatypes/converters/interval_to_fli.py @@ -78,13 +78,13 @@ if len( fields ) < 4: continue
- # Process line + # Process line name_loc_dict[ fields[3] ] = { 'contig': fields[0], 'start': int( fields[1] ), 'end': int ( fields[2] ) } - + # Create sorted list of entries. out = open( out_fname, 'w' ) max_len = 0 @@ -95,7 +95,7 @@ if len( entry ) > max_len: max_len = len( entry ) entries.append( entry ) - + # Write padded entries. out.write( str( max_len + 1 ).ljust( max_len ) + '\n' ) for entry in entries:
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/interval_to_tabix_converter.py --- a/lib/galaxy/datatypes/converters/interval_to_tabix_converter.py +++ b/lib/galaxy/datatypes/converters/interval_to_tabix_converter.py @@ -20,20 +20,19 @@ parser.add_option( '-P', '--preset', dest='preset' ) (options, args) = parser.parse_args() input_fname, index_fname, out_fname = args - + # Create index. if options.preset: # Preset type. - ctabix.tabix_index(filename=index_fname, preset=options.preset, keep_original=True, + ctabix.tabix_index(filename=index_fname, preset=options.preset, keep_original=True, already_compressed=True, index_filename=out_fname) else: # For interval files; column indices are 0-based. - ctabix.tabix_index(filename=index_fname, seq_col=(options.chrom_col - 1), - start_col=(options.start_col - 1), end_col=(options.end_col - 1), + ctabix.tabix_index(filename=index_fname, seq_col=(options.chrom_col - 1), + start_col=(options.start_col - 1), end_col=(options.end_col - 1), keep_original=True, already_compressed=True, index_filename=out_fname) if os.path.getsize(index_fname) == 0: sys.stderr.write("The converted tabix index file is empty, meaning the input data is invalid.") - -if __name__ == "__main__": + +if __name__ == "__main__": main() - \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/lped_to_fped_converter.py --- a/lib/galaxy/datatypes/converters/lped_to_fped_converter.py +++ b/lib/galaxy/datatypes/converters/lped_to_fped_converter.py @@ -1,110 +1,110 @@ -# for rgenetics - lped to fbat -# recode to numeric fbat version -# much slower so best to always -# use numeric alleles internally - -import sys,os,time - - -prog = os.path.split(sys.argv[0])[-1] -myversion = 'Oct 10 2009' - -galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> -<title></title> -<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> -</head> -<body> -<div class="document"> -""" - -def timenow(): - """return current time as a string - """ - return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) - - -def rgConv(inpedfilepath,outhtmlname,outfilepath): - """convert linkage ped/map to fbat""" - recode={'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'} - basename = os.path.split(inpedfilepath)[-1] # get basename - inmap = '%s.map' % inpedfilepath - inped = '%s.ped' % inpedfilepath - outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data - outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to - try: - mf = file(inmap,'r') - except: - sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap)) - sys.exit(1) - try: - rsl = [x.split()[1] for x in mf] - except: - sys.stderr.write('## cannot parse %s' % inmap) - sys.exit(1) - try: - os.makedirs(outfilepath) - except: - pass # already exists - head = ' '.join(rsl) # list of rs numbers - # TODO add anno to rs but fbat will prolly barf? - pedf = file(inped,'r') - o = file(outfpath,'w',2**20) - o.write(head) - o.write('\n') - for i,row in enumerate(pedf): - if i == 0: - lrow = row.split() - try: - x = [int(x) for x in lrow[10:50]] # look for non numeric codes - except: - dorecode = 1 - if dorecode: - lrow = row.strip().split() - p = lrow[:6] - g = lrow[6:] - gc = [recode.get(x,'0') for x in g] - lrow = p+gc - row = '%s\n' % ' '.join(lrow) - o.write(row) - o.close() - - -def main(): - """call fbater - need to work with rgenetics composite datatypes - so in and out are html files with data in extrafiles path - <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name' - '$output1' '$output1.extra_files_path' - </command> - """ - nparm = 3 - if len(sys.argv) < nparm: - sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm)) - sys.exit(1) - inpedfilepath = sys.argv[1] - outhtmlname = sys.argv[2] - outfilepath = sys.argv[3] - try: - os.makedirs(outfilepath) - except: - pass - rgConv(inpedfilepath,outhtmlname,outfilepath) - f = file(outhtmlname,'w') - f.write(galhtmlprefix % prog) - flist = os.listdir(outfilepath) - print '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info - f.write('<div>## Rgenetics: http://rgenetics.org Galaxy Tools %s %s\n<ol>' % (prog,timenow())) - for i, data in enumerate( flist ): - f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) - f.write("</div></body></html>") - f.close() - - - -if __name__ == "__main__": - main() +# for rgenetics - lped to fbat +# recode to numeric fbat version +# much slower so best to always +# use numeric alleles internally + +import sys,os,time + + +prog = os.path.split(sys.argv[0])[-1] +myversion = 'Oct 10 2009' + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + +def rgConv(inpedfilepath,outhtmlname,outfilepath): + """convert linkage ped/map to fbat""" + recode={'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'} + basename = os.path.split(inpedfilepath)[-1] # get basename + inmap = '%s.map' % inpedfilepath + inped = '%s.ped' % inpedfilepath + outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data + outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to + try: + mf = file(inmap,'r') + except: + sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap)) + sys.exit(1) + try: + rsl = [x.split()[1] for x in mf] + except: + sys.stderr.write('## cannot parse %s' % inmap) + sys.exit(1) + try: + os.makedirs(outfilepath) + except: + pass # already exists + head = ' '.join(rsl) # list of rs numbers + # TODO add anno to rs but fbat will prolly barf? + pedf = file(inped,'r') + o = file(outfpath,'w',2**20) + o.write(head) + o.write('\n') + for i,row in enumerate(pedf): + if i == 0: + lrow = row.split() + try: + x = [int(x) for x in lrow[10:50]] # look for non numeric codes + except: + dorecode = 1 + if dorecode: + lrow = row.strip().split() + p = lrow[:6] + g = lrow[6:] + gc = [recode.get(x,'0') for x in g] + lrow = p+gc + row = '%s\n' % ' '.join(lrow) + o.write(row) + o.close() + + +def main(): + """call fbater + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' + </command> + """ + nparm = 3 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm)) + sys.exit(1) + inpedfilepath = sys.argv[1] + outhtmlname = sys.argv[2] + outfilepath = sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + rgConv(inpedfilepath,outhtmlname,outfilepath) + f = file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist = os.listdir(outfilepath) + print '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info + f.write('<div>## Rgenetics: http://rgenetics.org Galaxy Tools %s %s\n<ol>' % (prog,timenow())) + for i, data in enumerate( flist ): + f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) + f.write("</div></body></html>") + f.close() + + + +if __name__ == "__main__": + main()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/lped_to_pbed_converter.py --- a/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py +++ b/lib/galaxy/datatypes/converters/lped_to_pbed_converter.py @@ -1,110 +1,110 @@ -# for rgenetics - lped to pbed -# where to stop with converters -# pbed might be central -# eg lped/eigen/fbat/snpmatrix all to pbed -# and pbed to lped/eigen/fbat/snpmatrix ? -# that's a lot of converters -import sys,os,time,subprocess - - -prog = os.path.split(sys.argv[0])[-1] -myversion = 'Oct 10 2009' - -galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> -<title></title> -<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> -</head> -<body> -<div class="document"> -""" - -def timenow(): - """return current time as a string - """ - return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) - -def getMissval(inped=''): - """ - read some lines...ugly hack - try to guess missing value - should be N or 0 but might be . or - - """ - commonmissvals = {'N':'N','0':'0','n':'n','9':'9','-':'-','.':'.'} - try: - f = file(inped,'r') - except: - return None # signal no in file - missval = None - while missval == None: # doggedly continue until we solve the mystery - try: - l = f.readline() - except: - break - ll = l.split()[6:] # ignore pedigree stuff - for c in ll: - if commonmissvals.get(c,None): - missval = c - f.close() - return missval - if not missval: - missval = 'N' # punt - close(f) - return missval - -def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): - """ - """ - pedf = '%s.ped' % inpedfilepath - basename = os.path.split(inpedfilepath)[-1] # get basename - outroot = os.path.join(outfilepath,basename) - missval = getMissval(inped = pedf) - if not missval: - print '### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf - missval = '0' - cl = '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' % (plink,inpedfilepath,outroot,missval) - p = subprocess.Popen(cl,shell=True,cwd=outfilepath) - retval = p.wait() # run plink - - - - -def main(): - """ - need to work with rgenetics composite datatypes - so in and out are html files with data in extrafiles path - <command interpreter="python">lped_to_pbed_converter.py '$input1/$input1.metadata.base_name' - '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' - </command> - """ - nparm = 4 - if len(sys.argv) < nparm: - sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm)) - sys.exit(1) - inpedfilepath = sys.argv[1] - outhtmlname = sys.argv[2] - outfilepath = sys.argv[3] - try: - os.makedirs(outfilepath) - except: - pass - plink = sys.argv[4] - rgConv(inpedfilepath,outhtmlname,outfilepath,plink) - f = file(outhtmlname,'w') - f.write(galhtmlprefix % prog) - flist = os.listdir(outfilepath) - s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info - print s - f.write('<div>%s\n<ol>' % (s)) - for i, data in enumerate( flist ): - f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) - f.write("</div></body></html>") - f.close() - - - -if __name__ == "__main__": - main() +# for rgenetics - lped to pbed +# where to stop with converters +# pbed might be central +# eg lped/eigen/fbat/snpmatrix all to pbed +# and pbed to lped/eigen/fbat/snpmatrix ? +# that's a lot of converters +import sys,os,time,subprocess + + +prog = os.path.split(sys.argv[0])[-1] +myversion = 'Oct 10 2009' + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +def getMissval(inped=''): + """ + read some lines...ugly hack - try to guess missing value + should be N or 0 but might be . or - + """ + commonmissvals = {'N':'N','0':'0','n':'n','9':'9','-':'-','.':'.'} + try: + f = file(inped,'r') + except: + return None # signal no in file + missval = None + while missval == None: # doggedly continue until we solve the mystery + try: + l = f.readline() + except: + break + ll = l.split()[6:] # ignore pedigree stuff + for c in ll: + if commonmissvals.get(c,None): + missval = c + f.close() + return missval + if not missval: + missval = 'N' # punt + close(f) + return missval + +def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): + """ + """ + pedf = '%s.ped' % inpedfilepath + basename = os.path.split(inpedfilepath)[-1] # get basename + outroot = os.path.join(outfilepath,basename) + missval = getMissval(inped = pedf) + if not missval: + print '### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf + missval = '0' + cl = '%s --noweb --file %s --make-bed --out %s --missing-genotype %s' % (plink,inpedfilepath,outroot,missval) + p = subprocess.Popen(cl,shell=True,cwd=outfilepath) + retval = p.wait() # run plink + + + + +def main(): + """ + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + <command interpreter="python">lped_to_pbed_converter.py '$input1/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' + </command> + """ + nparm = 4 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm)) + sys.exit(1) + inpedfilepath = sys.argv[1] + outhtmlname = sys.argv[2] + outfilepath = sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + plink = sys.argv[4] + rgConv(inpedfilepath,outhtmlname,outfilepath,plink) + f = file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist = os.listdir(outfilepath) + s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info + print s + f.write('<div>%s\n<ol>' % (s)) + for i, data in enumerate( flist ): + f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) + f.write("</div></body></html>") + f.close() + + + +if __name__ == "__main__": + main()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/maf_to_fasta_converter.py --- a/lib/galaxy/datatypes/converters/maf_to_fasta_converter.py +++ b/lib/galaxy/datatypes/converters/maf_to_fasta_converter.py @@ -1,32 +1,32 @@ -#!/usr/bin/env python -#Dan Blankenberg - -import sys -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -import bx.align.maf -from galaxy.tools.util import maf_utilities - -assert sys.version_info[:2] >= ( 2, 4 ) - -def __main__(): - output_name = sys.argv.pop(1) - input_name = sys.argv.pop(1) - out = open( output_name, 'w' ) - count = 0 - for count, block in enumerate( bx.align.maf.Reader( open( input_name, 'r' ) ) ): +#!/usr/bin/env python +#Dan Blankenberg + +import sys +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +import bx.align.maf +from galaxy.tools.util import maf_utilities + +assert sys.version_info[:2] >= ( 2, 4 ) + +def __main__(): + output_name = sys.argv.pop(1) + input_name = sys.argv.pop(1) + out = open( output_name, 'w' ) + count = 0 + for count, block in enumerate( bx.align.maf.Reader( open( input_name, 'r' ) ) ): spec_counts = {} - for c in block.components: + for c in block.components: spec, chrom = maf_utilities.src_split( c.src ) if spec not in spec_counts: spec_counts[ spec ] = 0 else: - spec_counts[ spec ] += 1 - out.write( "%s\n" % maf_utilities.get_fasta_header( c, { 'block_index' : count, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, count, spec_counts[ spec ] ) ) ) - out.write( "%s\n" % c.text ) - out.write( "\n" ) - out.close() - print "%i MAF blocks converted to FASTA." % ( count ) - - -if __name__ == "__main__": __main__() + spec_counts[ spec ] += 1 + out.write( "%s\n" % maf_utilities.get_fasta_header( c, { 'block_index' : count, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, count, spec_counts[ spec ] ) ) ) + out.write( "%s\n" % c.text ) + out.write( "\n" ) + out.close() + print "%i MAF blocks converted to FASTA." % ( count ) + + +if __name__ == "__main__": __main__()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/maf_to_interval_converter.py --- a/lib/galaxy/datatypes/converters/maf_to_interval_converter.py +++ b/lib/galaxy/datatypes/converters/maf_to_interval_converter.py @@ -1,32 +1,32 @@ -#!/usr/bin/env python -#Dan Blankenberg - -import sys -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) +#!/usr/bin/env python +#Dan Blankenberg + +import sys +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) import bx.align.maf -from galaxy.tools.util import maf_utilities - -assert sys.version_info[:2] >= ( 2, 4 ) - -def __main__(): - output_name = sys.argv.pop(1) - input_name = sys.argv.pop(1) - species = sys.argv.pop(1) - out = open(output_name,'w') - count = 0 - #write interval header line - out.write( "#chrom\tstart\tend\tstrand\n" ) - try: - for block in bx.align.maf.Reader( open( input_name, 'r' ) ): - for c in maf_utilities.iter_components_by_src_start( block, species ): - if c is not None: - out.write( "%s\t%i\t%i\t%s\n" % ( maf_utilities.src_split( c.src )[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand ) ) - count += 1 - except Exception, e: - print >> sys.stderr, "There was a problem processing your input: %s" % e - out.close() - print "%i MAF blocks converted to Genomic Intervals for species %s." % ( count, species ) - - -if __name__ == "__main__": __main__() +from galaxy.tools.util import maf_utilities + +assert sys.version_info[:2] >= ( 2, 4 ) + +def __main__(): + output_name = sys.argv.pop(1) + input_name = sys.argv.pop(1) + species = sys.argv.pop(1) + out = open(output_name,'w') + count = 0 + #write interval header line + out.write( "#chrom\tstart\tend\tstrand\n" ) + try: + for block in bx.align.maf.Reader( open( input_name, 'r' ) ): + for c in maf_utilities.iter_components_by_src_start( block, species ): + if c is not None: + out.write( "%s\t%i\t%i\t%s\n" % ( maf_utilities.src_split( c.src )[-1], c.get_forward_strand_start(), c.get_forward_strand_end(), c.strand ) ) + count += 1 + except Exception, e: + print >> sys.stderr, "There was a problem processing your input: %s" % e + out.close() + print "%i MAF blocks converted to Genomic Intervals for species %s." % ( count, species ) + + +if __name__ == "__main__": __main__()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/pbed_ldreduced_converter.py --- a/lib/galaxy/datatypes/converters/pbed_ldreduced_converter.py +++ b/lib/galaxy/datatypes/converters/pbed_ldreduced_converter.py @@ -21,7 +21,7 @@ <div class="document"> """
-plinke = 'plink' +plinke = 'plink'
def timenow(): @@ -51,7 +51,7 @@ except: alog.append('### %s Strange - no std out from plink when running command line\n%s\n' % (timenow(),' '.join(vcl))) return alog - +
def makeLDreduced(basename,infpath=None,outfpath=None,plinke='plink',forcerebuild=False,returnFname=False, winsize="60", winmove="40", r2thresh="0.1" ): @@ -79,11 +79,11 @@ need to work with rgenetics composite datatypes so in and out are html files with data in extrafiles path
- .. raw:: xml + .. raw:: xml
<command interpreter="python"> - pbed_ldreduced_converter.py '$input1.extra_files_path/$input1.metadata.base_name' '$winsize' '$winmove' '$r2thresh' - '$output1' '$output1.files_path' 'plink' + pbed_ldreduced_converter.py '$input1.extra_files_path/$input1.metadata.base_name' '$winsize' '$winmove' '$r2thresh' + '$output1' '$output1.files_path' 'plink' </command>
""" @@ -116,7 +116,7 @@ f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) f.write("</div></body></html>") f.close() - +
if __name__ == "__main__": main()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/pbed_to_lped_converter.py --- a/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py +++ b/lib/galaxy/datatypes/converters/pbed_to_lped_converter.py @@ -1,80 +1,80 @@ -# for rgenetics - lped to pbed -# where to stop with converters -# pbed might be central -# eg lped/eigen/fbat/snpmatrix all to pbed -# and pbed to lped/eigen/fbat/snpmatrix ? -# that's a lot of converters -import sys,os,time,subprocess - - -prog = os.path.split(sys.argv[0])[-1] -myversion = 'Oct 10 2009' - -galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> -<title></title> -<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> -</head> -<body> -<div class="document"> -""" - -def timenow(): - """return current time as a string - """ - return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) +# for rgenetics - lped to pbed +# where to stop with converters +# pbed might be central +# eg lped/eigen/fbat/snpmatrix all to pbed +# and pbed to lped/eigen/fbat/snpmatrix ? +# that's a lot of converters +import sys,os,time,subprocess
- + +prog = os.path.split(sys.argv[0])[-1] +myversion = 'Oct 10 2009' + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + def rgConv(inpedfilepath,outhtmlname,outfilepath,plink): """ """ - - basename = os.path.split(inpedfilepath)[-1] # get basename + + basename = os.path.split(inpedfilepath)[-1] # get basename outroot = os.path.join(outfilepath,basename) cl = '%s --noweb --bfile %s --recode --out %s ' % (plink,inpedfilepath,outroot) p = subprocess.Popen(cl,shell=True,cwd=outfilepath) retval = p.wait() # run plink
- - - -def main(): - """ - need to work with rgenetics composite datatypes - so in and out are html files with data in extrafiles path - <command interpreter="python">pbed_to_lped_converter.py '$input1/$input1.metadata.base_name' - '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' - </command> - """ - nparm = 4 - if len(sys.argv) < nparm: - sys.stderr.write('## %s called with %s - needs %d parameters \n' % (myname,sys.argv,nparm)) - sys.exit(1) - inpedfilepath = sys.argv[1] - outhtmlname = sys.argv[2] - outfilepath = sys.argv[3] - try: - os.makedirs(outfilepath) - except: - pass - plink = sys.argv[4] - rgConv(inpedfilepath,outhtmlname,outfilepath,plink) - f = file(outhtmlname,'w') - f.write(galhtmlprefix % prog) - flist = os.listdir(outfilepath) - s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info - print s - f.write('<div>%s\n<ol>' % (s)) - for i, data in enumerate( flist ): - f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) - f.write("</div></body></html>") - f.close() - - - -if __name__ == "__main__": - main() + + + +def main(): + """ + need to work with rgenetics composite datatypes + so in and out are html files with data in extrafiles path + <command interpreter="python">pbed_to_lped_converter.py '$input1/$input1.metadata.base_name' + '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink' + </command> + """ + nparm = 4 + if len(sys.argv) < nparm: + sys.stderr.write('## %s called with %s - needs %d parameters \n' % (myname,sys.argv,nparm)) + sys.exit(1) + inpedfilepath = sys.argv[1] + outhtmlname = sys.argv[2] + outfilepath = sys.argv[3] + try: + os.makedirs(outfilepath) + except: + pass + plink = sys.argv[4] + rgConv(inpedfilepath,outhtmlname,outfilepath,plink) + f = file(outhtmlname,'w') + f.write(galhtmlprefix % prog) + flist = os.listdir(outfilepath) + s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info + print s + f.write('<div>%s\n<ol>' % (s)) + for i, data in enumerate( flist ): + f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) + f.write("</div></body></html>") + f.close() + + + +if __name__ == "__main__": + main()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/picard_interval_list_to_bed6_converter.py --- a/lib/galaxy/datatypes/converters/picard_interval_list_to_bed6_converter.py +++ b/lib/galaxy/datatypes/converters/picard_interval_list_to_bed6_converter.py @@ -17,7 +17,7 @@ for i, line in enumerate( open( input_name ) ): complete_interval = False line = line.rstrip( '\r\n' ) - if line: + if line: if line.startswith( HEADER_STARTS_WITH ): header_lines += 1 else:
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/sam_to_bam.py --- a/lib/galaxy/datatypes/converters/sam_to_bam.py +++ b/lib/galaxy/datatypes/converters/sam_to_bam.py @@ -19,12 +19,12 @@ #Parse Command Line parser = optparse.OptionParser() (options, args) = parser.parse_args() - + assert len( args ) == 2, 'You must specify the input and output filenames' input_filename, output_filename = args - + tmp_dir = tempfile.mkdtemp( prefix='tmp-sam_to_bam_converter-' ) - + #convert to SAM unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' ) unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' ) @@ -43,14 +43,14 @@ else: break stderr.close() - + #sort sam, so indexing will not fail sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' ) sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' ) cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_filename ) proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) return_code = proc.wait() - + if return_code: stderr_target = sys.stderr else: @@ -63,7 +63,7 @@ else: break stderr.close() - + cleanup_before_exit( tmp_dir )
if __name__=="__main__": __main__()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/vcf_to_interval_index_converter.py --- a/lib/galaxy/datatypes/converters/vcf_to_interval_index_converter.py +++ b/lib/galaxy/datatypes/converters/vcf_to_interval_index_converter.py @@ -16,20 +16,19 @@ # Read options, args. parser = optparse.OptionParser() (options, args) = parser.parse_args() - in_file, out_file = args - + in_file, out_file = args + # Do conversion. index = Indexes() - reader = galaxy_utils.sequence.vcf.Reader( open( in_file ) ) + reader = galaxy_utils.sequence.vcf.Reader( open( in_file ) ) offset = reader.metadata_len for vcf_line in reader: - # VCF format provides a chrom and 1-based position for each variant. + # VCF format provides a chrom and 1-based position for each variant. # IntervalIndex expects 0-based coordinates. index.add( vcf_line.chrom, vcf_line.pos-1, vcf_line.pos, offset ) offset += len( vcf_line.raw_line ) - + index.write( open( out_file, "w" ) )
-if __name__ == "__main__": +if __name__ == "__main__": main() - \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/vcf_to_vcf_bgzip.py --- a/lib/galaxy/datatypes/converters/vcf_to_vcf_bgzip.py +++ b/lib/galaxy/datatypes/converters/vcf_to_vcf_bgzip.py @@ -1,7 +1,7 @@ #!/usr/bin/env python
""" -Uses pysam to bgzip a vcf file as-is. +Uses pysam to bgzip a vcf file as-is. Headers, which are important, are kept. Original ordering, which may be specifically needed by tools or external display applications, is also maintained.
@@ -17,8 +17,8 @@ parser = optparse.OptionParser() (options, args) = parser.parse_args() input_fname, output_fname = args - + ctabix.tabix_compress(input_fname, output_fname, force=True) - -if __name__ == "__main__": + +if __name__ == "__main__": main()
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py --- a/lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py +++ b/lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py @@ -11,19 +11,19 @@ BLOCK_SIZE = 100
def main(): - + input_fname = sys.argv[1] out_fname = sys.argv[2] - + reader = WiggleReader( open( input_fname ) ) - + # Fill array from reader d = array_tree_dict_from_reader( reader, {}, block_size = BLOCK_SIZE ) - + for array_tree in d.itervalues(): array_tree.root.build_summary() - + FileArrayTreeDict.dict_to_file( d, open( out_fname, "w" ) )
-if __name__ == "__main__": +if __name__ == "__main__": main() \ No newline at end of file
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/converters/wiggle_to_simple_converter.py --- a/lib/galaxy/datatypes/converters/wiggle_to_simple_converter.py +++ b/lib/galaxy/datatypes/converters/wiggle_to_simple_converter.py @@ -17,16 +17,16 @@ sys.exit()
def main(): - if len( sys.argv ) > 1: + if len( sys.argv ) > 1: in_file = open( sys.argv[1] ) - else: + else: in_file = open( sys.stdin ) - + if len( sys.argv ) > 2: out_file = open( sys.argv[2], "w" ) else: out_file = sys.stdout - + try: for fields in bx.wiggle.IntervalReader( UCSCOutWrapper( in_file ) ): out_file.write( "%s\n" % "\t".join( map( str, fields ) ) )
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/coverage.py --- a/lib/galaxy/datatypes/coverage.py +++ b/lib/galaxy/datatypes/coverage.py @@ -15,7 +15,7 @@
class LastzCoverage( Tabular ): file_ext = "coverage" - + MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter ) MetadataElement( name="positionCol", default=2, desc="Position column", param=metadata.ColumnParameter ) MetadataElement( name="forwardCol", default=3, desc="Forward or aggregate read column", param=metadata.ColumnParameter ) @@ -44,7 +44,7 @@ t_end = math.ceil( end / resolution ) x = numpy.arange( t_start, t_end ) * resolution y = data[ t_start : t_end ] - + return zip(x.tolist(), y.tolist())
def get_track_resolution( self, dataset, start, end):
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/data.py --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -282,14 +282,14 @@ tmpfh = open( tmpf ) # CANNOT clean up - unlink/rmdir was always failing because file handle retained to return - must rely on a cron job to clean up tmp trans.response.set_content_type( "application/x-zip-compressed" ) - trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.zip"' % outfname + trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.zip"' % outfname return tmpfh else: trans.response.set_content_type( "application/x-tar" ) outext = 'tgz' if params.do_action == 'tbz': outext = 'tbz' - trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.%s"' % (outfname,outext) + trans.response.headers[ "Content-Disposition" ] = 'attachment; filename="%s.%s"' % (outfname,outext) archive.wsgi_status = trans.response.wsgi_status() archive.wsgi_headeritems = trans.response.wsgi_headeritems() return archive.stream
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/dataproviders/base.py --- a/lib/galaxy/datatypes/dataproviders/base.py +++ b/lib/galaxy/datatypes/dataproviders/base.py @@ -304,7 +304,7 @@ self.source = self.validate_source( source ) except exceptions.InvalidDataProviderSource, invalid_source: continue - + parent_gen = super( MultiSourceDataProvider, self ).__iter__() for datum in parent_gen: yield datum
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/dataproviders/line.py --- a/lib/galaxy/datatypes/dataproviders/line.py +++ b/lib/galaxy/datatypes/dataproviders/line.py @@ -262,7 +262,7 @@ """ if self.limit != None and self.num_data_returned >= self.limit: return None - + last_block = self.assemble_current_block() self.num_data_read += 1
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/display_applications/application.py --- a/lib/galaxy/datatypes/display_applications/application.py +++ b/lib/galaxy/datatypes/display_applications/application.py @@ -128,7 +128,7 @@ self.data = data self.dataset_hash = dataset_hash self.user_hash = user_hash - self.trans = trans + self.trans = trans self.ready, self.parameters = self.link.build_parameter_dict( self.data, self.dataset_hash, self.user_hash, trans, app_kwds ) def display_ready( self ): return self.ready
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/display_applications/parameters.py --- a/lib/galaxy/datatypes/display_applications/parameters.py +++ b/lib/galaxy/datatypes/display_applications/parameters.py @@ -10,9 +10,9 @@
class DisplayApplicationParameter( object ): """ Abstract Class for Display Application Parameters """ - + type = None - + @classmethod def from_elem( cls, elem, link ): param_type = elem.get( 'type', None ) @@ -42,9 +42,9 @@
class DisplayApplicationDataParameter( DisplayApplicationParameter ): """ Parameter that returns a file_name containing the requested content """ - + type = 'data' - + def __init__( self, elem, link ): DisplayApplicationParameter.__init__( self, elem, link ) self.extensions = elem.get( 'format', None ) @@ -113,7 +113,7 @@ return False def ready( self, other_values ): value = self._get_dataset_like_object( other_values ) - if value: + if value: if value.state == value.states.OK: return True elif value.state == value.states.ERROR: @@ -122,9 +122,9 @@
class DisplayApplicationTemplateParameter( DisplayApplicationParameter ): """ Parameter that returns a string containing the requested content """ - + type = 'template' - + def __init__( self, elem, link ): DisplayApplicationParameter.__init__( self, elem, link ) self.text = elem.text or '' @@ -154,7 +154,7 @@ if self.parameter.guess_mime_type: mime, encoding = mimetypes.guess_type( self._url ) if not mime: - mime = self.trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( self._url )[ -1 ], None ) + mime = self.trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( self._url )[ -1 ], None ) if mime: return mime return 'text/plain' @@ -193,7 +193,7 @@ if self.parameter.guess_mime_type: mime, encoding = mimetypes.guess_type( self._url ) if not mime: - mime = self.trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( self._url )[ -1 ], None ) + mime = self.trans.app.datatypes_registry.get_mimetype_by_extension( ".".split( self._url )[ -1 ], None ) if mime: return mime if hasattr( self.value, 'get_mime' ):
diff -r f33687b0e59013a924a696f1c9630cae960b74a2 -r be3b0358acc9f251a8c8d66beb87f6b4684c1c10 lib/galaxy/datatypes/display_applications/util.py --- a/lib/galaxy/datatypes/display_applications/util.py +++ b/lib/galaxy/datatypes/display_applications/util.py @@ -10,7 +10,7 @@ user_hash = 'None' else: user_hash = str( user.id ) - # Pad to a multiple of 8 with leading "!" + # Pad to a multiple of 8 with leading "!" user_hash = ( "!" * ( 8 - len( user_hash ) % 8 ) ) + user_hash cipher = Blowfish.new( str( dataset.create_time ) ) user_hash = cipher.encrypt( user_hash ).encode( 'hex' )
This diff is so big that we needed to truncate the remainder.
Repository URL: https://bitbucket.org/galaxy/galaxy-central/
--
This is a commit notification from bitbucket.org. You are receiving this because you have the service enabled, addressing the recipient of this email.