module Bio::Alignment::Output
Public Instance Methods
__output_phylip_common(options = {})
click to toggle source
common routine for interleaved/non-interleaved phylip format
# File lib/bio/alignment.rb, line 1099
#
# Common routine for interleaved/non-interleaved phylip format.
#
# Keys read from +options+ (all optional):
# :replace_space::   when true, whitespace in names is replaced by '_'
# :escape::          unless explicitly false, the characters : ; , ( ) in
#                    names are replaced by '_'
# :split::           unless explicitly false, each name is cut at its first
#                    whitespace character
# :avoid_same_name:: unless explicitly false, names are passed through
#                    __clustal_avoid_same_name(names, 10)
# :width::           residues per output line (default 60); rounded down to
#                    a multiple of 10 and never smaller than 10
# :gap_char::        replacement for everything matching gap_regexp, also
#                    used to right-pad short sequences (default '-')
# :case::            a value whose to_s matches /lower/i or /upper/i forces
#                    that letter case
#
# Returns a three-element array [ aln, aseqs, lines ]:
# * aln   - one-element array holding the " <count> <length>\n" header
# * aseqs - one array of text lines per sequence; the first line carries
#           the name padded/truncated to 10 characters, the following lines
#           carry 10 blanks instead
# * lines - number of text lines each sequence was folded into
def __output_phylip_common(options = {})
  len = alignment_length
  aln = [ " #{number_of_sequences} #{len}\n" ]

  # Clean up the sequence names step by step.
  sn = sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn, 10)
  end

  namewidth = 10
  seqwidth = (options[:width] or 60)
  # Round down to whole groups of ten residues. A requested width below 10
  # would otherwise become 0 and make the line-count division below raise
  # ZeroDivisionError, so one group of ten is the minimum.
  seqwidth = [ seqwidth.div(10) * 10, 10 ].max
  # Every group of ten residues gets one leading blank, hence 11 characters
  # of text per group.
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = (options[:gap_char] or '-')

  aseqs = []
  each_seq do |s|
    aseqs << s.to_s.gsub(gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs.collect! do |s|
    snx = sn.shift
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth]
    head2 = ' ' * namewidth
    # pad short sequences up to the alignment length
    s << (gchar * (len - s.length))
    # blank-separated groups of ten, then fold into lines
    s.gsub!(/(.{1,10})/n, " \\1")
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    head += a.shift
    ret = a.collect { |x| head2 + x }
    ret.unshift(head)
    ret
  end

  lines = (len + seqwidth - 1).div(seqwidth)
  [ aln, aseqs, lines ]
end
output(format, *arg)
click to toggle source
# File lib/bio/alignment.rb, line 873
#
# Dispatches to the output_* method that corresponds to +format+ and
# returns whatever that method returns.
#
# format:: one of :clustal, :fasta, :phylip, :phylipnon, :msf, :molphy
# arg::    forwarded unchanged to the selected output_* method
#
# Raises RuntimeError ("Unknown format: ...") for any other +format+.
def output(format, *arg)
  writer =
    case format
    when :clustal   then :output_clustal
    when :fasta     then :output_fasta
    when :phylip    then :output_phylip
    when :phylipnon then :output_phylipnon
    when :msf       then :output_msf
    when :molphy    then :output_molphy
    else
      raise "Unknown format: #{format.inspect}"
    end
  __send__(writer, *arg)
end
output_clustal(options = {})
click to toggle source
Generates ClustalW-formatted text
- seqs: sequences (must be an alignment object)
- names: names of the sequences
- options: options
# File lib/bio/alignment.rb, line 1045
#
# Generates ClustalW-formatted text.
#
# Hands this object, its sequence names and +options+ to
# __clustal_formatter and returns that method's result.
def output_clustal(options = {})
  names = sequence_names
  __clustal_formatter(self, names, options)
end
output_fasta(options={})
click to toggle source
Generates fasta format text and returns a string.
# File lib/bio/alignment.rb, line 1059
#
# Generates fasta format text and returns a string.
#
# Keys read from +options+:
# :width::           maximum residues per line (default 70); a value that
#                    is not greater than 0 leaves each sequence on one line
# :avoid_same_name:: when true, names come from
#                    __clustal_avoid_same_name(sequence_names, 30);
#                    otherwise CR, LF and NUL in each name become a blank
def output_fasta(options={})
  #(original)
  line_width = (options[:width] or 70)

  labels =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(sequence_names, 30)
    else
      sequence_names.collect { |name| name.to_s.gsub(/[\r\n\x00]/, ' ') }
    end

  # Decide once whether sequences are folded into fixed-width lines.
  folding = (line_width and line_width > 0)
  chunk = folding ? Regexp.new(".{1,#{line_width}}") : nil

  records = collect do |seq|
    title = ">#{labels.shift}\n"
    if folding
      title + seq.to_s.gsub(chunk, "\\0\n")
    else
      title + seq.to_s + "\n"
    end
  end
  records.join('')
end
output_molphy(options = {})
click to toggle source
Generates Molphy alignment format text as a string
# File lib/bio/alignment.rb, line 1151
#
# Generates Molphy alignment format text as a string.
#
# The result is a "<count> <length>\n" header followed, for every
# sequence, by its name on one line and the sequence folded into lines of
# at most :width characters.
#
# Keys read from +options+ (all optional):
# :replace_space::   when true, whitespace in names is replaced by '_'
# :escape::          unless explicitly false, the characters : ; , ( ) in
#                    names are replaced by '_'
# :split::           unless explicitly false, each name is cut at its first
#                    whitespace character
# :avoid_same_name:: unless explicitly false, names are passed through
#                    __clustal_avoid_same_name(names, 30)
# :width::           characters per sequence line (default 60)
# :gap_char::        replacement for everything matching gap_regexp, also
#                    used to right-pad short sequences (default '-')
# :case::            a value whose to_s matches /lower/i or /upper/i forces
#                    that letter case
def output_molphy(options = {})
  len = alignment_length
  header = "#{number_of_sequences} #{len}\n"

  sn = sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn, 30)
  end

  seqwidth = (options[:width] or 60)
  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] or '-')

  # Start from an empty list; there is no need to pre-allocate one slot per
  # alignment column only to discard them again.
  aseqs = []
  each_seq do |s|
    aseqs << s.to_s.gsub(gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs.collect! do |s|
    # pad short sequences up to the alignment length, then fold
    s << (gchar * (len - s.length))
    s.gsub!(seqregexp, "\\1\n")
    sn.shift + "\n" + s
  end
  aseqs.unshift(header)
  aseqs.join('')
end
output_msf(options = {})
click to toggle source
Generates msf formatted text as a string
# File lib/bio/alignment.rb, line 1193
#
# Generates msf formatted text as a string.
#
# Keys read from +options+ (all optional):
# :avoid_same_name:: unless explicitly false, names come from
#                    __clustal_avoid_same_name(sequence_names); otherwise
#                    CR, LF and NUL in each name become a blank
# :replace_space::   unless explicitly false, whitespace in names is
#                    replaced by '_'
# :escape::          unless explicitly false, the characters : ; , ( ) in
#                    names are replaced by '_'
# :split::           unless explicitly false, each name is cut at its first
#                    whitespace character
# :gap_char::        replacement for everything matching gap_regexp
#                    (default '.')
# :padding_char::    character substituted for leading gaps and used to pad
#                    the end of each sequence (default '~')
# :case::            /lower/i or /upper/i; anything else means upper case
# :type::            /protein/i or /aa/i selects protein, /na/i selects
#                    nucleotide; otherwise seqclass is consulted
# :entry_id::        name printed in the header (default: object id + '.msf')
# :time::            object responding to strftime (default Time.now)
def output_msf(options = {})
  len = seq_length

  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sequence_names)
  else
    sn = sequence_names.collect do |x|
      x.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  if !options.has_key?(:replace_space) or options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end

  seqwidth = 50
  namewidth = [31, sn.collect { |x| x.length }.max ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] or '.')
  pchar = (options[:padding_char] or '~')

  aseqs = []
  each_seq do |s|
    aseqs << s.to_s.gsub(gap_regexp, gchar)
  end
  aseqs.each do |s|
    # leading gaps become padding, trailing gaps are dropped and the
    # sequence is then padded up to +len+
    s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
    s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
    s << (pchar * (len - s.length))
  end

  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  else #default upcase
    aseqs.each { |s| s.upcase! }
  end

  case options[:type].to_s
  when /protein/i, /aa/i
    amino = true
  when /na/i
    amino = false
  else
    if seqclass == Bio::Sequence::AA then
      amino = true
    elsif seqclass == Bio::Sequence::NA then
      amino = false
    else
      # if we can't determine, we assume as protein.
      amino = aseqs.size
      aseqs.each { |x| amino -= 1 if /\A[acgt]\z/i =~ x }
      amino = false if amino <= 0
    end
  end
  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] or __id__.abs.to_s + '.msf')
  dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')

  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  #sums = aseqs.collect { |s| 0 }
  # The header checksum is derived from the per-sequence checksums: their
  # total reduced modulo 10000 (it must not be overwritten by a constant).
  sum = sums.inject(0) { |total, x| total + x } % 10000

  # NOTE(review): MSF files written by GCG usually start this first line
  # with "!!" -- confirm against the library source whether a prefix is
  # missing here.
  msf = [
    "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
    "\n",
    "\n",
    " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n",
    "\n"
  ]

  sn.each do |snx|
    msf << ' Name: ' +
      sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
      " Len: #{len} Check: #{sums.shift} Weight: 1.00\n"
  end
  msf << "\n//\n"

  # Turn every sequence into an array of "<name><sep><chunk>\n" lines.
  aseqs.collect! do |s|
    snx = sn.shift
    head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    a.collect { |x| head + x }
  end

  lines = (len + seqwidth - 1).div(seqwidth)
  i = 1
  lines.times do
    msf << "\n"
    # ruler line showing the first and last column number of this block
    n_l = i
    n_r = [ i + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s + ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    aseqs.each { |a| msf << a.shift }
    i += seqwidth
  end
  msf << "\n"
  msf.join('')
end
output_phylip(options = {})
click to toggle source
generates phylip interleaved alignment format as a string
# File lib/bio/alignment.rb, line 1082
#
# Generates phylip interleaved alignment format as a string.
#
# Takes the header, the per-sequence line arrays and the block count from
# __output_phylip_common(options), emits the next line of every sequence
# for each block with an empty line after each block, and drops the final
# empty line.
def output_phylip(options = {})
  pieces, per_seq_rows, block_count = __output_phylip_common(options)

  block_count.times do
    per_seq_rows.each do |rows|
      pieces << rows.shift
    end
    pieces << "\n"
  end

  # no separator after the last block
  pieces.pop if pieces[-1] == "\n"
  pieces.join('')
end
output_phylipnon(options = {})
click to toggle source
generates Phylip3.2 (old) non-interleaved format as a string
# File lib/bio/alignment.rb, line 1093
#
# Generates Phylip3.2 (old) non-interleaved format as a string.
#
# The header from __output_phylip_common(options) is followed by all lines
# of the first sequence, then all lines of the second, and so on.
def output_phylipnon(options = {})
  pieces, per_seq_rows, = __output_phylip_common(options)
  body = per_seq_rows.flatten.join('')
  pieces.first + body
end
to_clustal(*arg)
click to toggle source
to_clustal is deprecated. Instead, please use output_clustal.
# File lib/bio/alignment.rb, line 1053
#
# Deprecated alias: prints a deprecation warning and then returns the
# result of output_clustal called with the same arguments.
def to_clustal(*arg)
  notice = "to_clustal is deprecated. Please use output_clustal."
  warn(notice)
  output_clustal(*arg)
end