module Bio::Alignment::Output - BioRuby API documentation

Source

     # File lib/bio/alignment.rb
# Common routine for interleaved/non-interleaved phylip format.
#
# Recognized keys in +options+:
# :replace_space::   when true, whitespace in names is replaced with '_'
# :escape::          unless present with a false/nil value, the characters
#                    ':', ';', ',', '(' and ')' in names are replaced with '_'
# :split::           unless present with a false/nil value, each name is cut
#                    at its first whitespace character
# :avoid_same_name:: unless present with a false/nil value, names are passed
#                    through __clustal_avoid_same_name(names, 10)
# :width::           residues per line (default 60); rounded down to a
#                    multiple of 10 and never smaller than 10
# :gap_char::        character substituted for gaps and used for right
#                    padding (default '-')
# :case::            a value matching /lower/i or /upper/i converts the
#                    sequences to that case
#
# Returns a three-element array:
# * an array holding the header line (" <number of sequences> <length>\n"),
# * an array with one entry per sequence, each an array of formatted lines
#   (the first line carries the 10-column name, the rest are indented),
# * the number of line blocks needed to print the whole alignment.
def __output_phylip_common(options = {})
  len = self.alignment_length
  aln = [ " #{self.number_of_sequences} #{len}\n" ]
  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn, 10)
  end

  namewidth = 10
  seqwidth  = (options[:width] or 60)
  seqwidth = seqwidth.div(10) * 10
  # A width below 10 would round down to 0, which yields a degenerate
  # pattern below and a division by zero when counting blocks.
  seqwidth = 10 if seqwidth < 10
  # Every group of 10 residues gets one leading space, hence 11 characters.
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = (options[:gap_char] or '-')

  aseqs = []
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  head2 = ' ' * namewidth
  rows = aseqs.zip(sn).collect do |s, snx|
    # Left-justify the name and cut it to exactly namewidth columns.
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth]
    # Pad short sequences up to the alignment length.
    s << (gchar * (len - s.length))
    s.gsub!(/(.{1,10})/n, " \\1")
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    # A zero-length sequence produces no lines at all; fall back to a bare
    # newline so the name line is still well-formed.
    head += (a.shift || "\n")
    ret = a.collect { |x| head2 + x }
    ret.unshift(head)
    ret
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  [ aln, rows, lines ]
end

Common routine shared by the interleaved (output_phylip) and non-interleaved (output_phylipnon) PHYLIP formatters.

output (format, *arg)

Source

    # File lib/bio/alignment.rb
# Dispatches to the formatter method that corresponds to +format+.
#
# format:: one of :clustal, :fasta, :phylip, :phylipnon, :msf or :molphy
# arg::    remaining arguments, handed unchanged to the chosen formatter
#
# Returns whatever the selected formatter returns.
# Raises RuntimeError when +format+ is not one of the symbols above.
def output(format, *arg)
  formatters = {
    :clustal   => :output_clustal,
    :fasta     => :output_fasta,
    :phylip    => :output_phylip,
    :phylipnon => :output_phylipnon,
    :msf       => :output_msf,
    :molphy    => :output_molphy
  }
  formatter = formatters[format]
  raise "Unknown format: #{format.inspect}" unless formatter
  __send__(formatter, *arg)
end

output_clustal (options = {})

Source

     # File lib/bio/alignment.rb
# Generates ClustalW-formatted text from this alignment.
#
# The alignment itself and its sequence_names are handed, together with
# +options+, to __clustal_formatter, whose result is returned.
def output_clustal(options = {})
  names = self.sequence_names
  __clustal_formatter(self, names, options)
end

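Generates ClustalW-formatted text from this alignment. The method takes only options; the values below are what it passes on to the underlying ClustalW formatter.

seqs: sequences (the alignment object itself)
names: names of the sequences (taken from sequence_names)
options: options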

output_fasta (options={})

Source

     # File lib/bio/alignment.rb
# Generates fasta format text and returns a string.
#
# Recognized keys in +options+:
# :width::           residues per line (default 70); a value that is not
#                    greater than zero puts each sequence on a single line
# :avoid_same_name:: when true, names come from
#                    __clustal_avoid_same_name(sequence_names, 30); otherwise
#                    CR, LF and NUL characters in names are replaced by spaces
def output_fasta(options={})
  width = options[:width] || 70
  names =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(self.sequence_names, 30)
    else
      self.sequence_names.collect { |name| name.to_s.gsub(/[\r\n\x00]/, ' ') }
    end
  # nil means "do not wrap".
  wrapper = (width and width > 0) ? Regexp.new(".{1,#{width}}") : nil

  records = self.collect do |seq|
    title = ">#{names.shift}\n"
    residues = wrapper ? seq.to_s.gsub(wrapper, "\\0\n") : seq.to_s + "\n"
    title + residues
  end
  records.join('')
end

Generates fasta format text and returns a string.

output_molphy (options = {})

Source

     # File lib/bio/alignment.rb
# Generates Molphy alignment format text as a string.
#
# Recognized keys in +options+:
# :replace_space::   when true, whitespace in names is replaced with '_'
# :escape::          unless present with a false/nil value, the characters
#                    ':', ';', ',', '(' and ')' in names are replaced with '_'
# :split::           unless present with a false/nil value, each name is cut
#                    at its first whitespace character
# :avoid_same_name:: unless present with a false/nil value, names are passed
#                    through __clustal_avoid_same_name(names, 30)
# :width::           residues per line (default 60); a value that is not
#                    greater than zero puts each sequence on a single line
# :gap_char::        character substituted for gaps and used for right
#                    padding (default '-')
# :case::            a value matching /lower/i or /upper/i converts the
#                    sequences to that case
#
# The result starts with a "<number of sequences> <length>" line, followed
# for every sequence by a name line and the sequence lines.
def output_molphy(options = {})
  len = self.alignment_length
  header = "#{self.number_of_sequences} #{len}\n"
  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  if options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end
  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(sn, 30)
  end

  seqwidth  = (options[:width] or 60)
  # A non-positive width cannot describe a line length; treat it as
  # "do not wrap" instead of building a degenerate pattern.
  seqregexp = seqwidth > 0 ? Regexp.new("(.{1,#{seqwidth}})") : nil
  gchar = (options[:gap_char] or '-')

  aseqs = []
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  entries = aseqs.zip(sn).collect do |s, snx|
    # Pad short sequences up to the alignment length.
    s << (gchar * (len - s.length))
    if seqregexp
      s.gsub!(seqregexp, "\\1\n")
    else
      s << "\n"
    end
    snx.to_s + "\n" + s
  end
  entries.unshift(header)
  entries.join('')
end

Generates Molphy alignment format text as a string

output_msf (options = {})

Source

     # File lib/bio/alignment.rb
# Generates msf formatted text as a string.
#
# Recognized keys in +options+:
# :avoid_same_name:: unless present with a false/nil value, names come from
#                    __clustal_avoid_same_name(sequence_names); otherwise CR,
#                    LF and NUL characters in names are replaced by spaces
# :replace_space::   unless present with a false/nil value, whitespace in
#                    names is replaced with '_'
# :escape::          unless present with a false/nil value, the characters
#                    ':', ';', ',', '(' and ')' in names are replaced with '_'
# :split::           unless present with a false/nil value, each name is cut
#                    at its first whitespace character
# :gap_char::        character substituted for internal gaps (default '.')
# :padding_char::    character used for leading gaps and for right padding
#                    (default '~')
# :case::            a value matching /lower/i gives lower case; anything
#                    else gives upper case
# :type::            a value matching /protein/i or /aa/i forces protein, a
#                    value matching /na/i forces nucleotide; otherwise the
#                    type is taken from seqclass, and as a last resort
#                    guessed from the sequence content
# :entry_id::        name written in the header line (default: derived from
#                    this object's __id__ with '.msf' appended)
# :time::            object responding to strftime, used for the header
#                    timestamp (default: Time.now)
def output_msf(options = {})
  len = self.seq_length

  if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
    sn = __clustal_avoid_same_name(self.sequence_names)
  else
    sn = self.sequence_names.collect do |x|
      x.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  if !options.has_key?(:replace_space) or options[:replace_space]
    sn.collect! { |x| x.gsub(/\s/, '_') }
  end
  if !options.has_key?(:escape) or options[:escape]
    sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
  end
  if !options.has_key?(:split) or options[:split]
    sn.collect! { |x| x.split(/\s/)[0].to_s }
  end

  seqwidth = 50
  namewidth = [31, sn.collect { |x| x.length }.max ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char]  or '.')
  pchar = (options[:padding_char] or '~')

  aseqs = []
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  aseqs.each do |s|
    # Leading gaps become padding characters, trailing gaps are dropped and
    # the sequence is then padded up to the full length.
    s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
    s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
    s << (pchar * (len - s.length))
  end

  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  else #default upcase
    aseqs.each { |s| s.upcase! }
  end

  case options[:type].to_s
  when /protein/i, /aa/i
    amino = true
  when /na/i
    amino = false
  else
    if seqclass == Bio::Sequence::AA then
      amino = true
    elsif seqclass == Bio::Sequence::NA then
      amino = false
    else
      # if we can't determine, we assume as protein.
      # A row counts as nucleotide when it consists only of a/c/g/t and the
      # gap/padding characters inserted above; the alignment is treated as
      # nucleotide only when every row does.
      nucleotide_row =
        /\A[acgt#{Regexp.escape(gchar)}#{Regexp.escape(pchar)}]+\z/i
      amino = aseqs.size
      aseqs.each { |x| amino -= 1 if nucleotide_row =~ x }
      amino = false if amino <= 0
    end
  end

  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf')
  dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')

  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  #sums = aseqs.collect { |s| 0 }
  # The header checksum is the sum of the per-sequence checksums mod 10000.
  sum = 0; sums.each { |x| sum += x }; sum %= 10000
  msf =
    [
     "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
     "\n",
     "\n",
     " #{fn}  MSF: #{len}  Type: #{seq_type}  #{dt}  Check: #{sum} ..\n",
     "\n"
    ]

  # One "Name:" line per sequence; names are left-justified here.
  sn.each do |snx|
    msf << ' Name: ' +
      sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
      "  Len: #{len}  Check: #{sums.shift}  Weight: 1.00\n"
  end
  msf << "\n//\n"

  # Split every sequence into lines of seqwidth residues, each prefixed by
  # the (right-justified) name.
  aseqs.collect! do |s|
    snx = sn.shift
    head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  i = 1
  lines.times do
    msf << "\n"
    # Ruler line showing the first and last column number of this block.
    n_l = i
    n_r = [ i + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s + 
              ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    aseqs.each { |a| msf << a.shift }
    i += seqwidth
  end
  msf << "\n"
  msf.join('')
end

Generates msf formatted text as a string

output_phylip (options = {})

Source

     # File lib/bio/alignment.rb
# Generates phylip interleaved alignment format as a string.
#
# +options+ is handed unchanged to __output_phylip_common. The lines it
# returns are emitted block by block: one line from every sequence, then a
# blank line between blocks.
def output_phylip(options = {})
  text, rows, block_count = __output_phylip_common(options)
  block_count.times do
    rows.each { |row| text.push(row.shift) }
    text.push("\n")
  end
  # No blank separator after the final block.
  text.pop if text.last == "\n"
  text.join('')
end

generates phylip interleaved alignment format as a string

output_phylipnon (options = {})

Source

     # File lib/bio/alignment.rb
# Generates Phylip3.2 (old) non-interleaved format as a string.
#
# +options+ is handed unchanged to __output_phylip_common; the header line
# is followed by all lines of the first sequence, then all lines of the
# next one, and so on.
def output_phylipnon(options = {})
  common = __output_phylip_common(options)
  header_line = common[0].first
  body = common[1].join('')
  header_line + body
end

generates Phylip3.2 (old) non-interleaved format as a string

to_clustal (*arg)

Source

     # File lib/bio/alignment.rb
# Deprecated alias of output_clustal.
#
# Emits a deprecation warning, then forwards all arguments to
# output_clustal and returns its result.
def to_clustal(*arg)
  warn("to_clustal is deprecated. Please use output_clustal.")
  clustal_text = output_clustal(*arg)
  clustal_text
end

to_clustal is deprecated. Instead, please use output_clustal.