module Bio::Alignment::Output
Public Instance Methods
__output_phylip_common(options = {})
click to toggle source
common routine for interleaved/non-interleaved phylip format
# File lib/bio/alignment.rb, line 1099
#
# Common routine for interleaved/non-interleaved phylip format.
#
# Recognized keys of +options+:
# :replace_space::   when true, whitespace in names is replaced by "_"
# :escape::          unless explicitly false, the characters : ; , ( )
#                    in names are replaced by "_"
# :split::           unless explicitly false, names are cut at the
#                    first whitespace
# :avoid_same_name:: unless explicitly false, names are passed through
#                    __clustal_avoid_same_name (defined elsewhere) with
#                    a length of 10
# :width::           residues per output line (default 60); rounded
#                    down to a multiple of 10, but never below 10
# :gap_char::        character written for gaps (default "-")
# :case::            matches /lower/i or /upper/i to force the case of
#                    the sequences
#
# Returns a three-element array: [ header, rows, lines ], where header
# is a one-element array holding the "<count> <length>" line, rows
# holds one array of formatted text lines per sequence, and lines is
# the number of text lines needed for one sequence.
def __output_phylip_common(options = {})
  len = self.alignment_length
  aln = [ " #{self.number_of_sequences} #{len}\n" ]

  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  sn = __clustal_avoid_same_name(sn, 10) if options.fetch(:avoid_same_name, true)

  namewidth = 10
  # Sequence text is emitted in blocks of 10 residues. A width below 10
  # used to round down to 0 and raise ZeroDivisionError when computing
  # the number of lines, so at least one block per line is enforced.
  seqwidth = [ (options[:width] || 60).div(10) * 10, 10 ].max
  # Each block of 10 residues is preceded by one space, hence 11 chars.
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = (options[:gap_char] || '-')

  aseqs = []
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  head2 = ' ' * namewidth
  aseqs.collect! do |s|
    # The name column is padded/truncated to exactly namewidth chars.
    head = sn.shift.to_s.ljust(namewidth)[0, namewidth]
    # Pad short sequences with gaps up to the alignment length.
    s << (gchar * (len - s.length))
    s.gsub!(/(.{1,10})/n, " \\1")
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    # a is empty for a zero-length sequence; to_s keeps the bare name.
    head += a.shift.to_s
    a.collect { |x| head2 + x }.unshift(head)
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  [ aln, aseqs, lines ]
end
output(format, *arg)
click to toggle source
# File lib/bio/alignment.rb, line 873
#
# Dispatches to the formatter method selected by +format+ and returns
# its result. Any additional arguments are forwarded unchanged.
#
# Supported formats are :clustal, :fasta, :phylip, :phylipnon, :msf
# and :molphy. Any other value raises a RuntimeError.
def output(format, *arg)
  formatter =
    case format
    when :clustal   then :output_clustal
    when :fasta     then :output_fasta
    when :phylip    then :output_phylip
    when :phylipnon then :output_phylipnon
    when :msf       then :output_msf
    when :molphy    then :output_molphy
    end
  raise "Unknown format: #{format.inspect}" unless formatter
  __send__(formatter, *arg)
end
output_clustal(options = {})
click to toggle source
Generates ClustalW-formatted text
- seqs: sequences (must be an alignment object)
- names: names of the sequences
- options: options
# File lib/bio/alignment.rb, line 1045
#
# Generates ClustalW-formatted text for this alignment.
#
# The receiver, its sequence names and +options+ are handed to
# __clustal_formatter (defined elsewhere), whose result is returned.
def output_clustal(options = {})
  names = self.sequence_names
  __clustal_formatter(self, names, options)
end
output_fasta(options={})
click to toggle source
Generates fasta format text and returns a string.
# File lib/bio/alignment.rb, line 1059
#
# Generates fasta format text and returns a string.
#
# Recognized keys of +options+:
# :width::           residues per line (default 70); a value that is
#                    not greater than 0 disables line wrapping
# :avoid_same_name:: when true, names are taken from
#                    __clustal_avoid_same_name (defined elsewhere) with
#                    a length of 30; otherwise CR, LF and NUL in each
#                    name are replaced by a space
def output_fasta(options={})
  width = options[:width] || 70

  names =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(self.sequence_names, 30)
    else
      self.sequence_names.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    end

  # Choose how a sequence body is rendered once, then apply it to all.
  render =
    if width and width > 0
      w_reg = Regexp.new(".{1,#{width}}")
      lambda { |text| text.gsub(w_reg, "\\0\n") }
    else
      lambda { |text| text + "\n" }
    end

  records = self.collect do |s|
    ">#{names.shift}\n" + render.call(s.to_s)
  end
  records.join('')
end
output_molphy(options = {})
click to toggle source
Generates Molphy alignment format text as a string
# File lib/bio/alignment.rb, line 1151
#
# Generates Molphy alignment format text as a string.
#
# Recognized keys of +options+:
# :replace_space::   when true, whitespace in names is replaced by "_"
# :escape::          unless explicitly false, the characters : ; , ( )
#                    in names are replaced by "_"
# :split::           unless explicitly false, names are cut at the
#                    first whitespace
# :avoid_same_name:: unless explicitly false, names are passed through
#                    __clustal_avoid_same_name (defined elsewhere) with
#                    a length of 30
# :width::           residues per line (default 60); a value that is
#                    not greater than 0 disables line wrapping, as in
#                    output_fasta
# :gap_char::        character written for gaps (default "-")
# :case::            matches /lower/i or /upper/i to force the case of
#                    the sequences
def output_molphy(options = {})
  len = self.alignment_length
  header = "#{self.number_of_sequences} #{len}\n"

  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  sn = __clustal_avoid_same_name(sn, 30) if options.fetch(:avoid_same_name, true)

  seqwidth = (options[:width] || 60)
  # A non-positive width would produce a meaningless "{1,0}" interval,
  # so it is treated as "do not wrap".
  seqregexp = seqwidth > 0 ? Regexp.new("(.{1,#{seqwidth}})") : nil
  gchar = (options[:gap_char] || '-')

  aseqs = []
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs.collect! do |s|
    # Pad short sequences with gaps up to the alignment length.
    s << (gchar * (len - s.length))
    if seqregexp
      s.gsub!(seqregexp, "\\1\n")
    elsif !s.empty?
      s << "\n"
    end
    sn.shift.to_s + "\n" + s
  end
  aseqs.unshift(header)
  aseqs.join('')
end
output_msf(options = {})
click to toggle source
Generates msf formatted text as a string
# File lib/bio/alignment.rb, line 1193
#
# Generates msf formatted text as a string.
#
# Recognized keys of +options+:
# :avoid_same_name:: unless explicitly false, names are taken from
#                    __clustal_avoid_same_name (defined elsewhere);
#                    otherwise CR, LF and NUL are replaced by a space
# :replace_space::   unless explicitly false, whitespace in names is
#                    replaced by "_"
# :escape::          unless explicitly false, the characters : ; , ( )
#                    in names are replaced by "_"
# :split::           unless explicitly false, names are cut at the
#                    first whitespace
# :gap_char::        character written for internal gaps (default ".")
# :padding_char::    character written for leading/trailing gaps
#                    (default "~")
# :case::            matches /lower/i for lower case; anything else
#                    yields upper case
# :type::            matches /protein/i or /aa/i for protein, /na/i
#                    for nucleotide; otherwise the type is derived from
#                    seqclass or, failing that, from the residues
# :entry_id::        name written in the header line (default is
#                    derived from the object id)
# :time::            object responding to strftime, used for the
#                    header timestamp (default Time.now)
def output_msf(options = {})
  len = self.seq_length

  if options.fetch(:avoid_same_name, true)
    sn = __clustal_avoid_same_name(self.sequence_names)
  else
    sn = self.sequence_names.collect do |x|
      x.to_s.gsub(/[\r\n\x00]/, ' ')
    end
  end
  sn.collect! { |x| x.gsub(/\s/, '_') } if options.fetch(:replace_space, true)
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)

  seqwidth = 50
  # max is nil for an empty name list; to_i keeps the comparison valid.
  namewidth = [ 31, sn.collect { |x| x.length }.max.to_i ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = (options[:gap_char] || '.')
  pchar = (options[:padding_char] || '~')

  aseqs = []
  self.each_seq do |s|
    aseqs << s.to_s.gsub(self.gap_regexp, gchar)
  end
  aseqs.each do |s|
    # Leading gaps become padding; trailing gaps are dropped and the
    # sequence is then padded up to the alignment length.
    s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
    s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
    s << (pchar * (len - s.length))
  end

  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  else # default upcase (also covers an explicit /upper/i request)
    aseqs.each { |s| s.upcase! }
  end

  case options[:type].to_s
  when /protein/i, /aa/i
    amino = true
  when /na/i
    amino = false
  else
    if seqclass == Bio::Sequence::AA then
      amino = true
    elsif seqclass == Bio::Sequence::NA then
      amino = false
    else
      # If we can't determine the type, we assume protein unless every
      # sequence consists solely of a/c/g/t and gap/padding characters.
      # The previous pattern lacked a quantifier and therefore only
      # matched one-character sequences.
      na_like = /\A(?:[acgt]|#{Regexp.escape(gchar)}|#{Regexp.escape(pchar)})+\z/i
      amino = aseqs.reject { |x| na_like =~ x }.size > 0
    end
  end

  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] || self.__id__.abs.to_s + '.msf')
  dt = (options[:time] || Time.now).strftime('%B %d, %Y %H:%M')

  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  sum = sums.inject(0) { |acc, x| acc + x } % 10000
  msf =
    [
     "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
     "\n",
     "\n",
     " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n",
     "\n"
    ]

  sn.each do |snx|
    msf << ' Name: ' +
      sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
      " Len: #{len} Check: #{sums.shift} Weight: 1.00\n"
  end
  msf << "\n//\n"

  # Turn every sequence into an array of name-prefixed text lines.
  aseqs.collect! do |s|
    snx = sn.shift
    head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    a.collect { |x| head + x }
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  i = 1
  lines.times do
    msf << "\n"
    # Ruler line: first and last column number of this block.
    n_l = i
    n_r = [ i + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s +
              ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    aseqs.each { |a| msf << a.shift }
    i += seqwidth
  end
  msf << "\n"
  msf.join('')
end
output_phylip(options = {})
click to toggle source
generates phylip interleaved alignment format as a string
# File lib/bio/alignment.rb, line 1082
#
# Generates phylip interleaved alignment format as a string.
#
# +options+ is forwarded to __output_phylip_common. For every text
# line of the alignment, the next formatted line of each sequence is
# emitted, followed by a blank separator line; the final separator is
# removed before joining.
def output_phylip(options = {})
  out, per_sequence, line_count = __output_phylip_common(options)
  line_count.times do
    per_sequence.each { |chunks| out.push(chunks.shift) }
    out.push("\n")
  end
  out.pop if out.last == "\n"
  out.join('')
end
output_phylipnon(options = {})
click to toggle source
generates Phylip3.2 (old) non-interleaved format as a string
# File lib/bio/alignment.rb, line 1093
#
# Generates Phylip3.2 (old) non-interleaved format as a string.
#
# +options+ is forwarded to __output_phylip_common. The header line is
# followed by all formatted lines of the first sequence, then all
# lines of the second one, and so on.
def output_phylipnon(options = {})
  header, per_sequence, = __output_phylip_common(options)
  body = per_sequence.flatten.join('')
  header.first + body
end
to_clustal(*arg)
click to toggle source
to_clustal is deprecated. Instead, please use output_clustal.
# File lib/bio/alignment.rb, line 1053
#
# Deprecated alias of output_clustal: emits a deprecation warning and
# then forwards all arguments to output_clustal, returning its result.
def to_clustal(*arg)
  notice = "to_clustal is deprecated. Please use output_clustal."
  warn(notice)
  output_clustal(*arg)
end