class Bio::GCG::Msf
MSF is the multiple sequence alignment format of the GCG (Wisconsin) package. Bio::GCG::Msf
is a parser for the MSF format.
Constants
- DELIMITER
delimiter used by
Bio::FlatFile
Attributes
checksum[R]
checksum
date[R]
date
description[R]
description
entry_id[R]
ID of the alignment
heading[R]
heading line (for example ‘!!NA_MULTIPLE_ALIGNMENT 1.0’)
length[R]
alignment length
seq_type[R]
sequence type (“N” for DNA/RNA or “P” for protein)
Public Class Methods
new(str)
click to toggle source
Creates a new Msf
object.
# File lib/bio/appl/gcg/msf.rb, line 31
#
# Creates a new Msf object from the text of a single MSF entry.
#
# The text is split at the first line consisting solely of "//". The part
# before it (the preamble) is parsed here into the heading, the description,
# the fields of the "MSF:" header line and the per-sequence "Name:" lines.
# The part after it is stored unparsed in @data.
#
# str:: String containing the MSF entry. Empty or newline-only input
#       produces an entry with nil heading, empty description and no
#       sequence information.
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split returns [] for an empty string, which leaves preamble nil.
  # Fall back to a mutable empty string so the in-place edits below work
  # instead of raising NoMethodError.
  preamble ||= ''.dup

  # Remove the heading line; $& is the removed text (nil when absent).
  preamble.sub!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/, '')
  @heading = $& # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this

  # Remove everything up to and including the last ".." that ends a line.
  preamble.sub!(/.*\.\.\s*$/m, '')
  header_block = $&.to_s

  # Inside that block, the first line ending in ".." is the header line;
  # whatever is left after removing it is the description.
  @description = header_block.sub(/^.*\.\.\s*$/, '').to_s
  header_line = $&.to_s

  m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(header_line)
  if m
    @entry_id = m[1].to_s.strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  # Each "Name:" line becomes a Hash of its "Key: value" pairs
  # (all values are kept as Strings).
  @seq_info = []
  preamble.each_line do |line|
    next unless /Name\: / =~ line
    fields = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| fields[key] = value }
    @seq_info << fields
  end

  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil
end
Public Instance Methods
alignment()
click to toggle source
returns Bio::Alignment
object.
# File lib/bio/appl/gcg/msf.rb, line 176
#
# Returns the alignment held in @align after calling do_parse.
# (Documented on this page as a Bio::Alignment object.)
def alignment
  do_parse
  @align
end
compcheck()
click to toggle source
CompCheck field
# File lib/bio/appl/gcg/msf.rb, line 118
#
# CompCheck field: the integer following "CompCheck:" in the description,
# or nil when the description has no such field. The lookup runs once and
# the result (nil included) is cached in @compcheck.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
gap_length_weight()
click to toggle source
gap length weight
# File lib/bio/appl/gcg/msf.rb, line 109
#
# Gap length weight: the token following "GapLengthWeight:" in the
# description, returned as a String, or nil when the field is absent.
# The lookup runs once and the result is cached in @gap_length_weight.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found ? found[1] : nil
end
gap_weight()
click to toggle source
gap weight
# File lib/bio/appl/gcg/msf.rb, line 100
#
# Gap weight: the token following "GapWeight:" in the description,
# returned as a String, or nil when the field is absent.
# The lookup runs once and the result is cached in @gap_weight.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found ? found[1] : nil
end
seq_data()
click to toggle source
Returns the sequence data (used internally; this method will become obsolete)
# File lib/bio/appl/gcg/msf.rb, line 182
#
# Returns the contents of @seq_data after calling do_parse.
# Intended for internal use; this page marks it as due to become obsolete.
def seq_data
  do_parse
  @seq_data
end
symbol_comparison_table()
click to toggle source
symbol comparison table
# File lib/bio/appl/gcg/msf.rb, line 91
#
# Symbol comparison table: the token following "Symbol comparison table:"
# in the description, returned as a String, or nil when the field is absent.
# The lookup runs once and the result is cached in @symbol_comparison_table.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found ? found[1] : nil
end
validate_checksum()
click to toggle source
validates checksum
# File lib/bio/appl/gcg/msf.rb, line 188
#
# Validates the checksums of the entry.
#
# After do_parse, every sequence in @seq_data is checksummed with
# Bio::GCG::Seq.calc_checksum and compared with the "Check" value of the
# @seq_info entry at the same index. The first mismatch makes the method
# return false. If all sequences agree, the sum of the per-sequence
# checksums modulo 10000 is compared with @checksum, unless @checksum is 0,
# in which case the overall comparison is skipped.
#
# Returns true or false.
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false unless sum == @seq_info[idx]['Check'].to_i
    total += sum
  end

  # "Check:" field of BioPerl is always 0
  return true if @checksum == 0

  (total % 10000) == @checksum
end