class Bio::GCG::Msf

MSF is a multiple sequence alignment format originating from the Wisconsin Package (GCG). Bio::GCG::Msf is a parser for the MSF format.

Constants

DELIMITER

delimiter used by Bio::FlatFile

Attributes

checksum[R]

checksum

date[R]

date

description[R]

description

entry_id[R]

ID of the alignment

heading[R]

heading line (for example ‘!!NA_MULTIPLE_ALIGNMENT 1.0’)

length[R]

alignment length

seq_type[R]

sequence type (“N” for DNA/RNA or “P” for protein)

Public Class Methods

new(str) click to toggle source

Creates a new Msf object.

   # File lib/bio/appl/gcg/msf.rb
# Creates a new Msf object from the text of a single MSF entry.
#
# The text is split at the first line consisting only of "//". The part
# before it (the preamble) is parsed here into the heading, the free-form
# description, the fields of the "MSF:" header line and one Hash per
# "Name:" line. The part after it is stored untouched in @data.
#
# Empty input (or input made only of line breaks) yields an object whose
# heading and header fields are nil, whose description is "" and whose
# sequence information list is empty, instead of raising NoMethodError.
#
# ---
# *Arguments*:
# * (required) _str_: String containing the MSF entry
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split returns [] for an empty string, which leaves preamble nil;
  # substitute an empty, mutable string so the in-place edits below work.
  preamble ||= ''.dup

  # Cut the heading off the preamble and remember it (nil when absent).
  preamble.sub!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/, '')
  @heading = Regexp.last_match(0) # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this

  # Cut everything up to and including the last line ending in ".." off the
  # preamble; that chunk holds the description followed by the header line.
  preamble.sub!(/.*\.\.\s*$/m, '')
  header_block = Regexp.last_match(0).to_s

  # Within that chunk, the first line ending in ".." is the header line;
  # what remains after removing it is the description.
  @description = header_block.sub(/^.*\.\.\s*$/, '')
  header_line = Regexp.last_match(0).to_s

  # Give every header attribute a defined value even when the header line
  # is missing or does not have the expected layout.
  @entry_id = @length = @seq_type = @date = @checksum = nil
  m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(header_line)
  if m
    @entry_id = m[1].to_s.strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  # Each remaining "Name:" line becomes a Hash of its "Key: value" pairs
  # (keys and values are kept as Strings).
  @seq_info = []
  preamble.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil # no alignment has been built yet
end

Public Instance Methods

alignment() click to toggle source

returns Bio::Alignment object.

    # File lib/bio/appl/gcg/msf.rb
# Returns the alignment held in @align.
#
# do_parse is invoked first on every call, so the value returned is
# whatever @align contains once do_parse has finished.
def alignment
  do_parse
  return @align
end
compcheck() click to toggle source

CompCheck field

    # File lib/bio/appl/gcg/msf.rb
# Returns the "CompCheck:" value found in the description as an Integer,
# or nil when the description has no such field.
#
# The description is searched only on the first call; the result
# (including nil) is cached in @compcheck and reused afterwards.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
gap_length_weight() click to toggle source

gap length weight

    # File lib/bio/appl/gcg/msf.rb
# Returns the "GapLengthWeight:" value found in the description as a
# String (the first run of non-blank characters after the label), or nil
# when the field is absent.
#
# The lookup happens once; the result, nil included, is cached in
# @gap_length_weight.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  hit = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = hit && hit[1]
end
gap_weight() click to toggle source

gap weight

    # File lib/bio/appl/gcg/msf.rb
# Returns the "GapWeight:" value found in the description as a String
# (the first run of non-blank characters after the label), or nil when
# the field is absent.
#
# The lookup happens once; the result, nil included, is cached in
# @gap_weight.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  hit = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = hit && hit[1]
end
seq_data() click to toggle source

Returns the sequence data. Used internally; this method will become obsolete.

    # File lib/bio/appl/gcg/msf.rb
# Returns the contents of @seq_data.
#
# do_parse is invoked first on every call, so the value returned is
# whatever @seq_data contains once do_parse has finished.
def seq_data
  do_parse
  return @seq_data
end
symbol_comparison_table() click to toggle source

symbol comparison table

   # File lib/bio/appl/gcg/msf.rb
# Returns the "Symbol comparison table:" value found in the description
# as a String (the first run of non-blank characters after the label),
# or nil when the field is absent.
#
# The lookup happens once; the result, nil included, is cached in
# @symbol_comparison_table.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  hit = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = hit && hit[1]
end
validate_checksum() click to toggle source

validates checksum

    # File lib/bio/appl/gcg/msf.rb
# Validates the checksums recorded in the entry and returns true or false.
#
# After do_parse has run, a checksum is computed for each element of
# @seq_data with Bio::GCG::Seq.calc_checksum and compared with the 'Check'
# value of the @seq_info entry at the same index. The first mismatch
# makes the method return false immediately.
#
# When every per-sequence checksum agrees, the sum of those checksums
# modulo 10000 must equal @checksum, unless @checksum is 0, in which case
# the overall comparison is skipped and the result is true.
def validate_checksum
  do_parse
  running_total = 0
  @seq_data.each_with_index do |seq, idx|
    computed = Bio::GCG::Seq.calc_checksum(seq)
    return false if computed != @seq_info[idx]['Check'].to_i
    running_total += computed
  end
  # "Check:" field of BioPerl is always 0
  return true unless @checksum != 0
  (running_total % 10000) == @checksum
end