class Bio::GCG::Msf

MSF is a multiple sequence alignment format developed at Wisconsin (the GCG Wisconsin Package). Bio::GCG::Msf is a parser for the MSF format.

Constants

DELIMITER

delimiter used by Bio::FlatFile

Attributes

checksum[R]

checksum

date[R]

date

description[R]

description

entry_id[R]

ID of the alignment

heading[R]

heading ('!!NA_MULTIPLE_ALIGNMENT 1.0' or similar)

length[R]

alignment length

seq_type[R]

sequence type ("N" for DNA/RNA or "P" for protein)

Public Class Methods

new(str) click to toggle source

Creates a new Msf object.

# File lib/bio/appl/gcg/msf.rb, line 31
# Creates a new Msf object from the text of one MSF entry.
#
# +str+ is split at the first line that consists only of "//". The part
# before it (the preamble) is parsed here into heading, description,
# header fields and per-sequence info; the part after it is stored
# unparsed in @data.
#
# NOTE(review): when +str+ contains nothing but line breaks, the split
# yields no preamble and the first sub! below is called on nil; confirm
# that callers never pass an empty entry.
def initialize(str)
  # Drop any leading CR/LF characters.
  str = str.sub(/\A[\r\n]+/, '')
  # Split into at most two parts at the first line that is exactly "//".
  preamble, @data = str.split(/^\/\/$/, 2)
  # Cut the heading off the very start of the preamble. After sub!, $&
  # holds the removed text, or nil when there was no heading to remove.
  preamble.sub!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/, '')
  @heading = $& # e.g. '!!NA_MULTIPLE_ALIGNMENT 1.0'
  # With /m the dot also matches newlines, so this removes everything up
  # to and including the last ".." that is followed only by whitespace
  # until an end of line. What is left in preamble is scanned for
  # "Name:" lines further down.
  preamble.sub!(/.*\.\.\s*$/m, '')
  # From the text just removed, strip the first line that ends in "..";
  # the remainder is the description.
  @description = $&.to_s.sub(/^.*\.\.\s*$/, '').to_s
  # $& now refers to the match of the sub on the previous line, i.e. the
  # ".." line itself (empty string when nothing matched).
  d = $&.to_s
  # Header fields on that line: optional leading text (entry id), the
  # number after "MSF:", one word character after "Type:", free text
  # (stored as the date), and the number after "Check:" or "CompCheck:".
  # When the line does not match, none of these five variables is assigned.
  if m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(d) then
    @entry_id = m[1].to_s.strip
    @length   = (m[2] ? m[2].to_i : nil)
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    # m[5] is the optional "Comp" prefix and is not stored.
    @checksum = (m[6] ? m[6].to_i : nil)
  end

  # Build one hash of "key: value" pairs for every remaining preamble
  # line that contains "Name: " (keys and values are kept as strings).
  @seq_info = []
  preamble.each_line do |x|
    if /Name\: / =~ x then
      s = {}
      x.scan(/(\S+)\: +(\S*)/) { |y| s[$1] = $2 }
      @seq_info << s
    end
  end

  # Remove a single leading line break from the description.
  @description.sub!(/\A(\r\n|\r|\n)/, '')
  # No alignment yet; presumably filled in later by do_parse (not
  # visible here) — confirm.
  @align = nil
end

Public Instance Methods

alignment() click to toggle source

returns Bio::Alignment object.

# File lib/bio/appl/gcg/msf.rb, line 176
# Returns the alignment held in @align (documented as a Bio::Alignment
# object). do_parse is invoked on every call before the value is read.
def alignment
  do_parse
  return @align
end
compcheck() click to toggle source

CompCheck field

# File lib/bio/appl/gcg/msf.rb, line 118
# Returns the integer that follows "CompCheck:" in the description, or
# nil when the description has no such field. The result (including nil)
# is memoized in @compcheck, so the description is searched at most once.
def compcheck
  return @compcheck if defined?(@compcheck)

  field = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = field ? field[1].to_i : nil
end
gap_length_weight() click to toggle source

gap length weight

# File lib/bio/appl/gcg/msf.rb, line 109
# Returns the non-whitespace token that follows "GapLengthWeight:" in the
# description as a String, or nil when the field is absent. The result
# (including nil) is memoized in @gap_length_weight.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  field = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = field && field[1]
end
gap_weight() click to toggle source

gap weight

# File lib/bio/appl/gcg/msf.rb, line 100
# Returns the non-whitespace token that follows "GapWeight:" in the
# description as a String, or nil when the field is absent. The result
# (including nil) is memoized in @gap_weight.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  field = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = field && field[1]
end
seq_data() click to toggle source

gets seq data (used internally) (will be obsoleted)

# File lib/bio/appl/gcg/msf.rb, line 182
# Returns the contents of @seq_data. do_parse is invoked on every call
# before the value is read. Documented as internal and due to be
# obsoleted.
def seq_data
  do_parse
  return @seq_data
end
symbol_comparison_table() click to toggle source

symbol comparison table

# File lib/bio/appl/gcg/msf.rb, line 91
# Returns the non-whitespace token that follows "Symbol comparison table:"
# in the description as a String, or nil when the field is absent. The
# result (including nil) is memoized in @symbol_comparison_table.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  field = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = field && field[1]
end
validate_checksum() click to toggle source

validates checksum

# File lib/bio/appl/gcg/msf.rb, line 188
# Validates the checksums of the entry and returns true or false.
#
# After do_parse, each element of @seq_data is run through
# Bio::GCG::Seq.calc_checksum and compared with the 'Check' value of the
# @seq_info entry at the same index; the first mismatch yields false.
# When every sequence matches, the sum of the per-sequence checksums
# modulo 10000 must equal @checksum, unless @checksum is 0, in which case
# the total is not checked.
def validate_checksum
  do_parse
  running_total = 0
  @seq_data.each_with_index do |sequence, index|
    computed = Bio::GCG::Seq.calc_checksum(sequence)
    return false unless computed == @seq_info[index]['Check'].to_i

    running_total += computed
  end

  # "Check:" field of BioPerl is always 0, so a zero total is accepted as is.
  return true if @checksum == 0

  (running_total % 10000) == @checksum
end