class Bio::PSORT::PSORT2::Report

Bio::PSORT::PSORT2::Report

Report parser class for PSORT II (PSORT2).

Example

Constants

BOUNDARY

Report boundary string.

RS

Report delimiter.

Attributes

definition[RW]

Definition of query sequence.

entry_id[RW]

#entry_id of query sequence.

features[RW]

Feature vector used for the kNN prediction.

k[RW]

k parameter of k-nearest neighbors classifier.

pred[RW]

Predicted subcellular localization (three letters code).

prob[RW]

Probability vector of kNN prediction.

raw[RW]

Raw text of output report.

scl[RW]

Given subcellular localization (three letters code).

seq[RW]

Sequence of query sequence.

Public Class Methods

default_parser(ent, entry_id = nil) click to toggle source

Parser for the default report format ("psort report" output).

# File lib/bio/appl/psort/report.rb, line 273
# Builds a report from text in the default output format.
#
# The text is cut at blank lines and the first four sections are consumed in
# order: header line, feature values, kNN probability list, prediction line.
#
# ent      :: raw report text
# entry_id :: optional entry identifier handed to the constructor
#
# Returns the populated report object.
def self.default_parser(ent, entry_id = nil)
  report = new(ent, entry_id)
  header, feature_text, prob_text, prediction_text =
    ent.split(/\n\n/).map(&:chomp)

  report.set_header_line(header)

  # Newlines are flattened to spaces, then the section is cut on two
  # consecutive spaces into "name: value" items stored as floats.
  feature_text.tr("\n", ' ').strip.split(/  /).each do |item|
    name, value = item.split(/: /)
    report.features[name.strip] = value.strip.to_f
  end

  report.prob = set_kNN_prob(prob_text)
  report.set_prediction(prediction_text)

  report
end
divent(entry) click to toggle source

Divides entry body

# File lib/bio/appl/psort/report.rb, line 392
# Divides the entry body at the BOUNDARY marker.
#
# entry :: the entry body; it is searched with #index for BOUNDARY
#          (v_parser passes the array of report chunks)
#
# Returns two values: everything before the boundary, and everything from
# two positions after the boundary onwards (the element directly following
# the boundary is skipped).
#
# Raises ArgumentError when BOUNDARY does not occur in +entry+.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  raise ArgumentError, 'report boundary not found in entry' if boundary.nil?

  # An exclusive range keeps the leading part empty when the boundary is the
  # very first element; 0..(boundary - 1) would turn into 0..-1 and select
  # the whole entry.
  return entry[0...boundary], entry[(boundary + 2)..(entry.length)]
end
new(raw = '', entry_id = nil, scl = nil, definition = nil, seq = nil, k = nil, features = {}, prob = {}, pred = nil) click to toggle source

Constructs a Bio::PSORT::PSORT2::Report object.

# File lib/bio/appl/psort/report.rb, line 227
# Creates a report object; every argument is optional and is stored
# unchanged in the instance variable of the same name.
#
# raw        :: raw report text
# entry_id   :: entry identifier
# scl        :: given subcellular localization
# definition :: definition of the query sequence
# seq        :: query sequence
# k          :: k parameter
# features   :: feature hash
# prob       :: probability hash
# pred       :: predicted subcellular localization
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  @entry_id, @scl, @definition, @seq = entry_id, scl, definition, seq
  @features, @prob, @pred = features, prob, pred
  @k, @raw = k, raw
end
parser(str, entry_id) click to toggle source

Parses output report with output format detection automatically.

# File lib/bio/appl/psort/report.rb, line 242
# Parses an output report, selecting the parser from the report text.
#
# str      :: raw report text
# entry_id :: entry identifier forwarded to the selected parser
#
# Dispatch, first match wins:
# * a line starting with " psg:"     -> default_parser
# * a line starting with "PSG:"      -> v_parser
# * text containing ": too short length " -> too_short_parser
# * text containing "PSORT II server" -> an unparsed report that only
#   carries the raw text and entry_id
#
# Returns the report object produced by the selected branch.
# Raises ArgumentError when no pattern matches.
def self.parser(str, entry_id)
  case str
  when /^ psg:/   # default report
    self.default_parser(str, entry_id)
  when /^PSG:/    # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # Build the report from +str+; the previous code referenced an undefined
    # local (+ent+) here and raised NameError.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end
set_kNN_prob(str) click to toggle source

Returns @prob value.

# File lib/bio/appl/psort/report.rb, line 309
# Builds the probability hash from the kNN result section.
#
# str :: text with one "<value> %: <name>" item per line; tabs are removed
#        before parsing
#
# Every key of Bio::PSORT::PSORT2::SclNames starts at 0.0. Each parsed line
# then overwrites the entry whose SclNames value equals the parsed name.
#
# Returns the Hash of SclNames key => Float.
def self.set_kNN_prob(str)
  scl_names = Bio::PSORT::PSORT2::SclNames
  prob = {}
  scl_names.each_key { |code| prob[code] = 0.0 }

  str.gsub(/\t/, '').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the value -> key lookup; the deprecated Hash#index used
    # here before is not available on current Ruby.
    prob[scl_names.key(scl)] = val.to_f
  end
  prob
end
too_short_parser(ent, entry_id = nil) click to toggle source

Parser for the "too short length" report.

$id: too short length ($leng), skipped\n";
# File lib/bio/appl/psort/report.rb, line 260
# Builds a report from a "too short length" message.
#
# ent      :: raw report text
# entry_id :: optional entry identifier; when absent, the text before
#             ": too short length" on the matching line is used
#
# When the message pattern is found, scl is set to '---'.
# Returns the report object.
def self.too_short_parser(ent, entry_id = nil)
  report = new(ent)
  report.entry_id = entry_id
  return report unless ent =~ /^(.+)?: too short length/

  report.entry_id ||= Regexp.last_match(1)
  report.scl = '---'
  report
end
v_parser(ent, entry_id = nil) click to toggle source

Parser for the verbose output report format ("psort -v report" and WWW server output).

# File lib/bio/appl/psort/report.rb, line 338
# Parses a report in the verbose format.
#
# The text is split at blank lines into chunks; chunks recognised as
# continuations are appended to an earlier chunk; for WWW-server output some
# decoration chunks are removed. The remaining chunks are consumed as: header
# line, sequence, then (via divent) feature chunks, kNN probabilities and the
# prediction line.
#
# ent      :: raw report text
# entry_id :: optional entry identifier handed to the constructor
#
# Returns the populated Bio::PSORT::PSORT2::Report.
def self.v_parser(ent, entry_id = nil)
  report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)

  # From here on +ent+ is the array of chunks, not the raw text.
  ent = ent.split(/\n\n/).map {|e| e.chomp }
  ent.each_with_index {|e, i|
    # A chunk that does not start with a word character, '-', '>' or a tab
    # is appended to the chunk j positions back and its own slot is blanked.
    # search_j is defined outside this view; presumably it returns the
    # distance to the nearest earlier non-nil chunk -- TODO confirm.
    unless /^(\w|-|\>|\t)/ =~ e
      j = self.__send__(:search_j, i, ent)
      ent[i - j] += e
      ent[i] = nil
    end
    # Chunks starting with "none" get the same merge treatment.
    if /^none/ =~ e    # psort output bug
      j = self.__send__(:search_j, i, ent)
      ent[i - j] += e
      ent[i] = nil
    end
  }
  # Drop the slots blanked by the merge above.
  ent.compact!

  if /^ PSORT II server/ =~ ent[0] # for WWW version
    # Remove the server banner chunk.
    ent.shift 
    # Find the (last) chunk starting with "Results of Subprograms", then
    # remove it together with the chunk immediately before it.
    # NOTE(review): if no such chunk exists, delline stays '' and +i+ is
    # likely nil, so "i - 1" would raise -- confirm this cannot happen.
    delline = ''
    ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
    i = ent.index(delline)
    ent.delete(delline)
    ent.delete_at(i - 1)
  end

  # First remaining chunk is the header, the second the sequence.
  report.set_header_line(ent.shift)  
  report.seq = Bio::Sequence::AA.new(ent.shift)

  # divent splits the rest at BOUNDARY into feature and prediction chunks.
  fent, pent = self.divent(ent)
  report.set_features(fent)          
  report.prob = self.set_kNN_prob(pent[0].strip)  
  report.set_prediction(pent[1].strip)

  return report
end

Public Instance Methods

set_features(features_ary) click to toggle source

Sets @features values.

# File lib/bio/appl/psort/report.rb, line 398
# Stores each feature chunk in #features under its leading label and records
# the length of #seq under the key 'AA'.
#
# features_ary :: array of feature chunk strings
#
# Returns the sequence length.
def set_features(features_ary)
  features_ary.each do |chunk|
    # The label is the text before the first ": " or ":\n".
    label = chunk.split(/\:( |\n)/).first.strip
    features[label] = chunk
  end
  features['AA'] = seq.length
end
set_header_line(str) click to toggle source

Sets @entry_id (unless already set), @definition and @scl from the header line.

# File lib/bio/appl/psort/report.rb, line 292
# Parses the header line and stores its parts on the report.
#
# str :: header text; a leading line of dashes is ignored. The argument is
#        not modified, so frozen strings are accepted.
#
# Sets @entry_id from the first token (leading dashes stripped) unless it is
# already set, sets @definition to the text after "(<n> aa) " when that
# pattern is present and to the remaining tokens otherwise, and sets @scl to
# the first word of the definition when it is a key of SclNames.
#
# Returns the assigned scl, or nil when @scl was not assigned.
def set_header_line(str)
  # Work on a copy: sub! would alter the caller's string in place.
  header = str.sub(/^-+\n/, '')
  tmp = header.split(/\t| /)
  # NOTE(review): when @entry_id is already set the first token is not
  # consumed and stays part of the definition candidate -- confirm intent.
  @entry_id = tmp.shift.sub(/^-+/, '').strip unless @entry_id

  rest = tmp.join(' ').chomp
  case rest
  when /\(\d+ aa\) (.+)$/
    @definition = $1
  else
    @definition = rest
  end
  scl = @definition.split(' ')[0]

  @scl = scl if SclNames.key?(scl)
end
set_prediction(str) click to toggle source

Sets @pred and @k values (and @entry_id if it is not already set).

# File lib/bio/appl/psort/report.rb, line 323
# Extracts the prediction result from a line of the form
# "prediction for <id> is <xxx> (k=<n>)".
#
# str :: prediction line
#
# Sets @pred to the three-character code and @k to the digits of k (kept as
# a String); @entry_id is filled from the line only when it is not set yet.
#
# Returns the k string.
# Raises ArgumentError when the line does not match.
def set_prediction(str)
  unless /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/ === str
    raise ArgumentError,
      "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  @entry_id ||= Regexp.last_match(1)
  @pred = Regexp.last_match(2)
  @k    = Regexp.last_match(3)
end