class Bio::PSORT::PSORT2::Report

Bio::PSORT::PSORT2::Report

Report parser class for PSORT II (PSORT2).

Example

Constants

BOUNDARY

Report boundary string.

RS

Report delimiter.

Attributes

definition[RW]

Definition of query sequence.

entry_id[RW]

entry_id of query sequence.

features[RW]

Feature vector used for the kNN prediction.

k[RW]

k parameter of k-nearest neighbors classifier.

pred[RW]

Predicted subcellular localization (three-letter code).

prob[RW]

Probability vector of kNN prediction.

raw[RW]

Raw text of output report.

scl[RW]

Given subcellular localization (three-letter code).

seq[RW]

Sequence of query sequence.

Public Class Methods

default_parser(ent, entry_id = nil) click to toggle source

Parser for the default report format. “psort report” output.

    # File lib/bio/appl/psort/report.rb
# Parser for the default report format ("psort report" output).
#
# The report text is split on blank lines and each chunk is chomped. The
# first four chunks are consumed in order: header line, feature list,
# kNN probability table and final prediction line.
#
# ent::      raw report text
# entry_id:: optional entry identifier, passed on to the constructor
#
# Returns the populated report object.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)
  header, feature_text, prob_text, prediction_line =
    ent.split(/\n\n/).map {|chunk| chunk.chomp }

  report.set_header_line(header)

  # feature matrix: "name: value" pairs separated by two spaces once the
  # line breaks have been flattened into spaces
  feature_text.gsub(/\n/, ' ').strip.split(/  /).each do |item|
    name, value = item.split(/: /)
    report.features[name.strip] = value.strip.to_f
  end

  report.prob = self.set_kNN_prob(prob_text)
  report.set_prediction(prediction_line)

  report
end
divent(entry) click to toggle source

Divides entry body

    # File lib/bio/appl/psort/report.rb
# Divides an entry body at the report boundary.
#
# entry:: collection (as passed by v_parser: an Array of report chunks)
#         that contains BOUNDARY
#
# Returns a two-element Array: everything before BOUNDARY, and everything
# from two positions after BOUNDARY onwards (the boundary itself and the
# element following it are dropped).
#
# Raises ArgumentError when BOUNDARY is not present.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  if boundary.nil?
    raise ArgumentError, "report boundary #{BOUNDARY.inspect} not found"
  end

  # An exclusive range is required here: with an inclusive 0..(boundary - 1)
  # a boundary at index 0 turns into 0..-1, which selects the whole input
  # instead of nothing.
  [entry[0...boundary], entry[(boundary + 2)..entry.length]]
end
new(raw = '', entry_id = nil, scl = nil, definition = nil, seq = nil, k = nil, features = {}, prob = {}, pred = nil) click to toggle source

Constructs a Bio::PSORT::PSORT2::Report object.

    # File lib/bio/appl/psort/report.rb
# Constructs a report object, storing every argument in the instance
# variable of the same name.
#
# raw::        raw text of the output report
# entry_id::   entry_id of the query sequence
# scl::        given subcellular localization
# definition:: definition of the query sequence
# seq::        query sequence
# k::          k parameter of the k-nearest neighbors classifier
# features::   feature vector
# prob::       probability vector
# pred::       predicted subcellular localization
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  @raw = raw
  @entry_id, @scl, @definition, @seq = entry_id, scl, definition, seq
  @k, @features, @prob, @pred = k, features, prob, pred
end
parser(str, entry_id) click to toggle source

Parses an output report, automatically detecting its output format.

    # File lib/bio/appl/psort/report.rb
# Parses an output report, detecting the output format from its content.
#
# str::      raw report text
# entry_id:: entry identifier handed to the selected parser
#
# Dispatch, first match wins:
# * a line starting with " psg:"   -> default_parser
# * a line starting with "PSG:"    -> v_parser
# * ": too short length "          -> too_short_parser
# * "PSORT II server"              -> an otherwise unparsed report that
#   only carries the raw text and entry_id
#
# Raises ArgumentError when none of the patterns match.
def self.parser(str, entry_id)
  case str
  when /^ psg:/   # default report
    self.default_parser(str, entry_id)
  when /^PSG:/    # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # This branch used to reference an undefined local (+ent+) and so
    # raised NameError; the text being parsed is +str+.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end
set_kNN_prob(str) click to toggle source

Builds and returns the probability hash (the value stored in @prob) from the kNN result text.

    # File lib/bio/appl/psort/report.rb
# Builds the kNN probability table from the probability section of a report.
#
# Every key of Bio::PSORT::PSORT2::SclNames starts at 0.0. Each input
# line of the form "<value> %: <name>" (tabs are removed first) then sets
# the entry whose SclNames value equals <name> to <value> as a Float.
# A line whose name is not found in SclNames is stored under the key nil.
#
# str:: probability section text, one "<value> %: <name>" per line
#
# Returns the Hash of localization code => probability.
def self.set_kNN_prob(str)
  scl_names = Bio::PSORT::PSORT2::SclNames
  prob = {}
  scl_names.keys.each {|code| prob[code] = 0.0 }

  str.gsub(/\t/, '').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the replacement for the deprecated Hash#index, which
    # newer Ruby releases no longer provide.
    prob[scl_names.key(scl)] = val.to_f
  end
  prob
end
too_short_parser(ent, entry_id = nil) click to toggle source

Parser for “too short length” report.

$id: too short length ($leng), skipped\n";
    # File lib/bio/appl/psort/report.rb
# Parser for the "too short length" report.
#
# ent::      raw report text
# entry_id:: optional entry identifier; when absent, the text before
#            ": too short length" on a matching line is used instead
#
# Returns a report whose scl is '---' when the text matches; otherwise
# the report only carries the raw text and the given entry_id.
def self.too_short_parser(ent, entry_id = nil)
  report = self.new(ent)
  report.entry_id = entry_id
  return report unless ent =~ /^(.+)?: too short length/

  # keep an explicitly supplied id, otherwise fall back to the parsed one
  report.entry_id ||= $1
  report.scl = '---'
  report
end
v_parser(ent, entry_id = nil) click to toggle source

Parser for the verbose output report format. “psort -v report” and WWW server output.

    # File lib/bio/appl/psort/report.rb
# Parser for the verbose output report format ("psort -v report") and
# the WWW server output.
#
# ent::      raw report text
# entry_id:: optional entry identifier, passed on to the constructor
#
# Returns the populated report object.
#
# NOTE(review): relies on the private helper search_j, which is not
# visible here; from its use it presumably returns how many positions
# back the chunk to merge into is located -- confirm against its source.
338 def self.v_parser(ent, entry_id = nil)
339   report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)
340 
  # Split the report into blank-line separated chunks.
341   ent = ent.split(/\n\n/).map {|e| e.chomp }
  # Merge continuation chunks into an earlier chunk: a chunk that does not
  # start with a word character, '-', '>' or a tab is appended to the chunk
  # j positions back and its own slot is set to nil (removed by compact!).
342   ent.each_with_index {|e, i|
343     unless /^(\w|-|\>|\t)/ =~ e
344       j = self.__send__(:search_j, i, ent)
345       ent[i - j] += e
346       ent[i] = nil
347     end
    # Chunks starting with "none" are merged the same way.
348     if /^none/ =~ e    # psort output bug
349       j = self.__send__(:search_j, i, ent)
350       ent[i - j] += e
351       ent[i] = nil
352     end
353   }
354   ent.compact!
355 
  # WWW output: drop the leading server banner chunk, then remove the
  # "Results of Subprograms" chunk and the chunk just before it.
  # NOTE(review): if no chunk starts with "Results of Subprograms",
  # delline stays '' and ent.index('') may be nil, making `i - 1` raise.
356   if /^ PSORT II server/ =~ ent[0] # for WWW version
357     ent.shift 
358     delline = ''
359     ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
360     i = ent.index(delline)
361     ent.delete(delline)
362     ent.delete_at(i - 1)
363   end
364 
  # The next two chunks are the header line and the query sequence.
365   report.set_header_line(ent.shift)  
366   report.seq = Bio::Sequence::AA.new(ent.shift)
367 
  # Remaining chunks: feature chunks before the boundary, then the kNN
  # probability table (pent[0]) and the prediction line (pent[1]).
368   fent, pent = self.divent(ent)
369   report.set_features(fent)          
370   report.prob = self.set_kNN_prob(pent[0].strip)  
371   report.set_prediction(pent[1].strip)
372 
373   return report
374 end

Public Instance Methods

set_features(features_ary) click to toggle source

Sets @features values.

    # File lib/bio/appl/psort/report.rb
# Sets @features values.
#
# Each chunk is stored whole under its label, i.e. the stripped text in
# front of the first ": " or ":\n". The length of the sequence is stored
# under the key 'AA' afterwards.
#
# features_ary:: collection of feature chunks (Strings)
def set_features(features_ary)
  features_ary.each do |chunk|
    label = chunk.split(/\:( |\n)/).first.strip
    self.features[label] = chunk # unless /^\>/ =~ label
  end
  self.features['AA'] = self.seq.length
end
set_header_line(str) click to toggle source

Sets @entry_id (unless already set), @definition and @scl from the header line.

    # File lib/bio/appl/psort/report.rb
# Sets header information from the header line of a report.
#
# A leading line of dashes is dropped and the rest is split on tabs and
# spaces. Unless @entry_id is already set, the first token (minus leading
# dashes) becomes @entry_id. @definition is the text following
# "(<n> aa) " when that pattern is present, otherwise the remaining
# tokens joined by spaces. @scl is set to the first word of @definition
# when that word is a key of SclNames.
#
# str:: header line text; it is not modified (a frozen String is accepted)
def set_header_line(str)
  # Work on a copy: the destructive sub! altered the caller's string and
  # raised on frozen input.
  header = str.sub(/^-+\n/, '')
  tmp = header.split(/\t| /)
  @entry_id = tmp.shift.sub(/^-+/, '').strip unless @entry_id

  rest = tmp.join(' ').chomp
  case rest
  when /\(\d+ aa\) (.+)$/
    @definition = $1
  else
    @definition = rest
  end
  scl = @definition.split(' ')[0]

  @scl = scl if SclNames.key?(scl)
end
set_prediction(str) click to toggle source

Sets @pred and @k values (and @entry_id when it is not already set).

    # File lib/bio/appl/psort/report.rb
# Sets @pred and @k from the final prediction line, which must contain
# "prediction for <id> is <three-character code> (k=<digits>)".
# @entry_id is taken from the line only when it is not already set.
# @k is stored as the matched String.
#
# str:: prediction line text
#
# Raises ArgumentError when the line does not have the expected form.
def set_prediction(str)
  unless /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/ === str
    raise ArgumentError, 
      "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  parsed_id, parsed_pred, parsed_k = Regexp.last_match.captures
  @entry_id ||= parsed_id
  @pred = parsed_pred
  @k    = parsed_k
end