class Bio::PSORT::PSORT2::Report
Bio::PSORT::PSORT2::Report
¶ ↑
Report parser class for PSORT II (PSORT2).
Example¶ ↑
Constants
Attributes
Definition of query sequence.
Entry ID (entry_id) of query sequence.
Feature vector used for the kNN prediction.
k parameter of k-nearest neighbors classifier.
Predicted subcellular localization (three-letter code).
Probability vector of kNN prediction.
Raw text of output report.
Given subcellular localization (three-letter code).
Sequence of the query.
Public Class Methods
Parser for the default report format. “psort report” output.
# File lib/bio/appl/psort/report.rb
#
# Parser for the default report format ("psort report" output).
#
# The raw text is split on blank lines; the first four sections are used,
# in order, as: header line, feature matrix, kNN probability list and
# prediction line.
#
# ent      :: raw report text
# entry_id :: optional entry ID, passed through to the constructor
#
# Returns the populated report object.
# Raises ArgumentError when the text has fewer than four sections.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)
  sections = ent.split(/\n\n/).map { |section| section.chomp }
  if sections.size < 4
    raise ArgumentError, "invalid format\n[#{ent}]"
  end

  report.set_header_line(sections[0])

  # Feature matrix: "name: value" pairs laid out over one or more lines.
  # Scanning for the pairs directly (rather than splitting on a fixed
  # separator) keeps each name together with its value whatever amount of
  # whitespace separates neighbouring pairs.
  sections[1].scan(/(\S+):\s+(\S+)/) do |name, value|
    report.features[name] = value.to_f
  end

  report.prob = self.set_kNN_prob(sections[2])
  report.set_prediction(sections[3])

  report
end
Divides entry body
# File lib/bio/appl/psort/report.rb
#
# Divides the entry body at the BOUNDARY marker.
#
# entry :: indexable collection (v_parser passes an Array of report
#          sections) in which BOUNDARY is looked up with #index
#
# Returns two values: everything before the boundary, and everything from
# two positions after the boundary onwards (the boundary itself and the
# element following it are dropped).
# Raises ArgumentError when the boundary is not present.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  if boundary.nil?
    raise ArgumentError, "boundary #{BOUNDARY.inspect} not found in entry"
  end

  # Exclusive range: with an inclusive `0..(boundary - 1)` a boundary at
  # index 0 would become `0..-1` and select the whole entry.
  return entry[0...boundary], entry[(boundary + 2)..-1]
end
Constructs a Bio::PSORT::PSORT2::Report object.
# File lib/bio/appl/psort/report.rb
#
# Builds a report object, storing every argument in the instance variable
# of the same name.
#
# raw        :: raw text of the output report (default '')
# entry_id   :: entry ID of the query
# scl        :: given subcellular localization
# definition :: definition of the query
# seq        :: query sequence
# k          :: k parameter of the kNN classifier
# features   :: feature hash (a fresh Hash per call by default)
# prob       :: probability hash (a fresh Hash per call by default)
# pred       :: predicted subcellular localization
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  # Raw text and the identity of the query.
  @raw = raw
  @entry_id, @definition, @seq = entry_id, definition, seq
  @scl = scl

  # Classifier inputs and outputs.
  @k = k
  @features, @prob = features, prob
  @pred = pred
end
Parses an output report, detecting the output format automatically.
# File lib/bio/appl/psort/report.rb
#
# Parses an output report, choosing the parser from the report text.
# The patterns are tried in order and the first match wins:
#
# * a line starting with " psg:"     -> default_parser
# * a line starting with "PSG:"      -> v_parser (-v report)
# * ": too short length "            -> too_short_parser
# * "PSORT II server"                -> report object holding only the raw
#   text and entry_id (no section of the text is parsed)
#
# str      :: raw report text
# entry_id :: optional entry ID, passed through to the chosen parser
#
# Returns the report object.
# Raises ArgumentError when none of the patterns match.
def self.parser(str, entry_id = nil)
  case str
  when /^ psg:/   # default report
    self.default_parser(str, entry_id)
  when /^PSG:/    # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # Server text without a recognisable report body: keep the raw text.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end
Parses the kNN probability section and returns the hash used as the @prob value.
# File lib/bio/appl/psort/report.rb
#
# Builds the kNN probability hash from the probability section of a report.
#
# Every key of Bio::PSORT::PSORT2::SclNames starts at 0.0. Each line of the
# form "<value> %: <name>" then sets the entry whose SclNames value equals
# <name> to <value> (as a Float). Lines that are blank, malformed, or name
# a localization not present in SclNames are skipped.
#
# str :: probability section text, one "<value> %: <name>" per line
#
# Returns a Hash mapping SclNames keys to Float values.
def self.set_kNN_prob(str)
  names = Bio::PSORT::PSORT2::SclNames

  prob = {}
  names.keys.each { |code| prob[code] = 0.0 }

  str.gsub(/\t/, '').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the supported reverse lookup (Hash#index is deprecated).
    key = names.key(scl)
    next unless key # no such localization name: do not store under nil

    prob[key] = val.to_f
  end

  prob
end
Parser for “too short length” report.
$id: too short length ($leng), skipped\n";
# File lib/bio/appl/psort/report.rb
#
# Parser for the "too short length" report.
#
# The report keeps the raw text. When the text contains
# "<id>: too short length", the localization is set to '---' and <id> is
# used as the entry ID unless one was supplied.
#
# ent      :: raw report text
# entry_id :: optional entry ID; takes precedence over the ID in the text
#
# Returns the report object.
def self.too_short_parser(ent, entry_id = nil)
  report = self.new(ent)
  report.entry_id = entry_id

  # Nothing more to fill in when the marker text is absent.
  return report unless ent =~ /^(.+)?: too short length/

  id_in_text = Regexp.last_match(1)
  report.entry_id = id_in_text unless report.entry_id
  report.scl = '---'
  report
end
Parser for the verbose output report format. “psort -v report” and WWW server output.
# File lib/bio/appl/psort/report.rb
#
# Parser for the verbose output report format ("psort -v report" and WWW
# server output).
#
# The text is split on blank lines into chunks. Chunks that are
# continuations of an earlier chunk are appended to it and removed, the WWW
# banner is stripped when present, and the remaining chunks are consumed in
# order: header line, sequence, then the feature and prediction parts as
# separated by divent.
#
# ent      :: raw report text
# entry_id :: optional entry ID, passed through to the constructor
#
# Returns the populated report object.
def self.v_parser(ent, entry_id = nil)
  report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)

  chunks = ent.split(/\n\n/).map { |chunk| chunk.chomp }

  # Appends chunk +idx+ to an earlier chunk and blanks its own slot.
  # NOTE(review): search_j is defined elsewhere; presumably it returns how
  # far back the chunk to append to lies - confirm against its definition.
  merge_back = lambda do |idx, text|
    offset = self.__send__(:search_j, idx, chunks)
    chunks[idx - offset] += text
    chunks[idx] = nil
  end

  chunks.each_with_index do |chunk, idx|
    # No line starting with a word character, '-', '>' or a tab.
    merge_back.call(idx, chunk) unless /^(\w|-|\>|\t)/ =~ chunk
    # psort output bug
    merge_back.call(idx, chunk) if /^none/ =~ chunk
  end
  chunks.compact!

  if /^ PSORT II server/ =~ chunks[0]   # for WWW version
    chunks.shift
    # Last chunk containing the "Results of Subprograms" line ('' if none).
    marker = chunks.select { |chunk| /^Results of Subprograms/ =~ chunk }.last || ''
    marker_pos = chunks.index(marker)
    chunks.delete(marker)
    chunks.delete_at(marker_pos - 1)
  end

  report.set_header_line(chunks.shift)
  report.seq = Bio::Sequence::AA.new(chunks.shift)

  feature_part, prediction_part = self.divent(chunks)
  report.set_features(feature_part)
  report.prob = self.set_kNN_prob(prediction_part[0].strip)
  report.set_prediction(prediction_part[1].strip)

  return report
end
Public Instance Methods
Sets @features values.
# File lib/bio/appl/psort/report.rb
#
# Stores each feature text in the features hash.
#
# The key of an entry is the text before the first ": " or ":\n" (stripped);
# the value is the complete, unmodified text. Finally the 'AA' entry is set
# to the length of the sequence.
#
# features_ary :: collection of feature text blocks
#
# Returns the sequence length stored under 'AA'.
def set_features(features_ary)
  features_ary.each do |block|
    label = block.split(/\:( |\n)/).first.strip
    self.features[label] = block # unless /^\>/ =~ label
  end

  self.features['AA'] = self.seq.length
end
Parses the header line and sets @entry_id, @definition and @scl.
# File lib/bio/appl/psort/report.rb
#
# Parses the header line and sets @entry_id, @definition and @scl.
#
# A leading line of dashes is ignored. The header is split on tabs and
# single spaces; when @entry_id is not set yet, the first token (minus
# leading dashes) becomes the entry ID. The remaining tokens form the
# definition; if they contain "(<n> aa) <text>", only <text> is kept.
# @scl is set to the first word of the definition when that word is a key
# of SclNames.
#
# NOTE(review): the first token is consumed only when @entry_id is unset,
# so with a preset @entry_id it stays part of the definition - confirm this
# is intended.
#
# str :: header text; it is not modified
#
# Returns the localization code when @scl was set, otherwise nil.
def set_header_line(str)
  # Non-destructive sub: the caller's string (possibly frozen) is untouched.
  header = str.sub(/^-+\n/, '')
  tokens = header.split(/\t| /)
  @entry_id = tokens.shift.sub(/^-+/, '').strip unless @entry_id

  rest = tokens.join(' ').chomp
  @definition =
    if rest =~ /\(\d+ aa\) (.+)$/
      Regexp.last_match(1)
    else
      rest
    end
  scl = @definition.split(' ')[0]

  @scl = scl if SclNames.key?(scl)
end
Parses the prediction line and sets @pred and @k (and @entry_id when it is not set yet).
# File lib/bio/appl/psort/report.rb
#
# Parses the prediction line and sets @pred and @k.
#
# The text must contain "prediction for <id> is <xxx> (k=<n>)", where <xxx>
# is three word characters and <n> is a run of digits. @pred receives <xxx>
# and @k receives <n> as a String. @entry_id is set to <id> only when it
# has no value yet.
#
# str :: prediction line text
#
# Returns the value stored in @k.
# Raises ArgumentError when the text does not contain the expected phrase.
def set_prediction(str)
  matched = /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/.match(str)
  unless matched
    raise ArgumentError,
          "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  @entry_id ||= matched[1]
  @pred = matched[2]
  @k = matched[3]
end