class Bio::Iprscan::Report

DESCRIPTION

Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.

See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html

USAGE

# Read a merged.txt and split each entry.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
  report.query_id
  report.matches.size
  report.matches.each do |match|
    match.ipr_id #=> 'IPR...'
    match.ipr_description
    match.method_name
    match.accession
    match.description
    match.match_start
    match.match_end
    match.evalue    
  end
  # report.to_gff3 
  # report.to_html
end

Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
  report.class #=> Bio::Iprscan::Report
end

Constants

RS

Entry delimiter pattern.

Attributes

crc64[RW]

CRC64 checksum of query sequence.

entry_id[RW]

Query sequence name (entry_id).

matches[RW]

Matched InterPro motifs in Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys, and the :motifs key contains motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys.

query_id[RW]

Query sequence name (entry_id).

query_length[RW]

Query sequence length.

Public Class Methods

new() click to toggle source
# File lib/bio/appl/iprscan/report.rb, line 235
# Creates an empty report: the query id, query length and CRC64 checksum
# start out as nil and the list of matches starts out as an empty Array.
def initialize
  @query_id, @query_length, @crc64 = nil, nil, nil
  @matches = []
end
parse_ptxt(io) { |parse_ptxt_entry| ... } click to toggle source

Splits entry stream.

Usage

Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report|
  report
end
# File lib/bio/appl/iprscan/report.rb, line 193
# Reads +io+ one record at a time, using "\n//\n" as the record separator,
# and yields the result of parse_ptxt_entry for each record.
#
# io:: object whose +each+ accepts a separator string (e.g. an open File)
def self.parse_ptxt(io)
  io.each("\n\/\/\n") { |record| yield parse_ptxt_entry(record) }
end
parse_ptxt_entry(str) click to toggle source

Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.

Usage

File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e|
  report = Bio::Iprscan::Report.parse_ptxt_entry(e)
end
# File lib/bio/appl/iprscan/report.rb, line 208
# Parses one pseudo-txt formatted entry into a report.
#
# Each line is split on tabs and handled by its shape:
# * exactly two fields     -> query id and query length
# * first field "//"       -> ignored
# * first field "InterPro" -> remembered; its 2nd and 3rd fields become the
#   ipr_id / ipr_description of the match lines that follow
# * anything else          -> a match line: method, accession, description,
#   evalue and a "start-end" location in the 5th field
# Blank (or whitespace-only) lines are skipped.
#
# str:: text of a single entry
#
# Returns the report object created via +new+, with its matches appended.
def self.parse_ptxt_entry(str)
  report = new
  ipr_fields = []
  str.split(/\n/).each do |raw_line|
    # A blank line has no fields; without this guard it would reach the
    # match branch and fail on the missing location column.
    next if raw_line.strip.empty?

    fields = raw_line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # nothing to record for this line
    elsif fields.first == 'InterPro'
      ipr_fields = fields
    else
      first_pos, last_pos = fields[4].split("-")
      report.matches << Match.new(:ipr_id => ipr_fields[1],
                                  :ipr_description => ipr_fields[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => first_pos.to_i,
                                  :match_end => last_pos.to_i)
    end
  end
  report
end
parse_raw(io) { |parse_raw_entry| ... } click to toggle source

USAGE

Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
  report
end
# File lib/bio/appl/iprscan/report.rb, line 71
# Splits a raw-format stream into entries and yields a parsed report for
# each one.
#
# Lines are read with +io.gets+. Consecutive lines whose first tab-separated
# field is equal are collected into one entry; when that field changes (or
# the stream ends) the collected text is handed to parse_raw_entry and the
# result is yielded.
#
# The id of the entry being collected is remembered rather than re-split
# from the accumulated text on every line, and parse_raw_entry is called on
# +self+, like the other parse_* methods here do for their entry parsers.
#
# io:: object responding to +gets+ (e.g. an open File)
def self.parse_raw(io)
  entry = nil
  entry_id = nil
  while (line = io.gets)
    line_id = line.split("\t").first
    if entry && line_id == entry_id
      entry << line
    else
      yield parse_raw_entry(entry) if entry
      # dup so that appending later lines never mutates the string that
      # io.gets returned
      entry = line.dup
      entry_id = line_id
    end
  end
  yield parse_raw_entry(entry) if entry
end
parse_raw_entry(str) click to toggle source

Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.

# File lib/bio/appl/iprscan/report.rb, line 88
# Parses one raw-format entry (one or more tab-separated lines) into a
# report.
#
# Every line becomes one Match built from columns 0-10: query id, crc64,
# query length, method, accession, description, match start, match end,
# evalue, status and date. When column 11 is present, columns 11 and 12 are
# stored as ipr_id / ipr_description. When column 13 is present, every
# "<word> <word>:<text> (GO:<digits>)" substring in it is stored, as a
# String, in go_terms.
#
# The report's query_id, query_length and crc64 are copied from the first
# match. If +str+ contains no lines, the report is returned with no matches
# and those attributes left untouched.
#
# str:: text of a single entry
#
# Returns the report object created via +new+.
def self.parse_raw_entry(str)
  report = new
  str.split(/\n/).each do |row|
    cols = row.split("\t")
    match = Match.new(:query_id => cols[0],
                      :crc64    => cols[1],
                      :query_length => cols[2].to_i,
                      :method       => cols[3],
                      :accession    => cols[4],
                      :description => cols[5],
                      :match_start => cols[6].to_i,
                      :match_end   => cols[7].to_i,
                      :evalue => cols[8],
                      :status => cols[9],
                      :date   => cols[10])
    if cols[11]
      match.ipr_id = cols[11]
      match.ipr_description = cols[12]
    end
    match.go_terms = cols[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if cols[13]
    report.matches << match
  end

  first_match = report.matches.first
  if first_match
    report.query_id = first_match.query_id
    report.query_length = first_match.query_length
    # NOTE(review): assumes Match exposes a crc64 reader the same way it
    # exposes query_id / query_length - confirm against the Match class.
    report.crc64 = first_match.crc64
  end
  report
end
parse_txt(io) { |parse_txt_entry| ... } click to toggle source

Splits the entry stream.

Usage

Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
  report.class #=> Bio::Iprscan::Report
end
# File lib/bio/appl/iprscan/report.rb, line 129
# Splits a txt-format stream into entries and yields a parsed report for
# each one.
#
# The stream is read with "\n\nSequence" as the record separator. The
# separator stays attached to the end of the record before it, so a record
# may end with a "Sequence" that belongs to the next entry, and the next
# record then starts without it. Each record is repaired (trailing
# "Sequence" removed, leading "Sequence" restored) before being handed to
# parse_txt_entry.
#
# The repairs are anchored to the very end / very start of the record
# (\z and \A), so a line inside the record that merely ends or starts with
# the word "Sequence" is left alone.
#
# io:: object whose +each+ accepts a separator string (e.g. an open File)
def self.parse_txt(io)
  io.each("\n\nSequence") do |chunk|
    entry = chunk.sub(/Sequence\z/, '')
    entry = 'Sequence' + entry unless entry =~ /\ASequence/
    yield parse_txt_entry(entry)
  end
end
parse_txt_entry(str) click to toggle source

Parser method for a txt formated entry. Returns a Bio::Iprscan::Report object.

# File lib/bio/appl/iprscan/report.rb, line 146
# Parses one txt-format entry into a report.
#
# The entry is split on blank lines. The first part is the header, from
# which the quoted sequence name, the "length: N aa." value and the
# "crc64 checksum:" value are extracted (query_id falls back to '' and the
# other two to nil when absent). Every following part is read line by line,
# each line being split on runs of two or more spaces:
# * "method"   -> column-header line, ignored
# * a line naming Molecular Function / Cellular Component / Biological
#   Process -> parsed into [category, description, GO id] triples
# * "InterPro" -> remembered; its 2nd and 3rd fields become the
#   ipr_id / ipr_description of the match lines that follow
# * anything else -> a match line; its 4th field holds one or more
#   "S[start-end] score" groups and one Match is created per group
# Blank lines inside a part are skipped.
#
# Matches seen before any GO line get an empty Array as go_terms, so that
# go_terms is always a collection.
#
# str:: text of a single entry; must contain a line starting with "Sequence "
#
# Returns the report object created via +new+.
# Raises ArgumentError when +str+ has no line starting with "Sequence ".
def self.parse_txt_entry(str)
  unless str =~ /^Sequence /
    raise ArgumentError, "Invalid format:  \n\n#{str}"
  end

  header, *blocks = str.split(/\n\n/)
  report = new
  report.query_id = header[/Sequence \"(.+)\" /, 1] || ''
  length = header[/length: (\d+) aa./, 1]
  report.query_length = length ? length.to_i : nil
  report.crc64 = header[/crc64 checksum: (\S+) /, 1]

  ipr_fields = ''
  # NOTE(review): the GO terms are not cleared when a new InterPro line is
  # seen, so they carry over to later matches until another GO line replaces
  # them - confirm this is intended.
  go_terms = []
  blocks.each do |block|
    block.split(/\n/).each do |row|
      cols = row.split(/  +/)
      # Three or more consecutive newlines leave an empty line at the start
      # of a part; it has no columns to interpret.
      next if cols.empty?

      case cols[0]
      when 'method'
        # column-header line
      when /(Molecular Function|Cellular Component|Biological Process):/
        go_terms = cols[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
      when 'InterPro'
        ipr_fields = cols
      else
        cols[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */).each do |status, from, to, score|
          report.matches << Match.new(:ipr_id          => ipr_fields[1],
                                      :ipr_description => ipr_fields[2],
                                      :method      => cols[0],
                                      :accession   => cols[1],
                                      :description => cols[2],
                                      :evalue      => score,
                                      :status      => status,
                                      :match_start => from.to_i,
                                      :match_end   => to.to_i,
                                      :go_terms => go_terms)
        end
      end
    end
  end
  report
end

Public Instance Methods

format_raw() click to toggle source

def format_txt end

# File lib/bio/appl/iprscan/report.rb, line 265
# Renders the report in the tab-separated raw layout: one line per match,
# lines joined with "\n".
#
# Each line holds, in order: query id, crc64, query length, then the match's
# method name, accession, description, match start, match end, evalue,
# status, date, ipr_id, ipr_description and finally the GO terms joined
# with ", ".
#
# GO terms are accepted in either form stored by the parsers:
# * a [category, description, id] triple is written as
#   "category: description (id)"
# * a String is written as it is
# A match whose go_terms is nil produces an empty GO column.
#
# Returns the formatted String ("" when there are no matches).
def format_raw
  @matches.map { |match|
    # Array() turns nil into [] and wraps a lone String, so every match
    # can be handled the same way.
    go_column = Array(match.go_terms).map { |term|
      if term.respond_to?(:to_str)
        term.to_str
      else
        "#{term[0]}: #{term[1]} (#{term[2]})"
      end
    }.join(', ')

    [query_id,
     crc64,
     query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_column
    ].join("\t")
  }.join("\n")
end
output(format_type) click to toggle source

Output interpro matches in the format_type.

# File lib/bio/appl/iprscan/report.rb, line 244
# Renders the matches in the requested format.
#
# format_type:: 'raw' or :raw, which delegates to format_raw
#
# Raises NameError for any other format_type.
def output(format_type)
  return format_raw if ['raw', :raw].include?(format_type)

  raise NameError, "Invalid format_type."
end
to_hash() click to toggle source

Returns a Hash (key as an InterPro ID and value as an Array of Match objects).

report.to_hash.each do |ipr_id, matches|
  matches.each do |match|
    match.ipr_id == ipr_id #=> true
  end
end
# File lib/bio/appl/iprscan/report.rb, line 297
# Groups the matches by their ipr_id.
#
# The Hash is built on the first call and kept in @ipr_ids; later calls
# return that same object without rebuilding it.
#
# Returns a Hash mapping each ipr_id to an Array of the matches carrying it.
def to_hash
  return @ipr_ids if (defined? @ipr_ids) && @ipr_ids

  @ipr_ids = @matches.group_by { |match| match.ipr_id }
end