class Bio::Iprscan::Report

DESCRIPTION

Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.

See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html

USAGE

# Read a merged.txt and split each entry.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report| 
  report.query_id
  report.matches.size
  report.matches.each do |match|
    match.ipr_id #=> 'IPR...'
    match.ipr_description
    match.method_name
    match.accession
    match.description
    match.match_start
    match.match_end
    match.evalue    
  end
  # report.to_gff3
  # report.to_html
end

Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| 
  report.class #=> Bio::Iprscan::Report
end

Constants

RS

Entry delimiter pattern.

Attributes

crc64[RW]

CRC64 checksum of query sequence.

entry_id[RW]

Query sequence name (entry_id).

matches[RW]

Matched InterPro motifs in Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys. The :motifs key contains the motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys. Note: the constructor and parser methods listed below populate this attribute with an Array of Match objects.

query_id[RW]

Query sequence name (entry_id).

query_length[RW]

Query sequence length.

Public Class Methods

new() click to toggle source
    # File lib/bio/appl/iprscan/report.rb
# Builds an empty report: the query attributes start out as nil and the
# match list starts out as an empty Array.
def initialize
  @query_id = @query_length = @crc64 = nil
  @matches = []
end
parse_ptxt(io) { |parse_ptxt_entry| ... } click to toggle source

Splits entry stream.

Usage

Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report|
  report
end
    # File lib/bio/appl/iprscan/report.rb
# Splits a pseudo-txt stream into entries and yields one parsed report per
# entry.
#
# io:: any object whose #each accepts a record separator (File, StringIO, ...).
#
# Entries are delimited by a line containing only "//". Each chunk handed to
# parse_ptxt_entry still carries that trailing delimiter.
#
# Without a block an Enumerator over the parsed reports is returned instead
# of raising LocalJumpError.
def self.parse_ptxt(io)
  return enum_for(:parse_ptxt, io) unless block_given?

  io.each("\n\/\/\n") do |entry|
    yield self.parse_ptxt_entry(entry)
  end
end
parse_ptxt_entry(str) click to toggle source

Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.

Usage

File.open("merged.txt").each(Bio::Iprscan::Report::RS) do |e| 
  report = Bio::Iprscan::Report.parse_ptxt_entry(e)
end
    # File lib/bio/appl/iprscan/report.rb
# Parses one pseudo-txt entry and returns a report object built with self.new.
#
# str:: the text of a single entry; lines are tab separated.
#
# Line kinds, checked in this order:
# * two fields             -> query id and query length
# * first field "//"       -> entry terminator, ignored
# * first field "InterPro" -> remembered and attached to the match lines
#                             that follow it
# * anything else          -> a match line: method, accession, description,
#                             evalue and a "start-end" location
#
# Blank lines are skipped. A match line without the location column raises
# ArgumentError instead of failing with NoMethodError on nil.
def self.parse_ptxt_entry(str)
  report = self.new
  ipr_line = []
  str.split(/\n/).each do |line|
    next if line.strip.empty?

    fields = line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # end-of-entry marker, nothing to record
    elsif fields.first == 'InterPro'
      ipr_line = fields
    else
      location = fields[4]
      raise ArgumentError, "Invalid format:  \n\n#{line}" if location.nil?

      startp, endp = location.split("-")
      report.matches << Match.new(:ipr_id => ipr_line[1],
                                  :ipr_description => ipr_line[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => startp.to_i,
                                  :match_end => endp.to_i)
    end
  end
  report
end
parse_raw(io) { |parse_raw_entry| ... } click to toggle source

USAGE

Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
  report
end
   # File lib/bio/appl/iprscan/report.rb
# Splits a raw-format stream into entries and yields one parsed report per
# entry.
#
# io:: any object responding to #gets (File, StringIO, ...).
#
# Consecutive lines that share the same first tab-separated field are
# collected into one entry, and each entry is handed to
# Bio::Iprscan::Report.parse_raw_entry.
#
# The first field of the current entry is remembered instead of re-splitting
# the whole accumulated entry for every input line.
#
# Without a block an Enumerator over the parsed reports is returned instead
# of raising LocalJumpError.
def self.parse_raw(io)
  return enum_for(:parse_raw, io) unless block_given?

  entry = ''
  current_id = nil
  while (line = io.gets)
    line_id = line.split("\t").first
    if entry.empty?
      current_id = line_id
    elsif line_id != current_id
      # The first field changed: flush the finished entry and start a new one.
      yield Bio::Iprscan::Report.parse_raw_entry(entry)
      entry = ''
      current_id = line_id
    end
    entry << line
  end
  yield Bio::Iprscan::Report.parse_raw_entry(entry) unless entry.empty?
end
parse_raw_entry(str) click to toggle source

Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.

    # File lib/bio/appl/iprscan/report.rb
 # Parses one raw-format entry (every line belonging to a single query) and
 # returns a report object built with self.new.
 #
 # str:: newline separated lines of tab separated columns, in this order:
 #       query id, crc64, query length, method, accession, description,
 #       match start, match end, evalue, status, date and, optionally,
 #       InterPro id, InterPro description and GO terms.
 #
 # Blank lines are skipped. Empty input yields a report with no matches
 # rather than a NoMethodError on nil.
 def self.parse_raw_entry(str)
   report = self.new
   str.split(/\n/).each do |line|
     next if line.strip.empty?

     cols = line.split("\t")
     match = Match.new(:query_id => cols[0],
                       :crc64    => cols[1],
                       :query_length => cols[2].to_i,
                       :method       => cols[3],
                       :accession    => cols[4],
                       :description => cols[5],
                       :match_start => cols[6].to_i,
                       :match_end   => cols[7].to_i,
                       :evalue => cols[8],
                       :status => cols[9],
                       :date   => cols[10])
     if cols[11]
       match.ipr_id = cols[11]
       match.ipr_description = cols[12]
     end
     # GO terms stay as pre-formatted "Category: name (GO:nnnnnnn)" strings.
     match.go_terms = cols[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if cols[13]
     report.matches << match
   end

   # The query-level attributes are taken from the first match of the entry.
   first = report.matches.first
   if first
     report.query_id = first.query_id
     report.query_length = first.query_length
     # NOTE(review): assumes Match exposes #crc64 the same way it exposes
     # #query_id; format_raw writes report.crc64 back out as column 2.
     report.crc64 = first.crc64
   end
   report
 end
parse_txt(io) { |parse_txt_entry| ... } click to toggle source

Splits the entry stream.

Usage

Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
  report.class #=> Bio::Iprscan::Report
end
    # File lib/bio/appl/iprscan/report.rb
# Splits a txt-format stream into entries and yields one parsed report per
# entry.
#
# io:: any object whose #each accepts a record separator (File, StringIO, ...).
#
# The stream is cut at "\n\nSequence". Every chunk but the last therefore
# ends with the "Sequence" that opens the next entry, and every chunk but the
# first has lost its own leading "Sequence". Both are repaired before the
# chunk is handed to parse_txt_entry.
#
# The checks are anchored to the end and the start of the chunk. Line anchors
# would let an interior line that ends in "Sequence" be stripped in place of
# the trailing separator, and an interior line that starts with "Sequence"
# would suppress restoring the prefix.
#
# Without a block an Enumerator over the parsed reports is returned instead
# of raising LocalJumpError.
def self.parse_txt(io)
  return enum_for(:parse_txt, io) unless block_given?

  io.each("\n\nSequence") do |entry|
    entry = entry.sub(/Sequence\z/, '')
    # lstrip so that leading blank lines before the first header do not
    # cause a second "Sequence" to be prepended.
    entry = 'Sequence' + entry unless entry.lstrip.start_with?('Sequence')
    yield self.parse_txt_entry(entry)
  end
end
parse_txt_entry(str) click to toggle source

Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.

    # File lib/bio/appl/iprscan/report.rb
# Parses one txt-format entry and returns a report object built with
# self.new.
#
# str:: the text of a single entry. It must contain a line starting with
#       "Sequence "; otherwise ArgumentError is raised.
#
# The first blank-line-separated paragraph is the header (query id, crc64
# checksum, length). Each following paragraph is read line by line, with
# fields split on runs of two or more spaces:
# * "InterPro" line -> remembered for the match lines that follow, and the
#                      GO annotation is reset
# * GO line         -> scanned into [category, name, GO id] triples
# * "method" line   -> column headings, ignored
# * anything else   -> a match line whose fourth field lists
#                      "S[from-to] evalue" hits; one Match per hit
#
# GO terms are reset for every InterPro block, so a block without a GO line
# no longer inherits the previous block's terms. When no GO line was seen
# :go_terms is an empty Array rather than an empty String. Blank lines and
# lines without a location field are skipped.
def self.parse_txt_entry(str)
  unless str =~ /^Sequence /
    raise ArgumentError, "Invalid format:  \n\n#{str}"
  end

  header, *blocks = str.split(/\n\n/)
  report = self.new
  report.query_id = header[/Sequence \"(.+)\" /, 1] || ''
  length = header[/length: (\d+) aa./, 1]
  report.query_length = length && length.to_i
  report.crc64 = header[/crc64 checksum: (\S+) /, 1]

  ipr_line = []
  go_annotation = []
  blocks.each do |block|
    block.split(/\n/).each do |row|
      fields = row.split(/  +/)
      case fields[0]
      when nil, 'method'
        # blank line or column headings: nothing to record
      when /(Molecular Function|Cellular Component|Biological Process):/
        go_annotation = fields[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
      when 'InterPro'
        ipr_line = fields
        go_annotation = []
      else
        next unless fields[3]

        fields[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */).each do |status, from, to, evalue|
          report.matches << Match.new(:ipr_id          => ipr_line[1],
                                      :ipr_description => ipr_line[2],
                                      :method      => fields[0],
                                      :accession   => fields[1],
                                      :description => fields[2],
                                      :evalue      => evalue,
                                      :status      => status,
                                      :match_start => from.to_i,
                                      :match_end   => to.to_i,
                                      :go_terms => go_annotation)
        end
      end
    end
  end
  report
end

Public Instance Methods

format_raw() click to toggle source

def format_txt end

    # File lib/bio/appl/iprscan/report.rb
# Renders the report in raw format: one tab separated line per match, lines
# joined with "\n".
#
# Column order: query id, crc64, query length, method name, accession,
# description, match start, match end, evalue, status, date, InterPro id,
# InterPro description and GO terms.
#
# GO terms are accepted in each shape the parsers produce: nil or an empty
# String when there are none, [category, name, GO id] triples, or strings
# that are already formatted. All render as a ", " separated list of
# "category: name (GO id)".
def format_raw
  @matches.map { |match|
    go_terms = Array(match.go_terms).map { |term|
      term.is_a?(Array) ? "#{term[0]}: #{term[1]} (#{term[2]})" : term.to_s
    }.reject { |term| term.empty? }

    [self.query_id,
     self.crc64,
     self.query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_terms.join(', ')
    ].join("\t")
  }.join("\n")
end
output(format_type) click to toggle source

Output interpro matches in the format_type.

    # File lib/bio/appl/iprscan/report.rb
# Returns the matches rendered in the requested format.
#
# format_type:: 'raw' or :raw; these are the only values accepted.
#
# Raises NameError for any other value.
def output(format_type)
  return format_raw if ['raw', :raw].include?(format_type)

  raise NameError, "Invalid format_type."
end
to_hash() click to toggle source

Returns a Hash (key as an InterPro ID and value as an Array of the Match objects that have that ID).

report.to_hash.each do |ipr_id, matches|
  matches.each do |match|
    match.ipr_id == ipr_id #=> true
  end
end
    # File lib/bio/appl/iprscan/report.rb
# Groups the matches by InterPro id.
#
# Returns a Hash that maps each ipr_id to the Array of matches carrying it.
# Keys and matches appear in the order they occur in @matches.
#
# The Hash is built on the first call and cached in @ipr_ids, so matches
# added afterwards are not reflected in later calls.
def to_hash
  @ipr_ids ||= @matches.group_by { |match| match.ipr_id }
end