class Bio::Iprscan::Report
DESCRIPTION¶ ↑
Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.
See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
USAGE¶ ↑
# Read a merged.txt and split each entry. Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report| report.query_id report.matches.size report.matches.each do |match| match.ipr_id #=> 'IPR...' match.ipr_description match.method match.accession match.description match.match_start match.match_end match.evalue end # report.to_gff3 # report.to_html end Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| report.class #=> Bio::Iprscan::Report end
Constants
- RS
-
Entry delimiter pattern.
Attributes
CRC64 checksum of query sequence.
Query sequence name (entry_id).
Matched InterPro motifs in Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys. The :motifs key contains motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys.
Query sequence name (entry_id).
Query sequence length.
Public Class Methods
Source
# File lib/bio/appl/iprscan/report.rb
# Builds an empty report: the query id, query length and CRC64 checksum
# start out as nil and the list of matches starts out empty.
def initialize
  @query_id, @query_length, @crc64 = nil, nil, nil
  @matches = []
end
Source
Source
# File lib/bio/appl/iprscan/report.rb
# Parses one pseudo-txt formatted entry and returns a new report.
#
# Each line of +str+ is split on tabs and handled by its shape:
# * exactly two fields       - query id and query length
# * first field is "//"      - ignored
# * first field is "InterPro" - remembered; its 2nd and 3rd fields become
#   the ipr_id / ipr_description of the matches that follow
# * five or more fields      - a match: method, accession, description,
#   evalue and a "start-end" location
#
# Any other line (blank lines, truncated rows) is skipped. Such lines used
# to reach the match branch and raise NoMethodError on the missing
# location field.
def self.parse_ptxt_entry(str)
  report = self.new
  ipr_line = []
  str.split(/\n/).each do |raw_line|
    fields = raw_line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # carries no data
    elsif fields.first == 'InterPro'
      ipr_line = fields
    elsif fields.size >= 5
      startp, endp = fields[4].split("-")
      report.matches << Match.new(:ipr_id => ipr_line[1],
                                  :ipr_description => ipr_line[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => startp.to_i,
                                  :match_end => endp.to_i)
    end
  end
  report
end
Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report
object.
Usage¶ ↑
File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e| report = Bio::Iprscan::Report.parse_ptxt_entry(e) end
Source
# File lib/bio/appl/iprscan/report.rb
# Reads raw formatted lines from +io+ (anything responding to +gets+),
# groups consecutive lines that share the same first tab-separated field,
# and yields one report per group, built by
# Bio::Iprscan::Report.parse_raw_entry.
#
# Blank lines are skipped. Previously a blank line formed a group of its
# own and was passed to parse_raw_entry, which failed on the empty entry.
def self.parse_raw(io)
  entry = String.new
  current_id = nil
  while line = io.gets
    next if line.strip.empty?
    id = line.split("\t").first
    # A change in the first field closes the group collected so far.
    if current_id && id != current_id
      yield Bio::Iprscan::Report.parse_raw_entry(entry)
      entry = String.new
    end
    current_id = id
    entry << line
  end
  yield Bio::Iprscan::Report.parse_raw_entry(entry) unless entry.empty?
end
USAGE¶ ↑
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| report end
Source
# File lib/bio/appl/iprscan/report.rb
# Parses one raw formatted entry (one tab-separated line per match) and
# returns a new report.
#
# Columns, by position: query id, crc64, query length, method, accession,
# description, match start, match end, evalue, status, date and, when
# present, InterPro id, InterPro description and a GO annotation string.
# GO terms are stored on the match as an Array of strings, one per
# "Category name: term (GO:nnnnnnn)" occurrence.
#
# The report's query_id and query_length are copied from the first match.
# Blank lines are skipped, and an entry without any data line yields a
# report with no matches instead of raising NoMethodError.
def self.parse_raw_entry(str)
  report = self.new
  str.split(/\n/).each do |raw_line|
    next if raw_line.strip.empty?
    fields = raw_line.split("\t")
    match = Match.new(:query_id => fields[0],
                      :crc64 => fields[1],
                      :query_length => fields[2].to_i,
                      :method => fields[3],
                      :accession => fields[4],
                      :description => fields[5],
                      :match_start => fields[6].to_i,
                      :match_end => fields[7].to_i,
                      :evalue => fields[8],
                      :status => fields[9],
                      :date => fields[10])
    if fields[11]
      match.ipr_id = fields[11]
      match.ipr_description = fields[12]
    end
    match.go_terms = fields[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if fields[13]
    report.matches << match
  end
  first_match = report.matches.first
  if first_match
    report.query_id = first_match.query_id
    report.query_length = first_match.query_length
  end
  report
end
Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report
object.
Source
# File lib/bio/appl/iprscan/report.rb
# Splits the txt formatted stream +io+ into entries and yields one report
# per entry, built by parse_txt_entry.
#
# +io+ is read with "\n\nSequence" as the record separator, so every chunk
# but the last ends with the "Sequence" keyword of the following entry and
# every chunk but the first has lost its own leading "Sequence". Both are
# repaired before parsing.
#
# The checks are anchored to the start/end of the whole chunk (\A, \z).
# With the line anchors used before (^, $) a line inside the entry that
# merely ended in "Sequence" was stripped instead of the trailing
# separator, and an inner line starting with "Sequence" suppressed the
# re-prefixing.
def self.parse_txt(io)
  io.each("\n\nSequence") do |entry|
    entry = entry.sub(/Sequence\z/, '')
    entry = 'Sequence' + entry unless entry =~ /\ASequence/
    yield self.parse_txt_entry(entry)
  end
end
Splits the entry stream.
Usage¶ ↑
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report| report.class #=> Bio::Iprscan::Report end
Source
# File lib/bio/appl/iprscan/report.rb
# Parses one txt formatted entry and returns a new report.
#
# The text before the first blank line is the header; query id, query
# length and crc64 checksum are extracted from it. Every following
# blank-line-separated block is read line by line:
# * "method ..."    - ignored
# * a line naming Molecular Function / Cellular Component / Biological
#   Process - scanned into [category, term, GO id] triples
# * "InterPro ..."  - remembered as the InterPro id / description
# * anything else   - a method line; one match is added per
#   "S[from-to] evalue" group found in its 4th field
#
# Raises ArgumentError unless a line of +str+ starts with "Sequence ".
#
# GO terms default to an empty Array and are reset at the start of each
# block, so a block without a GO line no longer inherits the previous
# block's terms (nor a String default). Lines without a 4th field are
# skipped instead of raising NoMethodError.
def self.parse_txt_entry(str)
  unless str =~ /^Sequence /
    raise ArgumentError, "Invalid format: \n\n#{str}"
  end
  header, *blocks = str.split(/\n\n/)
  report = self.new
  report.query_id = if header =~ /Sequence \"(.+)\" / then $1 else '' end
  report.query_length = if header =~ /length: (\d+) aa./ then $1.to_i else nil end
  report.crc64 = if header =~ /crc64 checksum: (\S+) / then $1 else nil end
  ipr_line = []
  blocks.each do |block|
    go_annotation = []
    block.split(/\n/).each do |line|
      # NOTE(review): the rendered listing shows a single space in this
      # separator, but the GO branch below can only match if fields keep
      # their inner single spaces, so two-or-more spaces is assumed here.
      # Confirm against lib/bio/appl/iprscan/report.rb.
      match = line.split(/  +/)
      case match[0]
      when 'method'
        # ignored
      when /(Molecular Function|Cellular Component|Biological Process):/
        go_annotation = match[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
      when 'InterPro'
        ipr_line = match
      else
        next if match[3].nil?
        match[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */).each do |pos_score|
          report.matches << Match.new(:ipr_id => ipr_line[1],
                                      :ipr_description => ipr_line[2],
                                      :method => match[0],
                                      :accession => match[1],
                                      :description => match[2],
                                      :evalue => pos_score[3],
                                      :status => pos_score[0],
                                      :match_start => pos_score[1].to_i,
                                      :match_end => pos_score[2].to_i,
                                      :go_terms => go_annotation)
        end
      end
    end
  end
  report
end
Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report
object.
Public Instance Methods
Source
# File lib/bio/appl/iprscan/report.rb
# Renders the report in raw format: one line per match, fields joined by
# tabs, lines joined by "\n". The columns are query id, crc64, query
# length, method name, accession, description, match start, match end,
# evalue, status, date, InterPro id, InterPro description and GO terms
# (joined by ", ").
#
# A GO term may be either a [category, term, GO id] triple (as produced by
# parse_txt_entry) or an already formatted String (as produced by
# parse_raw_entry); Strings were previously indexed character by
# character. A match whose go_terms is nil or empty gets an empty last
# column instead of raising NoMethodError.
def format_raw
  @matches.map { |match|
    go_column = Array(match.go_terms).map { |term|
      term.respond_to?(:to_ary) ? "#{term[0]}: #{term[1]} (#{term[2]})" : term.to_s
    }.join(', ')
    [self.query_id,
     self.crc64,
     self.query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_column
    ].join("\t")
  }.join("\n")
end
# Placeholder for a txt formatter: the body is empty, so calling it
# returns nil.
def format_txt
end
Source
# File lib/bio/appl/iprscan/report.rb
# Renders the report in the requested format. The only accepted values of
# +format_type+ are 'raw' and :raw, both of which delegate to format_raw;
# anything else raises NameError.
def output(format_type)
  return format_raw if ['raw', :raw].include?(format_type)

  raise NameError, "Invalid format_type."
end
Outputs the InterPro matches in the given format_type. Only 'raw' (or :raw) is supported; any other value raises NameError.
Source
# File lib/bio/appl/iprscan/report.rb
# Returns a Hash that maps each InterPro id to the Array of matches
# carrying that id, in the order the matches appear in the report.
#
# The Hash is built on the first call and memoized in @ipr_ids; later
# calls return the same object, so matches added after the first call are
# not reflected.
def to_hash
  @ipr_ids ||= @matches.group_by { |match| match.ipr_id }
end
Returns a Hash (key as an InterPro ID and value as an Array of Match objects).
report.to_hash.each do |ipr_id, matches| matches.each do |match| match.ipr_id == ipr_id #=> true end end