class Bio::Iprscan::Report
DESCRIPTION¶ ↑
Class for InterProScan reports. It parses results in (raw|xml|txt) format and reformats them into (html, xml, ebihtml, txt, gff3) format.
See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
USAGE¶ ↑
  # Read a merged.txt and split each entry.
  Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
    report.query_id
    report.matches.size
    report.matches.each do |match|
      match.ipr_id #=> 'IPR...'
      match.ipr_description
      match.method
      match.accession
      match.description
      match.match_start
      match.match_end
      match.evalue
    end
    # report.to_gff3
    # report.to_html
  end

  Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
    report.class #=> Bio::Iprscan::Report
  end
Constants
- RS
Entry delimiter pattern.
Attributes
CRC64 checksum of query sequence.
Query sequence name (entry_id).
Matched InterPro motifs in a Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys. The :motifs key contains the motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys.
Query sequence name (entry_id).
Query sequence length.
Public Class Methods
# File lib/bio/appl/iprscan/report.rb, line 235
#
# Creates an empty report: no query id, query length or CRC64 checksum yet,
# and an empty list of matches for the parser methods to fill in.
def initialize
  @matches = []
  @query_id = @query_length = @crc64 = nil
end
Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.
Usage¶ ↑
  File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e|
    report = Bio::Iprscan::Report.parse_ptxt_entry(e)
  end
# File lib/bio/appl/iprscan/report.rb, line 208
#
# Parses one pseudo-txt formatted entry and returns a new report.
#
# Each line of +str+ is split on tabs and handled by its shape:
# * a two-column line sets the report's query id and query length;
# * a line whose first column is '//' is ignored;
# * a line whose first column is 'InterPro' is remembered and supplies
#   :ipr_id / :ipr_description for the match lines that follow it;
# * any other line is a match line with columns
#   method, accession, description, evalue, "start-end".
#
# Blank lines are skipped. A match line without the fifth ("start-end")
# column raises ArgumentError instead of failing with NoMethodError on nil.
#
# str:: String holding a single entry.
# Returns the populated report (an instance of the receiver class).
def self.parse_ptxt_entry(str)
  report = self.new
  ipr_line = []
  str.split(/\n/).each do |line|
    next if line.strip.empty?

    fields = line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # entry terminator: nothing to record
    elsif fields.first == 'InterPro'
      ipr_line = fields
    else
      unless fields[4]
        raise ArgumentError, "Invalid format: #{line.inspect}"
      end
      startp, endp = fields[4].split("-")
      report.matches << Match.new(:ipr_id => ipr_line[1],
                                  :ipr_description => ipr_line[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => startp.to_i,
                                  :match_end => endp.to_i)
    end
  end
  report
end
USAGE¶ ↑
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| report end
# File lib/bio/appl/iprscan/report.rb, line 71
#
# Splits raw-format output into per-query entries and yields one parsed
# report per entry.
#
# Consecutive lines whose first tab-separated field is equal are collected
# into one entry. Each completed entry is passed to
# Bio::Iprscan::Report.parse_raw_entry and the result is yielded to the
# block. Nothing is yielded for empty input.
#
# io:: any object whose +each_line+ yields the input one line at a time,
#      e.g. an IO, a StringIO or a String.
def self.parse_raw(io)
  entry = nil      # lines collected for the current query
  entry_id = nil   # first field of the line that opened the current entry
  io.each_line do |line|
    line_id = line.split("\t").first
    if entry.nil?
      entry = line.dup
      entry_id = line_id
    elsif line_id == entry_id
      entry << line
    else
      # The query changed: flush the finished entry, then start a new one.
      yield Bio::Iprscan::Report.parse_raw_entry(entry)
      entry = line.dup
      entry_id = line_id
    end
  end
  yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry
end
Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 88
#
# Parses one raw formatted entry (all lines of a single query) and returns
# a new report.
#
# Every line is split on tabs and becomes one Match. Columns, by index:
# 0 query id, 1 CRC64, 2 query length, 3 method, 4 accession,
# 5 description, 6 match start, 7 match end, 8 evalue, 9 status, 10 date,
# 11 InterPro id (optional), 12 InterPro description, 13 GO terms (optional).
#
# The report's query_id and query_length are taken from the first match, and
# crc64 from the first line's second column. For input without any line the
# report is returned with these left unset.
#
# str:: String holding the lines of one entry.
# Returns the populated report (an instance of the receiver class).
def self.parse_raw_entry(str)
  report = self.new
  first_fields = nil
  str.split(/\n/).each do |line|
    fields = line.split("\t")
    first_fields ||= fields
    match = Match.new(:query_id => fields[0],
                      :crc64 => fields[1],
                      :query_length => fields[2].to_i,
                      :method => fields[3],
                      :accession => fields[4],
                      :description => fields[5],
                      :match_start => fields[6].to_i,
                      :match_end => fields[7].to_i,
                      :evalue => fields[8],
                      :status => fields[9],
                      :date => fields[10])
    if fields[11]
      match.ipr_id = fields[11]
      match.ipr_description = fields[12]
    end
    # GO terms are kept as whole strings, one per "Category Name: term (GO:n)".
    match.go_terms = fields[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if fields[13]
    report.matches << match
  end

  first_match = report.matches.first
  if first_match
    report.query_id = first_match.query_id
    report.query_length = first_match.query_length
    # Keep the checksum on the report as well, so it is not lost on output.
    report.crc64 = first_fields[1]
  end
  report
end
Splits the entry stream.
Usage¶ ↑
  Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
    report.class #=> Bio::Iprscan::Report
  end
# File lib/bio/appl/iprscan/report.rb, line 129
#
# Splits txt-format output into entries and yields one parsed report per
# entry.
#
# The input is cut at every "\n\nSequence". The cut leaves a trailing
# "Sequence" on each chunk but the last and removes the leading "Sequence"
# from each chunk but the first, so both ends are repaired before the chunk
# is passed to parse_txt_entry.
#
# io:: any object whose +each_line+ accepts a separator, e.g. an IO,
#      a StringIO or a String.
def self.parse_txt(io)
  io.each_line("\n\nSequence") do |entry|
    # \z / \A anchor to the chunk itself; line anchors ($ / ^) could hit a
    # line in the middle of the entry that happens to end or start with
    # "Sequence".
    entry = entry.sub(/Sequence\z/, '')
    entry = 'Sequence' + entry unless entry =~ /\ASequence/
    yield self.parse_txt_entry(entry)
  end
end
Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb 146 def self.parse_txt_entry(str) 147 unless str =~ /^Sequence / 148 raise ArgumentError, "Invalid format: \n\n#{str}" 149 end 150 header, *matches = str.split(/\n\n/) 151 report = self.new 152 report.query_id = if header =~ /Sequence \"(.+)\" / then $1 else '' end 153 report.query_length = if header =~ /length: (\d+) aa./ then $1.to_i else nil end 154 report.crc64 = if header =~ /crc64 checksum: (\S+) / then $1 else nil end 155 ipr_line = '' 156 go_annotation = '' 157 matches.each do |m| 158 m = m.split(/\n/).map {|x| x.split(/ +/) } 159 m.each do |match| 160 case match[0] 161 when 'method' 162 when /(Molecular Function|Cellular Component|Biological Process):/ 163 go_annotation = match[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/) 164 when 'InterPro' 165 ipr_line = match 166 else 167 pos_scores = match[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */) 168 pos_scores.each do |pos_score| 169 report.matches << Match.new(:ipr_id => ipr_line[1], 170 :ipr_description => ipr_line[2], 171 :method => match[0], 172 :accession => match[1], 173 :description => match[2], 174 :evalue => pos_score[3], 175 :status => pos_score[0], 176 :match_start => pos_score[1].to_i, 177 :match_end => pos_score[2].to_i, 178 :go_terms => go_annotation) 179 end 180 end 181 end 182 end 183 return report 184 end
Public Instance Methods
# Placeholder for txt output. The body is empty, so calling it returns nil.
def format_txt
end
# File lib/bio/appl/iprscan/report.rb, line 265
#
# Renders the report in raw format: one tab-separated line per match,
# lines joined with "\n" (no trailing newline).
#
# Columns: query id, CRC64, query length, method, accession, description,
# match start, match end, evalue, status, date, InterPro id, InterPro
# description, GO terms joined with ", ".
#
# A match's go_terms may be nil, an empty String, a list of ready-made
# strings (as parse_raw_entry stores them) or a list of
# [category, term, GO id] triples (as parse_txt_entry stores them); all of
# these are accepted.
#
# Returns a String.
def format_raw
  @matches.map { |match|
    # Array() turns nil into [] and wraps a bare String, so matches without
    # GO terms produce an empty last column instead of raising.
    go_terms = Array(match.go_terms).map { |term|
      term.is_a?(Array) ? "#{term[0]}: #{term[1]} (#{term[2]})" : term.to_s
    }.join(', ')
    [self.query_id,
     self.crc64,
     self.query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_terms
    ].join("\t")
  }.join("\n")
end
Outputs the InterPro matches in the format given by format_type. Only 'raw' (or :raw) is supported; any other value raises NameError.
# File lib/bio/appl/iprscan/report.rb, line 244
#
# Renders the report in the requested format.
#
# format_type:: 'raw' or :raw, which delegates to format_raw.
# Raises NameError for every other value.
def output(format_type)
  return format_raw if ['raw', :raw].include?(format_type)

  raise NameError, "Invalid format_type."
end
Returns a Hash whose keys are InterPro IDs and whose values are Arrays of the Match objects with that ID.
  report.to_hash.each do |ipr_id, matches|
    matches.each do |match|
      match.ipr_id == ipr_id #=> true
    end
  end
# File lib/bio/appl/iprscan/report.rb, line 297
#
# Groups the matches by InterPro id.
#
# Returns a Hash mapping each ipr_id to the Array of matches that carry it,
# in the order the matches appear in the report.
#
# The Hash is built on the first call and cached in @ipr_ids; later calls
# return the same object, so matches added afterwards are not reflected.
def to_hash
  # defined? keeps the first lookup of the not-yet-set ivar warning-free.
  return @ipr_ids if (defined? @ipr_ids) && @ipr_ids

  @ipr_ids = {}
  @matches.each do |match|
    (@ipr_ids[match.ipr_id] ||= []) << match
  end
  @ipr_ids
end