class Bio::PROSITE
Constants
- DELIMITER
Delimiter
- RS
Delimiter
- TAGSIZE
Bio::DB
API
Public Class Methods
Bio::EMBLDB::new
# File lib/bio/db/prosite.rb, line 26
# Builds a PROSITE entry object by handing the raw entry text to the
# superclass constructor together with this class's TAGSIZE constant.
#
# entry:: the entry text, passed through unchanged to +super+
def initialize(entry)
  super(entry, TAGSIZE)
end
prosite pattern to regular expression
prosite/prosuser.txt:
The PA (PAttern) lines contain the definition of a PROSITE
pattern. The patterns are described using the following conventions:
0) The standard IUPAC one-letter codes for the amino acids are used. 0) Ambiguities are indicated by listing the acceptable amino acids for a
given position, between square parentheses `[ ]'. For example: [ALT] stands for Ala or Leu or Thr.
1) A period ends the pattern. 2) When a pattern is restricted to either the N- or C-terminal of a
sequence, that pattern either starts with a `<' symbol or respectively ends with a `>' symbol.
3) Ambiguities are also indicated by listing between a pair of curly
brackets `{ }' the amino acids that are not accepted at a given position. For example: {AM} stands for any amino acid except Ala and Met.
4) Repetition of an element of the pattern can be indicated by following
that element with a numerical value or a numerical range between parentheses. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to x-x or x-x-x or x-x-x-x.
5) The symbol ‘x’ is used for a position where any amino acid is accepted. 6) Each element in a pattern is separated from its neighbor by a ‘-’.
Examples:
PA [AC]-x-V-x(4)-{ED}.
This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any but Glu or Asp}
PA <A-x-[ST](2)-x(0,1)-V.
This pattern, which must be in the N-terminal of the sequence (‘<’), is translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
# File lib/bio/db/prosite.rb, line 467
# Converts a PROSITE pattern (the content of a PA line) into a
# case-insensitive Regexp.
#
# The conversion works on a private copy, so the caller's string is never
# modified. This matters because callers may pass a cached or frozen string.
#
# pattern:: PROSITE pattern string, e.g. "[AC]-x-V-x(4)-{ED}."
#
# Returns a Regexp built with Regexp::IGNORECASE.
def self.pa2re(pattern)
  # Non-bang gsub always returns a new String, which is then safe to mutate.
  regex_src = pattern.gsub(/\s/, '')      # remove white spaces
  regex_src.sub!(/\.$/, '')               # (1) remove trailing '.'
  regex_src.sub!(/^</, '^')               # (2) restricted to the N-terminal : `<'
  regex_src.sub!(/>$/, '$')               # (2) restricted to the C-terminal : `>'
  # (3) not accepted at a given position : '{}'
  # This must run before step (4), which introduces regexp '{n}' quantifiers.
  regex_src.gsub!(/\{(\w+)\}/) { "[^#{Regexp.last_match(1)}]" }
  # (4) repetition of an element : (n), (n,m)
  regex_src.gsub!(/\(([\d,]+)\)/) { "{#{Regexp.last_match(1)}}" }
  regex_src.tr!('x', '.')                 # (5) any amino acid is accepted : 'x'
  regex_src.delete!('-')                  # (6) each element is separated by a '-'
  Regexp.new(regex_src, Regexp::IGNORECASE)
end
Public Instance Methods
AC Accession number (1 per entry)
AC PSnnnnn;
Returns
# File lib/bio/db/prosite.rb, line 57
# Returns the AC (accession number) field with a trailing ';' removed.
# The value is computed on first use and cached in @data['AC'].
def ac
  @data['AC'] ||= fetch('AC').chomp(';')
end
CC Comments (>=0 per entry)
CC /QUALIFIER=data; /QUALIFIER=data; .......
/TAXO-RANGE Taxonomic range. /MAX-REPEAT Maximum known number of repetitions of the pattern in a
single protein.
/SITE Indication of an ‘interesting’ site in the pattern. /SKIP-FLAG Indication of an entry that can be, in some cases, ignored
by a program (because it is too unspecific).
Returns
# File lib/bio/db/prosite.rb, line 273
# Parses the CC (comments) field into a Hash of qualifier => data,
# taken from every "/QUALIFIER=data;" occurrence. When a qualifier is
# repeated, the last occurrence wins. The Hash is cached in @data['CC'].
def cc
  return @data['CC'] if @data['CC']

  qualifiers = fetch('CC').scan(%r{/(\S+)=([^;]+);})
  @data['CC'] = qualifiers.each_with_object({}) do |(qualifier, value), table|
    table[qualifier] = value
  end
end
DE Short description (1 per entry)
DE Description.
Returns
# File lib/bio/db/prosite.rb, line 84
# Returns the DE (short description) field, as produced by field_fetch.
def de
  tag = 'DE'
  field_fetch(tag)
end
Returns
# File lib/bio/db/prosite.rb, line 44
# Returns the value stored under @data['TYPE'].
# If it is not set yet, +name+ is called first, because parsing the ID line
# fills in both @data['ID'] and @data['TYPE'].
def division
  name unless @data['TYPE']
  @data['TYPE']
end
DR Cross-references to SWISS-PROT (>=0 per entry)
DR AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
-
‘AC_NB’ is the SWISS-PROT primary accession number of the entry to which reference is being made.
-
‘ENTRY_NAME’ is the SWISS-PROT entry name.
-
‘C’ is a one character flag that can be one of the following:
T For a true positive. N For a false negative; a sequence which belongs to the set under
consideration, but which has not been picked up by the pattern or profile.
P For a ‘potential’ hit; a sequence that belongs to the set under
consideration, but which was not picked up because the region(s) that are used as a 'fingerprint' (pattern or profile) is not yet available in the data bank (partial sequence).
? For an unknown; a sequence which possibly could belong to the set under
consideration.
F For a false positive; a sequence which does not belong to the set in
consideration.
Returns
# File lib/bio/db/prosite.rb, line 349
# Parses the DR (SWISS-PROT cross-reference) field into a Hash of the form
#   accession => [entry_name, flag]
# where flag is the one-character code from the DR line (T, N, P, ? or F).
# The Hash is cached in @data['DR'].
def dr
  return @data['DR'] if @data['DR']

  xrefs = {}
  if fetch('DR')
    fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/) do |accession, entry_name, flag|
      xrefs[accession] = [entry_name, flag] # SWISS-PROT : accession, entry, flag
    end
  end
  @data['DR'] = xrefs
end
DT Date (1 per entry)
DT MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
Returns
# File lib/bio/db/prosite.rb, line 72
# Returns the DT (date) field, as produced by field_fetch.
def dt
  tag = 'DT'
  field_fetch(tag)
end
Returns
# File lib/bio/db/prosite.rb, line 250
# Returns the 'FALSE_NEG' entry of +statistics+
# (number of known missed hits, per the NR field description).
def false_neg
  stats = statistics
  stats['FALSE_NEG']
end
Returns
# File lib/bio/db/prosite.rb, line 235
# Returns the 'FALSE_POS' entry of +statistics+
# (false hits on unrelated proteins, per the NR field description).
def false_pos
  stats = statistics
  stats['FALSE_POS']
end
Returns
# File lib/bio/db/prosite.rb, line 240
# Returns the first element of +false_pos+.
# NOTE(review): presumably the hit count of a [hits, sequences] pair - confirm.
def false_positive_hits
  pair = false_pos
  pair.first
end
Returns
# File lib/bio/db/prosite.rb, line 245
# Returns the last element of +false_pos+.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair - confirm.
def false_positive_sequences
  pair = false_pos
  pair.last
end
Returns
# File lib/bio/db/prosite.rb, line 386
# Lists the SWISS-PROT cross-references flagged as false negatives.
# In the DR line these carry the flag 'N': a sequence that belongs to the
# set under consideration but was not picked up by the pattern or profile.
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           returned instead of accession numbers.
#
# Returns an Array of accession numbers (or entry names).
def list_falsenegative(by_name = nil)
  # 'N' is the false-negative flag; 'F' denotes a false positive.
  list_xref('N', by_name)
end
Returns
# File lib/bio/db/prosite.rb, line 391
# Lists the SWISS-PROT cross-references flagged as false positives.
# In the DR line these carry the flag 'F': a sequence that does not belong
# to the set under consideration.
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           returned instead of accession numbers.
#
# Returns an Array of accession numbers (or entry names).
def list_falsepositive(by_name = nil)
  # 'F' is the false-positive flag; 'P' denotes a potential hit.
  list_xref('F', by_name)
end
Returns
# File lib/bio/db/prosite.rb, line 396
# Lists the SWISS-PROT cross-references flagged 'P' (potential hit).
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           returned instead of accession numbers.
def list_potentialhit(by_name = nil)
  flag = 'P'
  list_xref(flag, by_name)
end
Returns
# File lib/bio/db/prosite.rb, line 381
# Lists the SWISS-PROT cross-references flagged 'T' (true positive).
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           returned instead of accession numbers.
def list_truepositive(by_name = nil)
  flag = 'T'
  list_xref(flag, by_name)
end
Returns
# File lib/bio/db/prosite.rb, line 401
# Lists the SWISS-PROT cross-references flagged '?' (unknown).
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           returned instead of accession numbers.
def list_unknown(by_name = nil)
  flag = '?'
  list_xref(flag, by_name)
end
Returns
# File lib/bio/db/prosite.rb, line 365
# Collects the cross-references from +sp_xref+ whose flag equals +flag+.
#
# flag::    one-character DR flag to select (compared with value[1])
# by_name:: when truthy, collect value[0] (the entry name) instead of the
#           key (the accession number)
#
# Returns an Array, in the iteration order of +sp_xref+.
def list_xref(flag, by_name = nil)
  sp_xref.each_with_object([]) do |(accession, info), matches|
    next unless info[1] == flag

    matches.push(by_name ? info[0] : accession)
  end
end
MA Matrix/profile (>=0 per entry)
see - ma2re method
Returns
# File lib/bio/db/prosite.rb, line 111
# Returns the MA (matrix/profile) field, as produced by field_fetch.
def ma
  tag = 'MA'
  field_fetch(tag)
end
prosite profile to regular expression
prosite/profile.txt:
Returns
# File lib/bio/db/prosite.rb, line 497
# Placeholder for converting a PROSITE profile (MA lines) into a regular
# expression. The conversion is not implemented.
#
# matrix:: ignored
#
# Always raises NotImplementedError.
def ma2re(matrix)
  # An explicit message tells the caller which feature is missing.
  raise NotImplementedError,
        'conversion of a PROSITE profile (MA) to a regular expression is not implemented'
end
Returns
# File lib/bio/db/prosite.rb, line 306
# Returns the 'MAX-REPEAT' comment qualifier converted with to_i
# (so a missing qualifier yields 0).
def max_repeat
  repeat = comment['MAX-REPEAT']
  repeat.to_i
end
ID Identification (Begins each entry; 1 per entry)
ID ENTRY_NAME; ENTRY_TYPE. (ENTRY_TYPE : PATTERN, MATRIX, RULE)
Returns
# File lib/bio/db/prosite.rb, line 36
# Returns the entry name from the ID line ("ENTRY_NAME; ENTRY_TYPE.").
# On first use the line is stripped of its trailing '.', split on '; ',
# and the two parts are cached in @data['ID'] and @data['TYPE'].
def name
  return @data['ID'] if @data['ID']

  entry_name, entry_type = fetch('ID').chomp('.').split('; ')
  @data['ID'] = entry_name
  @data['TYPE'] = entry_type
  @data['ID']
end
NR Numerical results (>=0 per entry)
- SWISS-PROT scan statistics of true and false positives/negatives
/RELEASE SWISS-PROT release number and total number of sequence
entries in that release.
/TOTAL Total number of hits in SWISS-PROT. /POSITIVE Number of hits on proteins that are known to belong to the
set in consideration.
/UNKNOWN Number of hits on proteins that could possibly belong to
the set in consideration.
/FALSE_POS Number of false hits (on unrelated proteins). /FALSE_NEG Number of known missed hits. /PARTIAL Number of partial sequences which belong to the set in
consideration, but which are not hit by the pattern or profile because they are partial (fragment) sequences.
Returns
# File lib/bio/db/prosite.rb, line 150
# Parses the NR (numerical results) field into a Hash keyed by qualifier
# name. Each "/QUALIFIER=value;" value is converted as follows:
#
# * "n(m)"      -> [n, m] as Integers (hits, sequences)
# * "rel,count" -> [rel, count] with rel kept as a String (SWISS-PROT
#   release) and count as an Integer (number of sequences)
# * anything else -> value.to_i
#
# The Hash is cached in @data['NR'].
def nr
  return @data['NR'] if @data['NR']

  stats = {}
  fetch('NR').scan(%r{/(\S+)=([^;]+);}) do |qualifier, raw|
    stats[qualifier] =
      case raw
      when /^(\d+)\((\d+)\)$/
        # number of hits, number of sequences
        [Regexp.last_match(1).to_i, Regexp.last_match(2).to_i]
      when /([\d\.]+),(\d+)/
        # SWISS-PROT release, number of SWISS-PROT sequences
        [Regexp.last_match(1), Regexp.last_match(2).to_i]
      else
        raw.to_i
      end
  end
  @data['NR'] = stats
end
PA Pattern (>=0 per entry)
see - pa2re method
Returns
# File lib/bio/db/prosite.rb, line 96
# Returns the PA (pattern) field with all whitespace removed.
# The string is cached in @data['PA'] and stripped in place, so the same
# String object is returned on every call.
def pa
  field_fetch('PA')
  @data['PA'] ||= fetch('PA')
  pattern = @data['PA']
  pattern.gsub!(/\s+/, '') if pattern
  pattern
end
# File lib/bio/db/prosite.rb, line 483
# Instance-level convenience wrapper: delegates to the class method
# of the same name and returns its result.
#
# pattern:: PROSITE pattern string, passed through unchanged
def pa2re(pattern)
  converter = self.class
  converter.pa2re(pattern)
end
Returns
# File lib/bio/db/prosite.rb, line 256
# Returns the 'PARTIAL' entry of +statistics+
# (partial sequences not hit, per the NR field description).
def partial
  stats = statistics
  stats['PARTIAL']
end
3D Cross-references to PDB
(>=0 per entry)
3D name; [name2;...]
Returns
# File lib/bio/db/prosite.rb, line 411
# Returns the 3D (PDB cross-reference) field split on ';' followed by
# optional spaces, as an Array of names. Cached in @data['3D'].
def pdb_xref
  @data['3D'] ||= fetch('3D').split(/; */)
end
DO Pointer to the documentation file (1 per entry)
DO PDOCnnnnn;
Returns
# File lib/bio/db/prosite.rb, line 424
# Returns the DO (documentation pointer, "PDOCnnnnn;") field with the
# trailing ';' removed.
#
# The value is computed once and cached in @data['DO'], consistent with the
# other field accessors such as +ac+, instead of re-fetching and overwriting
# the cache on every call.
def pdoc_xref
  @data['DO'] ||= fetch('DO').chomp(';')
end
Returns
# File lib/bio/db/prosite.rb, line 205
# Returns the 'POSITIVE' entry of +statistics+
# (hits on proteins known to belong to the set, per the NR field description).
def positive
  stats = statistics
  stats['POSITIVE']
end
Returns
# File lib/bio/db/prosite.rb, line 210
# Returns the first element of +positive+.
# NOTE(review): presumably the hit count of a [hits, sequences] pair - confirm.
def positive_hits
  pair = positive
  pair.first
end
Returns
# File lib/bio/db/prosite.rb, line 215
# Returns the last element of +positive+.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair - confirm.
def positive_sequences
  pair = positive
  pair.last
end
# File lib/bio/db/prosite.rb, line 487
# Returns this entry's pattern (see +pa+) converted to a Regexp by the
# class-level pa2re.
#
# A copy of the pattern is handed to pa2re. +pa+ returns the String object
# cached in @data['PA'], and a converter that edits its argument in place
# would otherwise replace the cached PROSITE pattern with regexp source.
def re
  pattern = pa
  pattern = pattern.dup if pattern
  self.class.pa2re(pattern)
end
Returns
# File lib/bio/db/prosite.rb, line 175
# Returns the 'RELEASE' entry of +statistics+
# (SWISS-PROT release number and sequence count, per the NR field description).
def release
  stats = statistics
  stats['RELEASE']
end
RU Rule (>=0 per entry)
RU Rule_Description. The rule is described in ordinary English and is free-format.
Returns
# File lib/bio/db/prosite.rb, line 125
# Returns the RU (rule) field, as produced by field_fetch.
def ru
  tag = 'RU'
  field_fetch(tag)
end
Returns
# File lib/bio/db/prosite.rb, line 311
# Returns the 'SITE' comment qualifier as a two-element Array
# [number, description], obtained by splitting the value on ','.
# The number is converted with to_i; when the qualifier is absent the
# result is [0, nil].
def site
  # Both locals stay nil when there is no SITE qualifier.
  num, desc = comment['SITE'].split(',') if comment['SITE']
  [num.to_i, desc]
end
Returns
# File lib/bio/db/prosite.rb, line 319
# Returns true when the 'SKIP-FLAG' comment qualifier equals 'TRUE',
# and nil otherwise.
def skip_flag
  true if comment['SKIP-FLAG'] == 'TRUE'
end
Returns
# File lib/bio/db/prosite.rb, line 180
# Returns the first element of +release+.
# NOTE(review): presumably the SWISS-PROT release number of a
# [release, sequences] pair - confirm.
def swissprot_release_number
  pair = release
  pair.first
end
Returns
# File lib/bio/db/prosite.rb, line 185
# Returns the last element of +release+.
# NOTE(review): presumably the number of sequences in that SWISS-PROT
# release - confirm.
def swissprot_release_sequences
  pair = release
  pair.last
end
Returns
# File lib/bio/db/prosite.rb, line 287
# Returns the 'TAXO-RANGE' comment qualifier.
#
# expand:: when truthy (and the qualifier is present), the one-letter codes
#          are translated into an Array of taxon names; characters without
#          a known translation are skipped.
#
# Returns the raw qualifier value (or nil) when +expand+ is falsy or the
# qualifier is absent, otherwise an Array of Strings.
def taxon_range(expand = nil)
  codes = comment['TAXO-RANGE']
  return codes unless codes and expand

  taxon_names = {
    'A' => 'archaebacteria',
    'B' => 'bacteriophages',
    'E' => 'eukaryotes',
    'P' => 'prokaryotes',
    'V' => 'eukaryotic viruses'
  }
  codes.scan(/./).map { |code| taxon_names[code] }.compact
end
Returns
# File lib/bio/db/prosite.rb, line 190
# Returns the 'TOTAL' entry of +statistics+
# (total number of hits in SWISS-PROT, per the NR field description).
def total
  stats = statistics
  stats['TOTAL']
end
Returns
# File lib/bio/db/prosite.rb, line 195
# Returns the first element of +total+.
# NOTE(review): presumably the hit count of a [hits, sequences] pair - confirm.
def total_hits
  pair = total
  pair.first
end
Returns
# File lib/bio/db/prosite.rb, line 200
# Returns the last element of +total+.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair - confirm.
def total_sequences
  pair = total
  pair.last
end
Returns
# File lib/bio/db/prosite.rb, line 220
# Returns the 'UNKNOWN' entry of +statistics+
# (hits on proteins that could possibly belong to the set, per the NR
# field description).
def unknown
  stats = statistics
  stats['UNKNOWN']
end
Returns
# File lib/bio/db/prosite.rb, line 225
# Returns the first element of +unknown+.
# NOTE(review): presumably the hit count of a [hits, sequences] pair - confirm.
def unknown_hits
  pair = unknown
  pair.first
end
Returns
# File lib/bio/db/prosite.rb, line 230
# Returns the last element of +unknown+.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair - confirm.
def unknown_sequences
  pair = unknown
  pair.last
end