class Bio::PROSITE
Constants
- DELIMITER
-
Delimiter
- RS
-
Delimiter
- TAGSIZE
-
Bio::DB
API
Public Class Methods
Source
# File lib/bio/db/prosite.rb, line 27
# Creates the entry object by forwarding the raw entry text, together with
# the TAGSIZE constant, to the superclass constructor.
def initialize(entry)
  super(entry, TAGSIZE)
end
Calls superclass method Bio::EMBLDB::new
Source
# File lib/bio/db/prosite.rb, line 468
# Converts a PROSITE pattern string into a case-insensitive Regexp.
# The argument itself is never modified; every edit is made on a copy.
def self.pa2re(pattern)
  regex_source = pattern.dup
  regex_source.gsub!(/\s/, '')                   # remove white spaces
  regex_source.sub!(/\.$/, '')                   # (1) remove trailing '.'
  regex_source.sub!(/^</, '^')                   # (2) restricted to the N-terminal : `<'
  regex_source.sub!(/>$/, '$')                   # (2) restricted to the C-terminal : `>'
  regex_source.gsub!(/\{(\w+)\}/, '[^\1]')       # (3) not accepted at a given position : '{}'
  regex_source.gsub!(/\(([\d,]+)\)/, '{\1}')     # (4) repetition of an element : (n), (n,m)
  regex_source.tr!('x', '.')                     # (5) any amino acid is accepted : 'x'
  regex_source.delete!('-')                      # (6) each element is separated by a '-'
  Regexp.new(regex_source, Regexp::IGNORECASE)
end
prosite pattern to regular expression
prosite/prosuser.txt:
The PA (PAttern) lines contain the definition of a PROSITE
pattern. The patterns are described using the following conventions:
0) The standard IUPAC one-letter codes for the amino acids are used.
0) Ambiguities are indicated by listing the acceptable amino acids for a given position, between square parentheses `[ ]'. For example: [ALT] stands for Ala or Leu or Thr.
1) A period ends the pattern.
2) When a pattern is restricted to either the N- or C-terminal of a sequence, that pattern either starts with a `<' symbol or respectively ends with a `>' symbol.
3) Ambiguities are also indicated by listing between a pair of curly
brackets `{ }' the amino acids that are not accepted at a given position. For example: {AM} stands for any amino acid except Ala and Met.
4) Repetition of an element of the pattern can be indicated by following
that element with a numerical value or a numerical range between parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to x-x or x-x-x or x-x-x-x.
5) The symbol ‘x’ is used for a position where any amino acid is accepted.
6) Each element in a pattern is separated from its neighbor by a ‘-’.
Examples:
PA [AC]-x-V-x(4)-{ED}.
This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any but Glu or Asp}
PA <A-x-[ST](2)-x(0,1)-V.
This pattern, which must be in the N-terminal of the sequence (‘<’), is translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
Public Instance Methods
Source
# File lib/bio/db/prosite.rb, line 58
# Returns the AC field with a trailing ';' removed. The value is stored in
# @data['AC'] on first use and reused afterwards.
def ac
  @data['AC'] ||= fetch('AC').chomp(';')
end
AC Accession number (1 per entry)
AC PSnnnnn;
Returns
Source
# File lib/bio/db/prosite.rb, line 274
# Parses the CC field into a Hash of qualifier name => data, taken from each
# "/QUALIFIER=data;" item. The Hash is stored in @data['CC'] and reused on
# later calls.
def cc
  @data['CC'] ||= begin
    qualifiers = {}
    fetch('CC').scan(%r{/(\S+)=([^;]+);}) do |key, value|
      qualifiers[key] = value
    end
    qualifiers
  end
end
CC Comments (>=0 per entry)
CC /QUALIFIER=data; /QUALIFIER=data; .......
/TAXO-RANGE Taxonomic range.
/MAX-REPEAT Maximum known number of repetitions of the pattern in a single protein.
/SITE Indication of an ‘interesting’ site in the pattern.
/SKIP-FLAG Indication of an entry that can be, in some cases, ignored by a program (because it is too unspecific).
Returns
Source
# File lib/bio/db/prosite.rb, line 85
# Returns the DE field as delivered by field_fetch.
def de
  field_fetch('DE')
end
DE Short description (1 per entry)
DE Description.
Returns
Source
# File lib/bio/db/prosite.rb, line 45
# Returns @data['TYPE']. When it is not set yet, #name is invoked first.
# NOTE(review): presumably #name is what fills @data['TYPE'] from the ID
# line — confirm against #name.
def division
  name unless @data['TYPE']
  @data['TYPE']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 350
# Parses the DR field into a Hash of accession => [entry name, flag], one
# pair per "AC_NB, ENTRY_NAME, C;" item. An empty Hash is produced when
# fetch('DR') is falsy. The Hash is stored in @data['DR'] and reused.
def dr
  @data['DR'] ||= begin
    xrefs = {}
    if fetch('DR')
      fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/) do |accession, entry_name, flag|
        xrefs[accession] = [entry_name, flag] # SWISS-PROT : accession, entry, true/false
      end
    end
    xrefs
  end
end
DR Cross-references to SWISS-PROT (>=0 per entry)
DR AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C; AC_NB, ENTRY_NAME, C;
-
‘AC_NB’ is the SWISS-PROT primary accession number of the entry to which reference is being made.
-
‘ENTRY_NAME’ is the SWISS-PROT entry name.
-
‘C’ is a one character flag that can be one of the following:
T For a true positive.
N For a false negative; a sequence which belongs to the set under consideration, but which has not been picked up by the pattern or profile.
P For a ‘potential’ hit; a sequence that belongs to the set under consideration, but which was not picked up because the region(s) that are used as a 'fingerprint' (pattern or profile) is not yet available in the data bank (partial sequence).
? For an unknown; a sequence which possibly could belong to the set under consideration.
F For a false positive; a sequence which does not belong to the set in consideration.
Returns
Source
# File lib/bio/db/prosite.rb, line 73
# Returns the DT field as delivered by field_fetch.
def dt
  field_fetch('DT')
end
DT Date (1 per entry)
DT MMM-YYYY (CREATED); MMM-YYYY (DATA UPDATE); MMM-YYYY (INFO UPDATE).
Returns
Source
# File lib/bio/db/prosite.rb, line 251
# Returns the 'FALSE_NEG' entry of #statistics.
def false_neg
  statistics['FALSE_NEG']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 236
# Returns the 'FALSE_POS' entry of #statistics.
def false_pos
  statistics['FALSE_POS']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 241
# Returns the first element of #false_pos.
# NOTE(review): presumably the hit count of a [hits, sequences] pair — confirm.
def false_positive_hits
  false_pos.first
end
Returns
Source
# File lib/bio/db/prosite.rb, line 246
# Returns the last element of #false_pos.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair — confirm.
def false_positive_sequences
  false_pos.last
end
Returns
Source
# File lib/bio/db/prosite.rb, line 387
# Lists the cross-references flagged as false negatives.
#
# In the DR line the one-character flag 'N' marks a false negative, while
# 'F' marks a false positive. The previous code passed 'F' here and so
# returned the false positives instead.
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           listed instead of accession numbers.
# Returns whatever list_xref returns for flag 'N'.
def list_falsenegative(by_name = nil)
  list_xref('N', by_name)
end
Returns
Source
# File lib/bio/db/prosite.rb, line 392
# Lists the cross-references flagged as false positives.
#
# In the DR line the one-character flag 'F' marks a false positive, while
# 'P' marks a 'potential' hit. The previous code passed 'P' here, which made
# this method return the same list as list_potentialhit.
#
# by_name:: passed through to list_xref; when truthy, entry names are
#           listed instead of accession numbers.
# Returns whatever list_xref returns for flag 'F'.
def list_falsepositive(by_name = nil)
  list_xref('F', by_name)
end
Returns
Source
# File lib/bio/db/prosite.rb, line 397
# Lists the cross-references whose flag is 'P' by delegating to list_xref.
# by_name is passed through unchanged.
def list_potentialhit(by_name = nil)
  list_xref('P', by_name)
end
Returns
Source
# File lib/bio/db/prosite.rb, line 382
# Lists the cross-references whose flag is 'T' by delegating to list_xref.
# by_name is passed through unchanged.
def list_truepositive(by_name = nil)
  list_xref('T', by_name)
end
Returns
Source
# File lib/bio/db/prosite.rb, line 402
# Lists the cross-references whose flag is '?' by delegating to list_xref.
# by_name is passed through unchanged.
def list_unknown(by_name = nil)
  list_xref('?', by_name)
end
Returns
Source
# File lib/bio/db/prosite.rb, line 366
# Collects the sp_xref entries whose value[1] equals +flag+.
#
# flag::    value compared against the second element of each entry's value.
# by_name:: when truthy, value[0] is collected; otherwise the key is.
# Returns an Array, empty when nothing matches.
def list_xref(flag, by_name = nil)
  matches = []
  sp_xref.each do |accession, value|
    next unless value[1] == flag
    matches << (by_name ? value[0] : accession)
  end
  matches
end
Returns
Source
# File lib/bio/db/prosite.rb, line 112
# Returns the MA field as delivered by field_fetch.
def ma
  field_fetch('MA')
end
MA Matrix/profile (>=0 per entry)
see - ma2re method
Returns
Source
# File lib/bio/db/prosite.rb, line 499
# Placeholder: always raises NotImplementedError, whatever +matrix+ is.
def ma2re(matrix)
  raise NotImplementedError
end
prosite profile to regular expression
prosite/profile.txt:
Returns
Source
# File lib/bio/db/prosite.rb, line 307
# Returns the 'MAX-REPEAT' entry of #comment converted with to_i.
def max_repeat
  comment['MAX-REPEAT'].to_i
end
Returns
Source
# File lib/bio/db/prosite.rb, line 37
# Returns @data['ID']. On first use the ID field is fetched, its trailing
# '.' removed, and the remainder split on '; ' into @data['ID'] and
# @data['TYPE'].
def name
  return @data['ID'] if @data['ID']

  @data['ID'], @data['TYPE'] = fetch('ID').chomp('.').split('; ')
  @data['ID']
end
ID Identification (Begins each entry; 1 per entry)
ID ENTRY_NAME; ENTRY_TYPE. (ENTRY_TYPE : PATTERN, MATRIX, RULE)
Returns
Source
# File lib/bio/db/prosite.rb, line 151
# Parses the NR field into a Hash of qualifier name => value, taken from each
# "/QUALIFIER=data;" item. Each data string is converted as follows:
# * "n(m)"  becomes [n, m], both Integers (hits, sequences)
# * "x,m"   becomes [x, m], with x kept as a String and m an Integer
#           (SWISS-PROT release, number of sequences)
# * anything else is converted with to_i
# The Hash is stored in @data['NR'] and reused on later calls.
def nr
  @data['NR'] ||= begin
    stats = {}
    fetch('NR').scan(%r{/(\S+)=([^;]+);}) do |key, raw|
      stats[key] =
        case raw
        when /^(\d+)\((\d+)\)$/
          [$1.to_i, $2.to_i] # the number of hits, the number of sequences
        when /([\d\.]+),(\d+)/
          [$1, $2.to_i]      # SWISS-PROT release, number of SWISS-PROT sequences
        else
          raw.to_i
        end
    end
    stats
  end
end
NR Numerical results (>=0 per entry)
- SWISS-PROT scan statistics of true and false positives/negatives
/RELEASE SWISS-PROT release number and total number of sequence entries in that release.
/TOTAL Total number of hits in SWISS-PROT.
/POSITIVE Number of hits on proteins that are known to belong to the set in consideration.
/UNKNOWN Number of hits on proteins that could possibly belong to the set in consideration.
/FALSE_POS Number of false hits (on unrelated proteins).
/FALSE_NEG Number of known missed hits.
/PARTIAL Number of partial sequences which belong to the set in consideration, but which are not hit by the pattern or profile because they are partial (fragment) sequences.
Returns
Source
# File lib/bio/db/prosite.rb, line 97
# Returns the PA field with every run of whitespace removed. The string held
# in @data['PA'] is modified in place.
# NOTE(review): the result of field_fetch('PA') is discarded; presumably the
# call is only there to populate @data['PA'] — confirm against field_fetch.
def pa
  field_fetch('PA')
  @data['PA'] ||= fetch('PA')
  pattern = @data['PA']
  pattern.gsub!(/\s+/, '') if pattern
  pattern
end
PA Pattern (>=0 per entry)
see - pa2re method
Returns
Source
# File lib/bio/db/prosite.rb, line 485
# Instance-level convenience wrapper: forwards +pattern+ to the class method
# of the same name and returns its result.
def pa2re(pattern)
  self.class.pa2re(pattern)
end
Source
# File lib/bio/db/prosite.rb, line 257
# Returns the 'PARTIAL' entry of #statistics.
def partial
  statistics['PARTIAL']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 412
# Returns the 3D field split on ';' (plus any following spaces) as an Array.
# The Array is stored in @data['3D'] and reused on later calls.
def pdb_xref
  @data['3D'] ||= fetch('3D').split(/; */)
end
3D Cross-references to PDB
(>=0 per entry)
3D name; [name2;...]
Returns
Source
# File lib/bio/db/prosite.rb, line 425
# Returns the DO field with a trailing ';' removed.
#
# The value is stored in @data['DO'] on first use and reused afterwards, in
# the same way as #ac and #pdb_xref. Previously the field was fetched again
# and @data['DO'] overwritten on every call.
def pdoc_xref
  @data['DO'] ||= fetch('DO').chomp(';')
end
DO Pointer to the documentation file (1 per entry)
DO PDOCnnnnn;
Returns
Source
# File lib/bio/db/prosite.rb, line 206
# Returns the 'POSITIVE' entry of #statistics.
def positive
  statistics['POSITIVE']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 211
# Returns the first element of #positive.
# NOTE(review): presumably the hit count of a [hits, sequences] pair — confirm.
def positive_hits
  positive.first
end
Returns
Source
# File lib/bio/db/prosite.rb, line 216
# Returns the last element of #positive.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair — confirm.
def positive_sequences
  positive.last
end
Returns
Source
# File lib/bio/db/prosite.rb, line 176
# Returns the 'RELEASE' entry of #statistics.
def release
  statistics['RELEASE']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 126
# Returns the RU field as delivered by field_fetch.
def ru
  field_fetch('RU')
end
RU Rule (>=0 per entry)
RU Rule_Description. The rule is described in ordinary English and is free-format.
Returns
Source
# File lib/bio/db/prosite.rb, line 312
# Splits the 'SITE' entry of #comment on ',' and returns the pair
# [first part converted with to_i, second part].
# When there is no 'SITE' entry both parts are nil, so the result is [0, nil].
def site
  position, description = comment['SITE'].split(',') if comment['SITE']
  [position.to_i, description]
end
Returns
Source
# File lib/bio/db/prosite.rb, line 320
# Returns true when the 'SKIP-FLAG' entry of #comment is exactly 'TRUE',
# and nil (not false) otherwise.
def skip_flag
  true if comment['SKIP-FLAG'] == 'TRUE'
end
Returns
Source
# File lib/bio/db/prosite.rb, line 181
# Returns the first element of #release.
# NOTE(review): presumably the SWISS-PROT release number — confirm.
def swissprot_release_number
  release.first
end
Returns
Source
# File lib/bio/db/prosite.rb, line 186
# Returns the last element of #release.
# NOTE(review): presumably the number of sequences in that release — confirm.
def swissprot_release_sequences
  release.last
end
Returns
Source
# File lib/bio/db/prosite.rb, line 288
# Returns the 'TAXO-RANGE' entry of #comment.
#
# expand:: when truthy (and a range is present), each character of the range
#          is translated to its taxon name and an Array of names is returned;
#          characters without a translation are skipped.
# Without +expand+, or when there is no range, the raw entry (possibly nil)
# is returned unchanged.
def taxon_range(expand = nil)
  range = comment['TAXO-RANGE']
  return range unless range && expand

  taxon_names = {
    'A' => 'archaebacteria',
    'B' => 'bacteriophages',
    'E' => 'eukaryotes',
    'P' => 'prokaryotes',
    'V' => 'eukaryotic viruses'
  }
  range.scan(/./).map { |code| taxon_names[code] }.compact
end
Returns
Source
# File lib/bio/db/prosite.rb, line 196
# Returns the first element of #total.
# NOTE(review): presumably the hit count of a [hits, sequences] pair — confirm.
def total_hits
  total.first
end
Returns
Source
# File lib/bio/db/prosite.rb, line 201
# Returns the last element of #total.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair — confirm.
def total_sequences
  total.last
end
Returns
Source
# File lib/bio/db/prosite.rb, line 221
# Returns the 'UNKNOWN' entry of #statistics.
def unknown
  statistics['UNKNOWN']
end
Returns
Source
# File lib/bio/db/prosite.rb, line 226
# Returns the first element of #unknown.
# NOTE(review): presumably the hit count of a [hits, sequences] pair — confirm.
def unknown_hits
  unknown.first
end
Returns
Source
# File lib/bio/db/prosite.rb, line 231
# Returns the last element of #unknown.
# NOTE(review): presumably the sequence count of a [hits, sequences] pair — confirm.
def unknown_sequences
  unknown.last
end
Returns