class Bio::UniProtKB

Description

Parser class for UniProtKB/SwissProt and TrEMBL database entry.

See the UniProtKB document files and manuals.

Examples

str = File.read("p53_human.swiss")
obj = Bio::UniProtKB.new(str)
obj.entry_id #=> "P53_HUMAN"

References

Public Instance Methods

aalen()
Alias for: sequence_length
aaseq()
Alias for: seq
cc(topic = nil) click to toggle source

returns contents in the CC lines.

returns an object of contents in the TOPIC.

returns contents of the “ALTERNATIVE PRODUCTS”.

  • Bio::UniProtKB#cc(‘ALTERNATIVE PRODUCTS’) -> Hash

    {'Event' => str, 
     'Named isoforms' => int,  
     'Comment' => str,
     'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
    
    CC   -!- ALTERNATIVE PRODUCTS:
    CC       Event=Alternative splicing; Named isoforms=15;
    ...
    CC         placentae isoforms. All tissues differentially splice exon 13;
    CC       Name=A; Synonyms=no del;
    CC         IsoId=P15529-1; Sequence=Displayed;

returns contents of the “DATABASE”.

  • Bio::UniProtKB#cc(‘DATABASE’) -> Array

    [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
    
    CC   -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].

returns contents of the “MASS SPECTROMETRY”.

  • Bio::UniProtKB#cc(‘MASS SPECTROMETRY’) -> Array

    [{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
    
    CC   -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].

CC lines (>=0, optional)

CC   -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
CC       IN LIVER, KIDNEY, LUNG AND BRAIN.

CC   -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
CC       SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.

See also www.expasy.org/sprot/userman.html#CC_line

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the contents of the CC (comment) lines.
#
# The CC lines are parsed once into a Hash of TOPIC => Array of comment
# bodies (one String per "-!- TOPIC: ..." block) and cached in @data['CC'].
#
# topic:: nil to get the whole Hash, or a topic name such as 'FUNCTION'.
#
# Return value by topic:
# * nil                      -> the Hash of all topics
# * 'DATABASE'               -> Array of {'NAME','NOTE','WWW','FTP'} Hashes
# * DEVELOPMENTAL STAGE, DISEASE, ENZYME REGULATION, FUNCTION, INDUCTION
#                            -> the comment bodies joined into one String
# * topics with a dedicated cc_* helper -> whatever that helper returns
# * anything else            -> the raw Array of comment bodies
#
# A topic that does not occur in the entry yields nil for the joined and
# raw topics. When the entry has no CC lines at all, an empty Hash is
# returned regardless of +topic+ (historical behaviour, kept as is).
#
# Raises RuntimeError when a comment block does not look like "TOPIC: text".
def cc(topic = nil)
  unless @data['CC']
    comments = {}
    comment_border = '-' * (77 - 4 + 1)
    dlm = /-!- /

    # 12KD_MYCSM has no CC lines.
    return comments if get('CC').size == 0

    # Removing the copyright statement.
    cc_raw = fetch('CC').sub(/ *---.+---/m, '')

    # Not any CC Lines without the copyright statement.
    return comments if cc_raw == ''

    begin
      # Keep only the text in front of a (remaining) copyright border.
      cc_raw = cc_raw.split(/#{comment_border}/)[0]
      # Drop the first delimiter so that split does not yield a leading ''.
      cc_raw = cc_raw.sub(dlm, '')
      cc_raw.split(dlm).each do |block|
        block = block.strip

        unless /(^[A-Z ]+[A-Z]): (.+)/ =~ block
          raise ["Error: [#{entry_id}]: CC Lines", '"', block, '"',
                 '', get('CC'),''].join("\n")
        end
        key  = $1
        # Re-join words hyphenated across line breaks ("co- factor"),
        # except in front of "AND".
        body = $2.gsub(/- (?!AND)/,'-').strip
        (comments[key] ||= []).push(body)
      end
    rescue NameError
      # NoMethodError is a subclass of NameError, so this single clause also
      # covers the case where nothing is left in front of the border.
      if fetch('CC') == ''
        return {}
      else
        raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
               "\n'#{self.get('CC')}'\n", "(#{$!})"].join
      end
    end

    @data['CC'] = comments
  end

  entries = @data['CC'][topic]

  case topic
  when nil
    @data['CC']
  when 'ALTERNATIVE PRODUCTS'
    cc_alternative_products(entries)
  when 'BIOPHYSICOCHEMICAL PROPERTIES'
    cc_biophysiochemical_properties(entries)
  when 'CATALYTIC ACTIVITY', 'CATALITIC ACTIVITY'
    # The misspelled topic is still accepted for backward compatibility.
    cc_catalytic_activity(entries)
  when 'CAUTION'
    cc_caution(entries)
  when 'DEVELOPMENTAL STAGE', 'DISEASE', 'ENZYME REGULATION',
       'FUNCTION', 'INDUCTION'
    # nil (not an exception) when the entry lacks this topic.
    entries ? entries.join('') : nil
  when 'INTERACTION'
    cc_interaction(entries)
  when 'MASS SPECTROMETRY'
    cc_mass_spectrometry(entries)
  when 'PATHWAY'
    cc_pathway(entries)
  when 'RNA EDITING'
    cc_rna_editing(entries)
  when 'SUBCELLULAR LOCATION'
    cc_subcellular_location(entries)
  when 'WEB RESOURCE'
    cc_web_resource(entries)
  when 'DATABASE'
    # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    return entries unless entries

    entries.map do |entry|
      record = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
      # Drop the final character (the closing '.') before splitting fields.
      entry.sub(/.$/,'').split(/;/).each do |field|
        case field
        when /NAME=(.+)/
          record['NAME'] = $1
        when /NOTE=(.+)/
          record['NOTE'] = $1
        when /WWW="(.+)"/
          record['WWW'] = $1
        when /FTP="(.+)"/
          record['FTP'] = $1
        end
      end
      record
    end
  else
    # ALLERGEN, BIOTECHNOLOGY, COFACTOR, DOMAIN, MISCELLANEOUS, ... and any
    # unknown topic: the raw Array of comment bodies (nil when absent).
    entries
  end
end
de() click to toggle source

Returns an Array (for new format since rel 14) or a String (for old format before rel 14) for the DE line.

Calls superclass method Bio::EMBLDB::Common#de
    # File lib/bio/db/embl/uniprotkb.rb
# Returns the parsed DE line, caching it in @data['DE'].
#
# When parse_DE_line_rel14 yields an Array (format since rel 14) that Array
# is stored; otherwise the superclass implementation is invoked and whatever
# it left in @data['DE'] is returned.
def de
  cached = @data['DE']
  return cached if cached

  parsed = parse_DE_line_rel14(get('DE'))
  if Array === parsed
    # new format since rel14
    @data['DE'] ||= parsed
  else
    super
  end
  @data['DE']
end
dr(key = nil) click to toggle source

Bio::UniProtKB#dr

     # File lib/bio/db/embl/uniprotkb.rb
# Returns database cross-references from the DR lines.
#
# Without +key+ the result of embl_dr is returned unchanged. With a +key+
# (a database name) each record stored under that key is turned into a Hash;
# an unknown key gives an empty Array.
def dr(key = nil)
  return embl_dr unless key

  records = embl_dr[key] || []
  records.map do |fields|
    # NOTE(review): the third column is stored under a single-space key;
    # kept as is because callers may rely on it.
    {
      'Accession'      => fields[0],
      'Version'        => fields[1],
      ' '              => fields[2],
      'Molecular Type' => fields[3]
    }
  end
end
Also aliased as: embl_dr
dt(key = nil) click to toggle source

returns a Hash of information in the DT lines.

hash keys: 
  ['created', 'sequence', 'annotation']

Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is changed, and the word “annotation” is no longer used in DT lines. Despite the change, the word “annotation” is still used for keeping compatibility.

returns a String of information in the DT lines by a given key.

DT Line; date (3/entry)

DT DD-MMM-YYYY (integrated into UniProtKB/XXXXX.)
DT DD-MMM-YYYY (sequence version NN)
DT DD-MMM-YYYY (entry version NN)

The format was changed in UniProtKB release 7.0 of 07-Feb-2006. Below is the older format.

Old format of DT Line; date (3/entry)

DT DD-MMM-YYYY (rel. NN, Created)
DT DD-MMM-YYYY (rel. NN, Last sequence update)
DT DD-MMM-YYYY (rel. NN, Last annotation update)
    # File lib/bio/db/embl/uniprotkb.rb
# Returns the DT (date) lines as a Hash with the keys 'created',
# 'sequence' and 'annotation', or the single value for +key+ when given.
#
# The first three lines of the raw DT field are used in that order; the
# leading two-letter tag and its three following spaces are removed from
# each line. The Hash is cached in @data['DT'].
def dt(key = nil)
  return dt[key] if key
  return @data['DT'] if @data['DT']

  created, sequence, annotation = get('DT').split(/\n/)
  untag = lambda { |line| line.sub(/\w{2}   /,'').strip }

  @data['DT'] = {
    'created'    => untag.call(created),
    'sequence'   => untag.call(sequence),
    'annotation' => untag.call(annotation)
  }
end
embl_dr(key = nil)

Backup Bio::EMBLDB#dr as embl_dr

Alias for: dr
entry()
Alias for: entry_id
entry_id() click to toggle source

returns the ENTRY_NAME in the ID line.

    # File lib/bio/db/embl/uniprotkb.rb
 98 def entry_id
      # Delegates to id_line, asking only for the 'ENTRY_NAME' field.
 99   id_line('ENTRY_NAME')
100 end
Also aliased as: entry_name, entry
entry_name()
Alias for: entry_id
ft(feature_key = nil) click to toggle source

returns contents in the feature table.

Examples

sp = Bio::UniProtKB.new(entry)
ft = sp.ft
ft.class #=> Hash
ft.keys.each do |feature_key|
  ft[feature_key].each do |feature|
    feature['From'] #=> '1'
    feature['To']   #=> '21'
    feature['Description'] #=> ''
    feature['FTId'] #=> ''
    feature['diff'] #=> []
    feature['original'] #=> [feature_key, '1', '21', '', '']
  end
end
  • Bio::UniProtKB#ft -> Hash

    {FEATURE_KEY => [{'From' => int, 'To' => int, 
                      'Description' => aStr, 'FTId' => aStr,
                      'diff' => [original_residues, changed_residues],
                      'original' => aAry }],...}

returns an Array of the information about the feature_name in the feature table.

FT Line; feature table data (>=0, optional)

Col     Data item
-----   -----------------
 1- 2   FT
 6-13   Feature name 
15-20   `FROM' endpoint
22-27   `TO' endpoint
35-75   Description (>=0 per key)
-----   -----------------

Note: ‘FROM’ and ‘TO’ endpoints are allowed to use non-numerical characters including ‘<’, ‘>’ or ‘?’. (e.g. ‘<1’, ‘?42’)

See also www.expasy.org/sprot/userman.html#FT_line

     # File lib/bio/db/embl/uniprotkb.rb
# Returns the parsed feature table (FT lines) as a Hash, or only the
# entries stored under +feature_key+ when one is given.
#
# The format is detected by looking at adjacent line pairs among the first
# twelve FT lines: a "FT  KEY  location" line directly followed by a
# "FT  /qualifier" line selects ft_2019_11_parser, otherwise
# ft_legacy_parser is used. The result is cached in @data['FT'].
def ft(feature_key = nil)
  return ft[feature_key] if feature_key
  return @data['FT'] if @data['FT']

  ftlines = get('FT').split("\n")

  location_line  = /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/
  qualifier_line = /^FT +\/([^\s\=]+)(?:\=(\")?(.+)(\")?)?\s*$/

  # Pairs (0,1) .. (10,11), i.e. the same window the listing index 0..10 covers.
  fmt_2019_11 = ftlines.first(12).each_cons(2).any? { |current, following|
    location_line =~ current && qualifier_line =~ following
  }

  @data['FT'] = fmt_2019_11 ? ft_2019_11_parser(ftlines) : ft_legacy_parser(ftlines)
end
gene_name() click to toggle source

returns a String of the first gene name in the GN line.

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the first element of gene_names, or nil when gene_names itself
# returns nil/false.
def gene_name
  names = gene_names
  return nil unless names

  names.first
end
gene_names() click to toggle source

returns an Array of gene names in the GN line.

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the gene names found in the GN line.
#
# When the parsed GN data is a list of Hash records, the :name of every
# record is collected; otherwise the first element of the parsed data is
# returned as is.
def gene_names
  gn # set @data['GN'] if it hasn't been already done
  records = @data['GN']
  head = records.first
  return head unless head.class == Hash

  records.map { |record| record[:name] }
end
gn() click to toggle source

returns gene names in the GN line.

New UniProt/SwissProt format:

where <gene record> is:

{ :name => '...', 
  :synonyms => [ 's1', 's2', ... ],
  :loci   => [ 'l1', 'l2', ... ],
  :orfs     => [ 'o1', 'o2', ... ] 
}

Old format:

GN Line: Gene name(s) (>=0, optional)

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the parsed GN (gene name) data, parsing it on first use.
#
# If the GN text contains any of the tokens "Name=", "ORFNames=",
# "OrderedLocusNames=" or "Synonyms=" it is handed to gn_uniprot_parser,
# otherwise to gn_old_parser. The result is cached in @data['GN'].
def gn
  @data['GN'] ||= begin
    text = fetch('GN')
    new_style_tokens = [/Name=/, /ORFNames=/, /OrderedLocusNames=/, /Synonyms=/]
    if new_style_tokens.any? { |token| token === text }
      gn_uniprot_parser
    else
      gn_old_parser
    end
  end
  @data['GN']
end
hi() click to toggle source

The HI line

Bio::UniProtKB#hi #=> Array of Hash ({'Category' => str, 'Keywords' => [str, ...], 'Keyword' => str})

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the parsed HI lines as an Array of Hashes.
#
# The HI text is split on ". "; each piece is expected to look like
# "Category: kw1; kw2; last". The last keyword (minus a trailing '.') goes
# to 'Keyword', the preceding ones stay in 'Keywords'.
# The Array is cached in @data['HI'].
def hi
  return @data['HI'] if @data['HI']

  # Assigned before parsing, exactly like the cache slot in the listing.
  parsed = @data['HI'] = []
  fetch('HI').split(/\. /).each do |item|
    category, keywords = item.split(': ')
    keywords = keywords.split('; ')
    keyword = keywords.pop
    keyword.sub!(/\.$/, '')
    parsed << {'Category' => category, 'Keywords' => keywords, 'Keyword' => keyword}
  end
  parsed
end
id_line(key = nil) click to toggle source

returns a Hash of the ID line.

returns the content (Int or String) of the ID line for a given key. Hash keys: [‘ENTRY_NAME’, ‘DATA_CLASS’, ‘MOLECULE_TYPE’, ‘SEQUENCE_LENGTH’]

ID Line (since UniProtKB release 9.0 of 31-Oct-2006)

ID   P53_HUMAN               Reviewed;         393 AA.
#"ID  #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."

Examples

obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed", 
                  "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}

obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"

ID Line (older style)

ID   P53_HUMAN      STANDARD;      PRT;   393 AA.
#"ID  #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."

Examples

obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", 
                  "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}

obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
   # File lib/bio/db/embl/uniprotkb.rb
# Returns the ID line as a Hash, or the single value stored under +key+.
#
# The raw line in @orig['ID'] is split on runs of spaces. If the fifth
# token is "AA." the line is in the newer layout (no molecule type, length
# in the fourth token); otherwise the fourth token is the molecule type and
# the fifth the length. The Hash is cached in @data['ID'].
def id_line(key = nil)
  return id_line[key] if key
  return @data['ID'] if @data['ID']

  fields = @orig['ID'].split(/ +/)
  # after UniProtKB release 9.0 of 31-Oct-2006
  # (http://www.uniprot.org/docs/sp_news.htm)
  new_layout = (fields[4].to_s.chomp == 'AA.')

  molecule_type   = new_layout ? nil : fields[3].sub(/;/,'')
  sequence_length = (new_layout ? fields[3] : fields[4]).to_i

  @data['ID'] = {
    'ENTRY_NAME'      => fields[1],
    'DATA_CLASS'      => fields[2].sub(/;/,''),
    'MOLECULE_TYPE'   => molecule_type,
    'SEQUENCE_LENGTH' => sequence_length
  }
end
molecule() click to toggle source

returns a MOLECULE_TYPE in the ID line.

A short-cut for Bio::UniProtKB#id_line(‘MOLECULE_TYPE’).

    # File lib/bio/db/embl/uniprotkb.rb
108 def molecule
      # Delegates to id_line, asking only for the 'MOLECULE_TYPE' field.
109   id_line('MOLECULE_TYPE')
110 end
Also aliased as: molecule_type
molecule_type()
Alias for: molecule
oh() click to toggle source

The OH Line;

OH NCBI_TaxID=TaxID; HostName. br.expasy.org/sprot/userman.html#OH_line

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the parsed OH (organism host) lines as an Array of
# {'NCBI_TaxID' => str, 'HostName' => str or nil} Hashes.
#
# The OH text is split on ". "; every piece must contain
# "NCBI_TaxID=<digits>;", otherwise ArgumentError is raised. Text after
# "; " is taken as the host name, without a trailing '.'.
# The Array is cached in @data['OH'].
def oh
  @data['OH'] ||= fetch('OH').split("\. ").map do |host|
    unless host =~ /NCBI_TaxID=(\d+);/
      raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
                            $!, "\n", get('OH'), "\n"].join
    end
    taxid = $1

    host_name = host[/NCBI_TaxID=\d+; (.+)/, 1]
    host_name.sub!(/\.$/, '') if host_name

    {'NCBI_TaxID' => taxid, 'HostName' => host_name}
  end
  @data['OH']
end
os(num = nil) click to toggle source

returns an Array of Hashes, or a String of the OS line when an index (num) is given.

  • Bio::EMBLDB#os -> Array

[{'name' => '(Human)', 'os' => 'Homo sapiens'}, 
 {'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
{'name' => "(Human)", 'os' => 'Homo sapiens'}

OS Line; organism species (>=1)

OS   Genus species (name).
OS   Genus species (name0) (name1).
OS   Genus species (name0) (name1).
OS   Genus species (name0), G s0 (name0), and G s (name0) (name1).
OS   Homo sapiens (Human), and Rattus norvegicus (Rat)
OS   Hippotis sp. Clark and Watts 825.
OS   unknown cyperaceous sp.
    # File lib/bio/db/embl/uniprotkb.rb
# Returns the organisms named in the OS line.
#
# Without +num+: an Array of {'name' => "(Common name)" or nil,
# 'os' => "Genus species"} Hashes, one per organism (the OS text is split
# on ", and" and ", "). With +num+: a String "<os> <name>" built from the
# entry at that index. The Array is cached in @data['OS'].
#
# Raises RuntimeError when a piece contains no organism name.
def os(num = nil)
  @data['OS'] ||= fetch('OS').split(/, and|, /).map do |piece|
    unless piece =~ /(\w+ *[\w \:\'\+\-\.]+[\w\.])/
      raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
    end
    organism = $1
    # The parenthesised part, if any, is kept with its parentheses.
    {'name' => piece[/(\(.+\))/, 1], 'os' => organism}
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  chosen = @data['OS'][num]
  "#{chosen['os']} #{chosen['name']}"
end
ox() click to toggle source

returns a Hash of organism taxonomy cross-references.

OX Line; organism taxonomy cross-reference (>=1 per entry)

OX   NCBI_TaxID=1234;
OX   NCBI_TaxID=1234, 2345, 3456, 4567;
    # File lib/bio/db/embl/uniprotkb.rb
# Returns the OX (taxonomy cross-reference) line as a Hash of
# database name => Array of identifiers, e.g.
# {'NCBI_TaxID' => ['1234', '2345']}. Cached in @data['OX'].
def ox
  return @data['OX'] if @data['OX']

  entries = fetch('OX').sub(/\.$/,'').split(/;/).map(&:strip)
  @data['OX'] = entries.each_with_object({}) do |entry, xrefs|
    db, refs = entry.split(/=/)
    xrefs[db] = refs.split(/, */)
  end
end
protein_name() click to toggle source

returns the proposed official name of the protein. Returns a String.

Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has been changed. The method returns the full name, which is taken from the “RecName: Full=” or “SubName: Full=” line, normally at the beginning of the DE lines. Unlike the parser for the old format, there is no special treatment for fragments or precursors.

For old format, the method parses the DE lines and returns the protein name as a String.

DE Line; description (>=1)

"DE #{OFFICIAL_NAME} (#{SYNONYM})"
"DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]."
OFFICIAL_NAME  1/entry
SYNONYM        >=0
CONTAINS       >=0
    # File lib/bio/db/embl/uniprotkb.rb
# Returns the protein name as a String.
#
# New format (de returns an Array): the value of the first "Full" field
# found in a RecName or SubName group, or "" when there is none.
# Old format: the DE text in front of the first "(" (and in front of any
# "[...]" part), with " (Fragment)" appended when that leading part
# mentions "fragment".
def protein_name
  parsed_de_line = de

  unless parsed_de_line.kind_of?(Array)
    # old format (before Rel. 13.x)
    name = ""
    de_line = fetch('DE')
    if de_line
      head = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
      name = head[/^[^(]*/].strip
      name << ' (Fragment)' if head =~ /fragment/i
    end
    return name
  end

  # since UniProtKB release 14.0 of 22-Jul-2008
  parsed_de_line.each do |group|
    next unless group[0] == 'RecName' || group[0] == 'SubName'

    full = group[1..-1].find { |field| field[0] == 'Full' }
    return full[1].to_s if full
  end
  ''
end
ref() click to toggle source

returns contents in the R lines.

where <reference information Hash> is:

{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 
 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}

R Lines

  • RN RC RP RX RA RT RL RG

    # File lib/bio/db/embl/uniprotkb.rb
# Returns the reference (R*) lines as an Array of Hashes, one per
# reference, each with the keys RN, RC, RP, RX, RA, RT, RL and RG.
#
# The raw R field is cut at every "\nRN   "; within a reference the text of
# lines with the same tag is concatenated (each followed by one space) and
# then passed through the matching set_<TAG> method.
# The Array is cached in @data['R'].
#
# Raises RuntimeError for a line that is not "<R-tag>   <text>".
def ref
  return @data['R'] if @data['R']

  tags = %w[RN RC RP RX RA RT RL RG]

  @data['R'] = get('R').split(/\nRN   /).map do |chunk|
    record = {}
    tags.each { |tag| record[tag] = '' }

    # split removed the "RN   " prefix from all but the first reference.
    chunk = 'RN   ' + chunk unless /^RN   / =~ chunk

    chunk.split("\n").each do |line|
      unless /^(R[NPXARLCTG])   (.+)/ =~ line
        raise "Invalid format in R lines, \n[#{line}]\n"
      end
      record[$1] += $2 + ' '
    end

    # set_RN, set_RC, ... set_RG, in that order.
    tags.each { |tag| record[tag] = __send__("set_#{tag}", record[tag]) }
    record
  end
end
references() click to toggle source

returns Bio::Reference object from Bio::EMBLDB::Common#ref.

    # File lib/bio/db/embl/uniprotkb.rb
# Returns a References object built from the Hashes returned by ref.
#
# For every reference a Hash (default value '') is filled from the RA, RT,
# RL and RX fields and handed to Reference.new:
# * RA -> 'authors' (split on ", ")
# * RT -> 'title'
# * RL -> 'journal', 'volume', 'issue', 'pages', 'year' when the text looks
#         like "Journal VOL (ISSUE), FROM-TO (YEAR)", else only 'journal'
# * RX -> one entry per (tag, xref) pair, keyed by the downcased tag
# The result is cached in @data['references'].
def references
  @data['references'] ||= begin
    list = ref.map do |entry|
      attrs = entry.each_with_object(Hash.new('')) do |(tag, value), acc|
        case tag
        when 'RA'
          acc['authors'] = value.split(/, /)
        when 'RT'
          acc['title'] = value
        when 'RL'
          if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            acc['journal'], acc['volume'], acc['issue'], acc['pages'], acc['year'] = $~.captures
          else
            acc['journal'] = value
          end
        when 'RX'  # PUBMED, MEDLINE, DOI
          value.each { |db, xref| acc[db.downcase] = xref }
        end
      end
      Reference.new(attrs)
    end
    References.new(list)
  end
  @data['references']
end
seq() click to toggle source

returns a Bio::Sequence::AA of the amino acid sequence.

blank Line; sequence data (>=1)

     # File lib/bio/db/embl/uniprotkb.rb
# Returns the sequence as a Sequence::AA object, built from the untagged
# (blank-key) field with all spaces and digit runs removed.
# The object is cached in @data[''].
def seq
  @data[''] ||= Sequence::AA.new(fetch('').gsub(/ |\d+/,''))
  return @data['']
end
Also aliased as: aaseq
sequence_length() click to toggle source

returns a SEQUENCE_LENGTH in the ID line.

A short-cut for Bio::UniProtKB#id_line(‘SEQUENCE_LENGTH’).

    # File lib/bio/db/embl/uniprotkb.rb
117 def sequence_length
      # Delegates to id_line, asking only for the 'SEQUENCE_LENGTH' field.
118   id_line('SEQUENCE_LENGTH')
119 end
Also aliased as: aalen
set_RN(data) click to toggle source
    # File lib/bio/db/embl/uniprotkb.rb
598 def set_RN(data)
      # Returns the given RN text with leading/trailing whitespace removed.
599   data.strip
600 end
sq(key = nil) click to toggle source

returns a Hash of the contents of the SQ lines.

  • Bio::UniProtKB#sq -> hsh

returns a value of a key given in the SQ lines.

  • Bio::UniProtKB#sq(key) -> int or str

  • Keys: [‘MW’, ‘mw’, ‘molecular’, ‘weight’, ‘aalen’, ‘len’, ‘length’,

    'CRC64']

SQ Line; sequence header (1/entry)

SQ   SEQUENCE   233 AA;  25630 MW;  146A1B48A1475C86 CRC64;
SQ   SEQUENCE  \d+ AA; \d+ MW;  [0-9A-Z]+ CRC64;

MW, Dalton unit. CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).

     # File lib/bio/db/embl/uniprotkb.rb
# Returns the SQ (sequence header) line as a Hash with the keys 'aalen',
# 'MW' and 'CRC64', or one value of it when +key+ is given.
#
# +key+ is matched loosely and case-sensitively: anything containing "mw",
# "molecular" or "weight" selects 'MW'; anything containing "len" or "AA"
# selects 'aalen'; any other key is looked up literally.
# The Hash is cached in @data['SQ'].
#
# Raises RuntimeError when the SQ line does not have the expected layout.
def sq(key = nil)
  @data['SQ'] ||= begin
    unless fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
      raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
    end
    { 'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3 }
  end

  return @data['SQ'] unless key

  field =
    if [/mw/, /molecular/, /weight/].any? { |pattern| pattern === key }
      'MW'
    elsif [/len/, /length/, /AA/].any? { |pattern| pattern === key }
      'aalen'
    else
      key
    end
  @data['SQ'][field]
end
synonyms() click to toggle source

returns synonyms (unofficial and/or alternative names). Returns an Array containing String objects.

Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has been changed. The method returns the full or short names which are taken from “RecName: Short=”, “RecName: EC=”, and AltName lines, except after “Contains:” or “Includes:”. For keeping compatibility with old format parser, “RecName: EC=N.N.N.N” is reported as “EC N.N.N.N”. In addition, to prevent confusion, “Allergen=” and “CD_antigen=” prefixes are added for the corresponding fields.

For old format, the method parses the DE lines and returns synonyms. synonyms are each placed in () following the official name on the DE line.

    # File lib/bio/db/embl/uniprotkb.rb
# Returns synonyms (unofficial and/or alternative names) as an Array of
# Strings.
#
# New format (de returns an Array): every named field of the RecName,
# SubName and AltName groups whose value differs from protein_name, up to
# (not including) the first "Includes"/"Contains" group. "EC" values are
# reported as "EC <value>"; "Allergen" and "CD_antigen" values are
# prefixed with "<field>=".
#
# Old format: every parenthesised part of the DE text outside "[...]",
# except those mentioning "fragment".
def synonyms
  ary = Array.new
  parsed_de_line = self.de
  if parsed_de_line.kind_of?(Array) then
    # since UniProtKB release 14.0 of 22-Jul-2008
    # protein_name does not change while iterating, so look it up once
    # instead of once per name field.
    official_name = self.protein_name
    parsed_de_line.each do |a|
      case a[0]
      when 'Includes', 'Contains'
        break #the each loop
      when 'RecName', 'SubName', 'AltName'
        a[1..-1].each do |b|
          next unless b[1] and b[1] != official_name
          name = case b[0]
                 when 'EC'
                   "EC " + b[1]
                 when 'Allergen', 'CD_antigen'
                   b[0] + '=' + b[1]
                 else
                   b[1]
                 end
          ary.push name
        end
      end #case a[0]
    end #parsed_de_line.each
  else
    # old format (before Rel. 13.x)
    if de_line = fetch('DE') then
      line = de_line.sub(/\[.*\]/,'') # ignore stuff between [ and ].  That's the "contains" part
      line.scan(/\([^)]+/) do |synonym|
        unless synonym =~ /fragment/i then
          ary << synonym[1..-1].strip # index to remove the leading (
        end
      end
    end
  end
  return ary
end