class Bio::UniProtKB
Description¶ ↑
Parser class for UniProtKB/SwissProt and TrEMBL
database entry.
See the UniProtKB
document files and manuals.
Examples¶ ↑
str = File.read("p53_human.swiss") obj = Bio::UniProtKB.new(str) obj.entry_id #=> "P53_HUMAN"
References
¶ ↑
-
The
UniProt
Knowledgebase (UniProtKB
) www.uniprot.org/help/uniprotkb -
The Universal Protein Resource (
UniProt
) uniprot.org/ -
The UniProtKB/SwissProt/TrEMBL User Manual www.uniprot.org/docs/userman.html
Public Instance Methods
returns contents in the CC lines.
-
Bio::UniProtKB#cc
-> Hash
returns an object of contents in the TOPIC.
-
Bio::UniProtKB#cc(TOPIC)
-> Array w/in Hash, Hash
returns contents of the “ALTERNATIVE PRODUCTS”.
-
Bio::UniProtKB#cc
(‘ALTERNATIVE PRODUCTS’) -> Hash{'Event' => str, 'Named isoforms' => int, 'Comment' => str, 'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]} CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative splicing; Named isoforms=15; ... CC placentae isoforms. All tissues differentially splice exon 13; CC Name=A; Synonyms=no del; CC IsoId=P15529-1; Sequence=Displayed;
returns contents of the “DATABASE”.
-
Bio::UniProtKB#cc
(‘DATABASE’) -> Array[{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...] CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
returns contents of the “MASS SPECTROMETRY”.
-
Bio::UniProtKB#cc
(‘MASS SPECTROMETRY’) -> Array[{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...] CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
CC lines (>=0, optional)¶ ↑
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT CC IN LIVER, KIDNEY, LUNG AND BRAIN. CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK; CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
See also www.expasy.org/sprot/userman.html#CC_line
# File lib/bio/db/embl/uniprotkb.rb, line 806
#
# Returns the contents of the CC (comment) lines.
#
# On first use the CC text is split on the "-!- " delimiter into
# "TOPIC: body" blocks and cached in @data['CC'] as a Hash that maps each
# topic name to an Array of body Strings.
#
# topic:: nil (default) returns the whole Hash. A topic name returns:
#         * a single joined String for DEVELOPMENTAL STAGE, DISEASE,
#           ENZYME REGULATION, FUNCTION and INDUCTION (nil when the entry
#           has no such topic);
#         * the result of the matching cc_* helper for the structured
#           topics;
#         * an Array of Hashes for DATABASE (nil when absent);
#         * the raw Array of Strings (or nil) for anything else.
#
# When the entry has no CC lines at all, an empty Hash is returned
# regardless of +topic+.
#
# Raises RuntimeError when a comment block does not look like
# "TOPIC: body" or when the CC text cannot be processed.
def cc(topic = nil)
  unless @data['CC']
    parsed = {}
    comment_border = '-' * (77 - 4 + 1)
    dlm = /-!- /

    # 12KD_MYCSM has no CC lines.
    return parsed if get('CC').size == 0

    cc_raw = fetch('CC')

    # Removing the copyright statement.
    cc_raw.sub!(/ *---.+---/m, '')

    # Not any CC Lines without the copyright statement.
    return parsed if cc_raw == ''

    begin
      # Keep only the text in front of the dashed border line, then drop
      # the first delimiter so that split does not yield an empty head.
      cc_raw = cc_raw.split(/#{comment_border}/)[0]
      cc_raw = cc_raw.sub(dlm, '')
      cc_raw.split(dlm).each do |tmp|
        tmp = tmp.strip

        if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
          key = $1
          body = $2
          # Re-join words that were hyphenated across a line wrap.
          body.gsub!(/- (?!AND)/, '-')
          body.strip!
          (parsed[key] ||= []).push(body)
        else
          raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
                 '', get('CC'), ''].join("\n")
        end
      end
    rescue NameError
      # NoMethodError is a subclass of NameError, so both end up here.
      if fetch('CC') == ''
        return {}
      else
        raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
               "\n'#{get('CC')}'\n", "(#{$!})"].join
      end
    end

    @data['CC'] = parsed
  end

  comments = @data['CC']

  case topic
  when nil
    comments
  when 'DEVELOPMENTAL STAGE', 'DISEASE', 'ENZYME REGULATION',
       'FUNCTION', 'INDUCTION'
    # An entry is not required to carry every topic; return nil for a
    # missing one instead of calling join on nil.
    blocks = comments[topic]
    blocks && blocks.join('')
  when 'ALTERNATIVE PRODUCTS'
    cc_alternative_products(comments[topic])
  when 'BIOPHYSICOCHEMICAL PROPERTIES'
    cc_biophysiochemical_properties(comments[topic])
  when 'CATALITIC ACTIVITY'
    # NOTE(review): the key is spelled 'CATALITIC'. A request for
    # 'CATALYTIC ACTIVITY' therefore falls through to the raw Array below.
    # Confirm what cc_catalytic_activity returns before changing the key.
    cc_catalytic_activity(comments[topic])
  when 'CAUTION'
    cc_caution(comments[topic])
  when 'INTERACTION'
    cc_interaction(comments[topic])
  when 'MASS SPECTROMETRY'
    cc_mass_spectrometry(comments[topic])
  when 'PATHWAY'
    cc_pathway(comments[topic])
  when 'RNA EDITING'
    cc_rna_editing(comments[topic])
  when 'SUBCELLULAR LOCATION'
    cc_subcellular_location(comments[topic])
  when 'WEB RESOURCE'
    cc_web_resource(comments[topic])
  when 'DATABASE'
    # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    entries = comments['DATABASE']
    return entries unless entries

    entries.map do |entry|
      record = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
      # Drop the last character (the terminating period) before splitting.
      entry.sub(/.$/, '').split(/;/).each do |field|
        case field
        when /NAME=(.+)/
          record['NAME'] = $1
        when /NOTE=(.+)/
          record['NOTE'] = $1
        when /WWW="(.+)"/
          record['WWW'] = $1
        when /FTP="(.+)"/
          record['FTP'] = $1
        end
      end
      record
    end
  else
    # ALLERGEN, BIOTECHNOLOGY, COFACTOR, DOMAIN, MISCELLANEOUS,
    # PHARMACEUTICAL, POLYMORPHISM, PTM, SIMILARITY, SUBUNIT,
    # TISSUE SPECIFICITY, TOXIC DOSE and any unknown topic: raw Array.
    comments[topic]
  end
end
Returns an Array (for new format since rel 14) or a String (for old format before rel 14) for the DE line.
Bio::EMBLDB::Common#de
# File lib/bio/db/embl/uniprotkb.rb, line 333
#
# Returns the parsed DE line, cached in @data['DE'].
#
# The raw DE text is handed to parse_DE_line_rel14. When that yields an
# Array (new format since rel 14) the Array is cached and returned;
# otherwise the superclass implementation is invoked and whatever is then
# stored in @data['DE'] is returned.
def de
  cached = @data['DE']
  return cached if cached

  parsed = parse_DE_line_rel14(get('DE'))
  if Array === parsed
    # new format since rel14
    @data['DE'] ||= parsed
  else
    super
  end
  @data['DE']
end
# File lib/bio/db/embl/uniprotkb.rb, line 1171
#
# Returns the database cross-references obtained from embl_dr.
#
# key:: nil (default) returns the embl_dr result unchanged. Otherwise the
#       rows stored under +key+ are converted to Hashes with the keys
#       'Accession', 'Version', ' ' (a single space) and 'Molecular Type';
#       an unknown key gives an empty Array.
def dr(key = nil)
  return embl_dr unless key

  rows = embl_dr[key] || []
  rows.map do |row|
    {
      'Accession'      => row[0],
      'Version'        => row[1],
      ' '              => row[2],
      'Molecular Type' => row[3]
    }
  end
end
returns a Hash of information in the DT lines.
hash keys: ['created', 'sequence', 'annotation']
Since UniProtKB
release 7.0 of 07-Feb-2006, the DT line format is changed, and the word “annotation” is no longer used in DT lines. Despite the change, the word “annotation” is still used for keeping compatibility.
returns a String of information in the DT lines by a given key.
DT Line; date (3/entry)¶ ↑
DT DD-MMM-YYYY (integrated into UniProtKB/XXXXX.) DT DD-MMM-YYYY (sequence version NN) DT DD-MMM-YYYY (entry version NN)
The format has been changed in UniProtKB
release 7.0 of 07-Feb-2006. Below is the older format.
Old format of DT Line; date (3/entry)¶ ↑
DT DD-MMM-YYYY (rel. NN, Created) DT DD-MMM-YYYY (rel. NN, Last sequence update) DT DD-MMM-YYYY (rel. NN, Last annotation update)
# File lib/bio/db/embl/uniprotkb.rb, line 157
#
# Returns the information in the DT lines.
#
# key:: nil (default) returns a Hash with the keys 'created', 'sequence'
#       and 'annotation', taken from the first, second and third DT line.
#       With a key, the String stored under that key is returned.
#
# Each value is the DT line with its leading line code removed and the
# surrounding whitespace stripped. A value is nil when the entry has
# fewer than three DT lines (previously this raised NoMethodError).
def dt(key = nil)
  return dt[key] if key
  return @data['DT'] if @data['DT']

  created, sequence, annotation = get('DT').split(/\n/).map do |line|
    line.sub(/\w{2} /, '').strip
  end
  @data['DT'] = {
    'created'    => created,
    'sequence'   => sequence,
    'annotation' => annotation
  }
end
returns a ENTRY_NAME in the ID line.
# File lib/bio/db/embl/uniprotkb.rb, line 98
#
# Returns the 'ENTRY_NAME' field of the ID line, as reported by id_line.
def entry_id
  id_line('ENTRY_NAME')
end
returns contents in the feature table.
Examples¶ ↑
sp = Bio::UniProtKB.new(entry) ft = sp.ft ft.class #=> Hash ft.keys.each do |feature_key| ft[feature_key].each do |feature| feature['From'] #=> '1' feature['To'] #=> '21' feature['Description'] #=> '' feature['FTId'] #=> '' feature['diff'] #=> [] feature['original'] #=> [feature_key, '1', '21', '', ''] end end
-
Bio::UniProtKB#ft
-> Hash{FEATURE_KEY => [{'From' => int, 'To' => int, 'Description' => aStr, 'FTId' => aStr, 'diff' => [original_residues, changed_residues], 'original' => aAry }],...}
returns an Array of the information about the feature_name in the feature table.
-
Bio::UniProtKB#ft(feature_name)
-> Array of Hash[{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
FT Line; feature table data (>=0, optional)¶ ↑
Col Data item ----- ----------------- 1- 2 FT 6-13 Feature name 15-20 `FROM' endpoint 22-27 `TO' endpoint 35-75 Description (>=0 per key) ----- -----------------
Note: ‘FROM’ and ‘TO’ endpoints are allowed to use non-numerical characters including ‘<’, ‘>’ or ‘?’. (e.g. ‘<1’, ‘?42’)
See also www.expasy.org/sprot/userman.html#FT_line
# File lib/bio/db/embl/uniprotkb.rb, line 1236
#
# Returns the contents of the feature table (FT lines).
#
# feature_key:: nil (default) returns the whole parsed table, cached in
#               @data['FT']. With a key, the value stored under that key
#               in the parsed table is returned.
#
# The format is detected by looking at the first lines of the table: when
# a line matching the "FT KEY LOCATION" pattern is directly followed by an
# "FT /qualifier" line, ft_2019_11_parser is used; otherwise
# ft_legacy_parser is used.
def ft(feature_key = nil)
  return ft[feature_key] if feature_key
  return @data['FT'] if @data['FT']

  ftlines = get('FT').split("\n")

  # Only the pairs (line i, line i+1) for i = 0..10 are inspected.
  fmt_2019_11 = ftlines.first(12).each_cons(2).any? do |current, following|
    /^FT +([^\s]+) +(([^\s]+)\:)?([\<\?]?[0-9]+|\?)(?:\.\.([\>\?]?[0-9]+|\?))?\s*$/ =~ current &&
      /^FT +\/([^\s\=]+)(?:\=(\")?(.+)(\")?)?\s*$/ =~ following
  end

  @data['FT'] = fmt_2019_11 ? ft_2019_11_parser(ftlines) : ft_legacy_parser(ftlines)
end
returns a String of the first gene name in the GN line.
# File lib/bio/db/embl/uniprotkb.rb, line 448
#
# Returns the first element of gene_names, or nil when gene_names itself
# returns nil (or false).
def gene_name
  names = gene_names
  names ? names.first : nil
end
returns an Array of gene names in the GN line.
# File lib/bio/db/embl/uniprotkb.rb, line 437
#
# Returns the gene names from the GN line.
#
# When the parsed GN data is a list of Hash records, the :name value of
# every record is returned as an Array. Otherwise the first element of
# the parsed GN data is returned as it is.
def gene_names
  gn # set @data['GN'] if it hasn't been already done
  records = @data['GN']
  return records.first unless records.first.class == Hash

  records.map { |record| record[:name] }
end
returns gene names in the GN line.
New UniProt/SwissProt format:
-
Bio::UniProtKB#gn
-> [ <gene record>* ]
where <gene record> is:
{ :name => '...', :synonyms => [ 's1', 's2', ... ], :loci => [ 'l1', 'l2', ... ], :orfs => [ 'o1', 'o2', ... ] }
Old format:
-
Bio::UniProtKB#gn
-> Array # AND -
Bio::UniProtKB#gn[0]
-> Array # OR
GN Line: Gene name(s) (>=0, optional)¶ ↑
# File lib/bio/db/embl/uniprotkb.rb, line 361
#
# Returns the parsed GN (gene name) lines, cached in @data['GN'].
#
# The GN text is parsed by gn_uniprot_parser when it contains one of the
# "Name=", "ORFNames=", "OrderedLocusNames=" or "Synonyms=" tokens, and by
# gn_old_parser otherwise.
def gn
  return @data['GN'] if @data['GN']

  new_format = /Name=|ORFNames=|OrderedLocusNames=|Synonyms=/ === fetch('GN')
  @data['GN'] = new_format ? gn_uniprot_parser : gn_old_parser
  @data['GN']
end
The HI line¶ ↑
Bio::UniProtKB#hi
#=> Array of Hash (keys: 'Category', 'Keywords', 'Keyword')
# File lib/bio/db/embl/uniprotkb.rb, line 722
#
# Returns the parsed HI line as an Array of Hashes, cached in @data['HI'].
#
# The HI text is cut at every ". "; each piece is expected to read
# "Category: keyword; keyword; ...". Every Hash carries:
# 'Category':: the text in front of ": "
# 'Keywords':: all keywords except the last one
# 'Keyword'::  the last keyword, without a trailing period
def hi
  unless @data['HI']
    @data['HI'] = []
    fetch('HI').split(/\. /).each do |hlist|
      category, keyword_list = hlist.split(': ')
      keywords = keyword_list.split('; ')
      keyword = keywords.pop
      keyword.sub!(/\.$/, '')
      @data['HI'] << {
        'Category' => category,
        'Keywords' => keywords,
        'Keyword'  => keyword
      }
    end
  end
  @data['HI']
end
returns a Hash of the ID line.
returns the content (Integer or String) of the ID line for a given key. Hash keys: [‘ENTRY_NAME’, ‘DATA_CLASS’, ‘MOLECULE_TYPE’, ‘SEQUENCE_LENGTH’]
ID Line (since UniProtKB
release 9.0 of 31-Oct-2006)¶ ↑
ID P53_HUMAN Reviewed; 393 AA. #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."
Examples¶ ↑
obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil} obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
ID Line (older style)¶ ↑
ID P53_HUMAN STANDARD; PRT; 393 AA. #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
Examples¶ ↑
obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"} obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
# File lib/bio/db/embl/uniprotkb.rb, line 73
#
# Returns the parsed ID line.
#
# key:: nil (default) returns a Hash with the keys 'ENTRY_NAME',
#       'DATA_CLASS', 'MOLECULE_TYPE' and 'SEQUENCE_LENGTH', cached in
#       @data['ID']. With a key, the value stored under it is returned.
#
# The raw line (@orig['ID']) is split on runs of spaces. When the fifth
# field is "AA." the line is in the style used after UniProtKB release 9.0
# of 31-Oct-2006 (http://www.uniprot.org/docs/sp_news.htm), which has no
# molecule type; otherwise the older style with a molecule type is assumed.
def id_line(key = nil)
  return id_line[key] if key
  return @data['ID'] if @data['ID']

  part = @orig['ID'].split(/ +/)
  new_style = part[4].to_s.chomp == 'AA.'
  molecule_type = new_style ? nil : part[3].sub(/;/, '')
  length_field = new_style ? part[3] : part[4]

  @data['ID'] = {
    'ENTRY_NAME'      => part[1],
    'DATA_CLASS'      => part[2].sub(/;/, ''),
    'MOLECULE_TYPE'   => molecule_type,
    'SEQUENCE_LENGTH' => length_field.to_i
  }
end
returns a MOLECULE_TYPE in the ID line.
A short-cut for Bio::UniProtKB#id_line
(‘MOLECULE_TYPE’).
# File lib/bio/db/embl/uniprotkb.rb, line 108
#
# Returns the 'MOLECULE_TYPE' field of the ID line, as reported by id_line.
def molecule
  id_line('MOLECULE_TYPE')
end
The OH Line; ¶ ↑
OH NCBI_TaxID=TaxID; HostName. br.expasy.org/sprot/userman.html#OH_line
# File lib/bio/db/embl/uniprotkb.rb, line 531
#
# Returns the parsed OH lines as an Array of Hashes with the keys
# 'NCBI_TaxID' and 'HostName', cached in @data['OH'].
#
# The OH text is tokenized around every "NCBI_TaxID=<digits>;" and the
# free text that follows each taxonomy ID becomes its host name (without
# the trailing period). A host name may be nil when none follows the ID.
#
# Raises ArgumentError when the text does not follow that layout. The
# message names the reason (:missing_semicolon, :missing_taxid or
# :missing_taxid_last) and includes the raw OH lines.
def oh
  unless @data['OH']
    hosts = []
    tokens = fetch('OH').split(/(NCBI\_TaxID\=)(\d+)(\;)/)
    reason = catch :error do
      taxid = nil
      host_name = nil
      while token = tokens.shift
        token = token.to_s.strip
        case token
        when ''
          next
        when 'NCBI_TaxID='
          # A new ID starts: flush the record collected so far.
          if taxid then
            hosts.push({'NCBI_TaxID' => taxid, 'HostName' => host_name})
            taxid = nil
            host_name = nil
          end
          taxid = tokens.shift
          throw :error, :missing_semicolon if tokens.shift != ';'
        else
          # Free text: only one host name is allowed per taxonomy ID.
          throw :error, :missing_taxid if host_name
          host_name = token
          host_name.sub!(/\.\z/, '')
        end
      end #while token...
      if taxid then
        hosts.push({'NCBI_TaxID' => taxid, 'HostName' => host_name})
      elsif host_name then
        throw :error, :missing_taxid_last
      end
      nil
    end #reason = catch...
    if reason then
      # throw/catch does not set $!, so report the thrown symbol itself.
      raise ArgumentError,
            ["Error: Invalid OH line format (#{entry_id}): ",
             reason, "\n", get('OH'), "\n"].join
    end
    @data['OH'] = hosts
  end
  @data['OH']
end
returns an Array of Hashes, or a String of the OS line when an index is given.
-
Bio::EMBLDB#os -> Array
[{'name' => '(Human)', 'os' => 'Homo sapiens'}, {'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
-
Bio::EPTR#os -> Hash
{'name' => "(Human)", 'os' => 'Homo sapiens'}
-
Bio::UniProtKB#os[0]
[‘name’] -> “(Human)” -
Bio::EPTR#os(0) -> “Homo sapiens (Human)”
OS Line; organism species (>=1)¶ ↑
OS Genus species (name). OS Genus species (name0) (name1). OS Genus species (name0) (name1). OS Genus species (name0), G s0 (name0), and G s (name0) (name1). OS Homo sapiens (Human), and Rattus norvegicus (Rat) OS Hippotis sp. Clark and Watts 825. OS unknown cyperaceous sp.
# File lib/bio/db/embl/uniprotkb.rb, line 470
#
# Returns the organism species from the OS line.
#
# num:: nil (default) returns an Array of Hashes, each with the keys
#       'name' (the first "(...)" group of the item, or nil) and 'os' (the
#       organism name), cached in @data['OS']. With an index, the String
#       "<os> <name>" of that item is returned.
#
# The OS text is split on ", and" and ", ". Raises RuntimeError when an
# item does not contain anything that looks like an organism name.
def os(num = nil)
  unless @data['OS']
    @data['OS'] = fetch('OS').split(/, and|, /).map do |item|
      organism = item[/(\w+ *[\w \:\'\+\-\.]+[\w\.])/, 1]
      raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n" unless organism

      {'name' => item[/(\(.+\))/, 1], 'os' => organism}
    end
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  selected = @data['OS'][num]
  "#{selected['os']} #{selected['name']}"
end
returns a Hash of organism taxonomy cross-references.
-
Bio::UniProtKB#ox
-> Hash{'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
OX Line; organism taxonomy cross-reference (>=1 per entry)¶ ↑
OX NCBI_TaxID=1234; OX NCBI_TaxID=1234, 2345, 3456, 4567;
# File lib/bio/db/embl/uniprotkb.rb, line 514
#
# Returns the organism taxonomy cross-references from the OX line as a
# Hash that maps a database name to an Array of ID Strings, cached in
# @data['OX'].
#
# The OX text loses a trailing period, is split on ";", and every
# "DB=id, id, ..." item is split on "=" and then on commas.
def ox
  unless @data['OX']
    items = fetch('OX').sub(/\.$/, '').split(/;/).map { |item| item.strip }
    xrefs = {}
    items.each do |item|
      db, refs = item.split(/=/)
      xrefs[db] = refs.split(/, */)
    end
    @data['OX'] = xrefs
  end
  @data['OX']
end
returns the proposed official name of the protein. Returns a String.
Since UniProtKB
release 14.0 of 22-Jul-2008, the DE line format has been changed. The method returns the full name, which is normally taken from the “RecName: Full=” or “SubName: Full=” line at the beginning of the DE lines. Unlike the parser for the old format, there is no special treatment for fragments or precursors.
For old format, the method parses the DE lines and returns the protein name as a String.
DE Line; description (>=1)¶ ↑
"DE #{OFFICIAL_NAME} (#{SYNONYM})" "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]." OFFICIAL_NAME 1/entry SYNONYM >=0 CONTAINS >=0
# File lib/bio/db/embl/uniprotkb.rb, line 250
#
# Returns the proposed official name of the protein as a String.
#
# New format (since UniProtKB release 14.0 of 22-Jul-2008, +de+ returns an
# Array): the value of the first "Full" pair found in a 'RecName' or
# 'SubName' record, or an empty String when there is none.
#
# Old format (before Rel. 13.x): the DE text in front of the first "("
# and "[", stripped, with ' (Fragment)' appended when that leading part
# mentions "fragment".
def protein_name
  parsed_de_line = de

  unless parsed_de_line.kind_of?(Array)
    # old format (before Rel. 13.x)
    de_line = fetch('DE')
    return "" unless de_line

    head = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
    name = head[/^[^(]*/].strip
    name << ' (Fragment)' if head =~ /fragment/i
    return name
  end

  # since UniProtKB release 14.0 of 22-Jul-2008
  parsed_de_line.each do |record|
    next unless record[0] == 'RecName' || record[0] == 'SubName'

    full_pair = record[1..-1].find { |pair| pair[0] == 'Full' }
    return full_pair[1].to_s if full_pair
  end
  ''
end
returns contents in the R lines.
-
Bio::EMBLDB::Common#ref
-> [ <reference information Hash>* ]
where <reference information Hash> is:
{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
R Lines
-
RN RC RP RX RA RT RL RG
# File lib/bio/db/embl/uniprotkb.rb 588 def ref 589 unless @data['R'] 590 @data['R'] = [get('R').split(/\nRN /)].flatten.map { |str| 591 hash = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 592 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''} 593 str = 'RN ' + str unless /^RN / =~ str 594 595 str.split("\n").each do |line| 596 if /^(R[NPXARLCTG]) (.+)/ =~ line 597 hash[$1] += $2 + ' ' 598 else 599 raise "Invalid format in R lines, \n[#{line}]\n" 600 end 601 end 602 603 hash['RN'] = set_RN(hash['RN']) 604 hash['RC'] = set_RC(hash['RC']) 605 hash['RP'] = set_RP(hash['RP']) 606 hash['RX'] = set_RX(hash['RX']) 607 hash['RA'] = set_RA(hash['RA']) 608 hash['RT'] = set_RT(hash['RT']) 609 hash['RL'] = set_RL(hash['RL']) 610 hash['RG'] = set_RG(hash['RG']) 611 612 hash 613 } 614 615 end 616 @data['R'] 617 end
returns Bio::Reference
object from Bio::EMBLDB::Common#ref
.
# File lib/bio/db/embl/uniprotkb.rb, line 682
#
# Returns a References object built from the Hashes returned by +ref+,
# cached in @data['references'].
#
# For every reference a Hash (default value '') is filled and handed to
# Reference.new:
# 'RA':: split on ", " into 'authors'
# 'RT':: stored as 'title'
# 'RL':: split into 'journal', 'volume', 'issue', 'pages' and 'year' when
#        it reads "Journal VOL (ISSUE), PAGES (YEAR)"; otherwise the whole
#        value is stored as 'journal'
# 'RX':: every tag/xref pair is stored under the down-cased tag
#        (PUBMED, MEDLINE, DOI)
def references
  return @data['references'] if @data['references']

  list = ref.map do |ent|
    fields = Hash.new('')
    ent.each do |key, value|
      case key
      when 'RA'
        fields['authors'] = value.split(/, /)
      when 'RT'
        fields['title'] = value
      when 'RL'
        if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
          fields['journal'], fields['volume'], fields['issue'],
            fields['pages'], fields['year'] = $1, $2, $3, $4, $5
        else
          fields['journal'] = value
        end
      when 'RX' # PUBMED, MEDLINE, DOI
        value.each { |tag, xref| fields[tag.downcase] = xref }
      end
    end
    Reference.new(fields)
  end

  @data['references'] = References.new(list)
  @data['references']
end
returns a Bio::Sequence::AA
of the amino acid sequence.
blank Line; sequence data (>=1)
# File lib/bio/db/embl/uniprotkb.rb, line 1464
#
# Returns the amino acid sequence as a Sequence::AA object, cached in
# @data[''].
#
# The sequence block is fetched under the empty tag and every space and
# every run of digits is removed before the object is built.
def seq
  @data[''] ||= Sequence::AA.new(fetch('').gsub(/ |\d+/, ''))
  @data['']
end
returns a SEQUENCE_LENGTH in the ID line.
A short-cut for Bio::UniProtKB#id_line
(‘SEQUENCE_LENGTH’).
# File lib/bio/db/embl/uniprotkb.rb, line 117
#
# Returns the 'SEQUENCE_LENGTH' field of the ID line, as reported by
# id_line.
def sequence_length
  id_line('SEQUENCE_LENGTH')
end
# File lib/bio/db/embl/uniprotkb.rb, line 619
#
# Normalizes the accumulated RN (reference number) text by removing the
# leading and trailing whitespace. Returns a new String.
def set_RN(data)
  data.strip
end
returns a Hash of contents in the SQ lines.
-
Bio::UniProtKB#sq -> hsh
returns a value of a key given in the SQ lines.
-
Bio::UniProtKB#sq(key) -> int or str
-
Keys: [‘MW’, ‘mw’, ‘molecular’, ‘weight’, ‘aalen’, ‘len’, ‘length’,
'CRC64']
SQ Line; sequence header (1/entry)¶ ↑
SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64; SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
MW, Dalton unit. CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
# File lib/bio/db/embl/uniprotkb.rb, line 1436
#
# Returns the contents of the SQ (sequence header) line.
#
# key:: nil (default) returns a Hash with the keys 'aalen' (Integer),
#       'MW' (Integer) and 'CRC64' (String), cached in @data['SQ'].
#       A key matching /mw/, /molecular/ or /weight/ returns the molecular
#       weight; a key matching /len/, /length/ or /AA/ returns the
#       sequence length; any other key is looked up in the Hash directly.
#
# Raises RuntimeError when the SQ line does not read
# "<n> AA; <n> MW; <checksum> CRC64;".
def sq(key = nil)
  unless @data['SQ']
    matched = /(\d+) AA\; (\d+) MW; (.+) CRC64;/.match(fetch('SQ'))
    raise "Invalid SQ Line: \n'#{fetch('SQ')}'" unless matched

    @data['SQ'] = {
      'aalen' => matched[1].to_i,
      'MW'    => matched[2].to_i,
      'CRC64' => matched[3]
    }
  end

  header = @data['SQ']
  return header unless key

  case key
  when /mw/, /molecular/, /weight/ then header['MW']
  when /len/, /length/, /AA/ then header['aalen']
  else header[key]
  end
end
returns synonyms (unofficial and/or alternative names). Returns an Array containing String objects.
Since UniProtKB
release 14.0 of 22-Jul-2008, the DE line format has been changed. The method returns the full or short names which are taken from “RecName: Short=”, “RecName: EC=”, and AltName lines, except those after “Contains:” or “Includes:”. To keep compatibility with the old format parser, “RecName: EC=N.N.N.N” is reported as “EC N.N.N.N”. In addition, to prevent confusion, “Allergen=” and “CD_antigen=” prefixes are added for the corresponding fields.
For old format, the method parses the DE lines and returns synonyms. synonyms are each placed in () following the official name on the DE line.
# File lib/bio/db/embl/uniprotkb.rb, line 291
#
# Returns the synonyms (unofficial and/or alternative names) as an Array
# of Strings.
#
# New format (since UniProtKB release 14.0 of 22-Jul-2008, +de+ returns an
# Array): every name found in the 'RecName', 'SubName' and 'AltName'
# records that differs from protein_name, up to the first 'Includes' or
# 'Contains' record. "EC" values are reported as "EC N.N.N.N";
# 'Allergen' and 'CD_antigen' values keep their key as a "Key=" prefix.
#
# Old format (before Rel. 13.x): every "(...)" group of the DE text that
# lies outside "[...]" and does not mention "fragment".
def synonyms
  ary = []
  parsed_de_line = de
  if parsed_de_line.kind_of?(Array) then
    # since UniProtKB release 14.0 of 22-Jul-2008
    official_name = nil # looked up once, on first use
    parsed_de_line.each do |record|
      case record[0]
      when 'Includes', 'Contains'
        break #the each loop
      when 'RecName', 'SubName', 'AltName'
        record[1..-1].each do |pair|
          value = pair[1]
          next unless value

          official_name ||= protein_name
          next if value == official_name

          ary.push(
            case pair[0]
            when 'EC'
              "EC " + value
            when 'Allergen', 'CD_antigen'
              pair[0] + '=' + value
            else
              value
            end
          )
        end
      end #case record[0]
    end #parsed_de_line.each
  else
    # old format (before Rel. 13.x)
    if de_line = fetch('DE') then
      line = de_line.sub(/\[.*\]/, '') # ignore stuff between [ and ]. That's the "contains" part
      line.scan(/\([^)]+/) do |synonym|
        unless synonym =~ /fragment/i then
          ary << synonym[1..-1].strip # index to remove the leading (
        end
      end
    end
  end
  ary
end