class Bio::EMBL
Public Instance Methods
returns comment text in the comments (CC) line.
CC Line; comments or notes (>=0)
# File lib/bio/db/embl/embl.rb, line 402
#
# Returns the CC field as a String with the "CC " tag removed from the
# start of every line. The field is read through +get+ and coerced with
# +to_s+ before the tag is stripped.
def cc
  raw_comment = get('CC').to_s
  raw_comment.gsub(/^CC /, '')
end
# File lib/bio/db/embl/embl.rb, line 130
#
# Returns the 'DATA_CLASS' value of the parsed ID line (see #id_line).
def data_class
  id_line['DATA_CLASS']
end
created date. Returns Date object, String or nil.
# File lib/bio/db/embl/embl.rb, line 462
#
# Returns the result of +parse_date+ applied to the 'created' entry of the
# DT line hash (see #dt).
def date_created
  created_text = dt('created')
  parse_date(created_text)
end
modified date. Returns Date object, String or nil.
# File lib/bio/db/embl/embl.rb, line 457
#
# Returns the result of +parse_date+ applied to the 'updated' entry of the
# DT line hash (see #dt).
def date_modified
  updated_text = dt('updated')
  parse_date(updated_text)
end
database references (DR). Returns an array of Bio::Sequence::DBLink
objects.
# File lib/bio/db/embl/embl.rb, line 512
#
# Splits the DR field into lines and converts each line with
# Bio::Sequence::DBLink.parse_embl_DR_line, returning the results as an
# Array in line order.
def dblinks
  dr_lines = get('DR').split(/\n/)
  dr_lines.map do |dr_line|
    Bio::Sequence::DBLink.parse_embl_DR_line(dr_line)
  end
end
returns DIVISION in the ID line.
-
Bio::EMBL#division
-> String
# File lib/bio/db/embl/embl.rb, line 140
#
# Returns the 'DIVISION' value of the parsed ID line (see #id_line).
def division
  id_line['DIVISION']
end
returns contents in the date (DT) line.
-
Bio::EMBL#dt
-> <DT Hash>
where <DT Hash> is:
{}
-
Bio::EMBL#dt(key)
-> String
keys: ‘created’ and ‘updated’
DT Line; date (2/entry)
# File lib/bio/db/embl/embl.rb, line 182
#
# Returns the parsed DT (date) lines.
#
# The first line of the DT field becomes 'created' and the second becomes
# 'updated'; each has its first "two word characters + space" tag removed
# and is stripped. The resulting Hash is cached in @data['DT'].
#
# key:: optional Hash key ('created' or 'updated').
#
# Returns the whole Hash when +key+ is nil/false, otherwise the value
# stored under +key+.
def dt(key=nil)
  @data['DT'] ||= begin
    created_line, updated_line = get('DT').split(/\n/)
    {
      'created' => created_line.sub(/\w{2} /, '').strip,
      'updated' => updated_line.sub(/\w{2} /, '').strip
    }
  end

  return @data['DT'] unless key
  @data['DT'][key]
end
iterates on CDS features in the FT lines.
# File lib/bio/db/embl/embl.rb, line 381
#
# Walks the feature list returned by #ft and yields every feature whose
# +feature+ attribute equals 'CDS'. Requires a block.
def each_cds
  ft.each do |candidate|
    next unless candidate.feature == 'CDS'
    yield candidate
  end
end
iterates on gene features in the FT lines.
# File lib/bio/db/embl/embl.rb, line 390
#
# Walks the feature list returned by #ft and yields every feature whose
# +feature+ attribute equals 'gene'. Requires a block.
def each_gene
  ft.each do |candidate|
    next unless candidate.feature == 'gene'
    yield candidate
  end
end
returns ENTRY_NAME in the ID line.
-
Bio::EMBL#entry
-> String
# File lib/bio/db/embl/embl.rb, line 117
#
# Returns the 'ENTRY_NAME' value of the parsed ID line (see #id_line).
def entry
  id_line['ENTRY_NAME']
end
entry version number numbered by EMBL
# File lib/bio/db/embl/embl.rb, line 477
#
# Passes the 'updated' DT text to +parse_release_version+ and returns the
# element at index 1 of its result (the entry version number).
def entry_version
  release_and_version = parse_release_version(dt('updated'))
  release_and_version[1]
end
returns feature table header (String) in the feature header (FH) line.
FH Line; feature table header (0 or 2)
# File lib/bio/db/embl/embl.rb, line 326
#
# Returns the FH (feature table header) field as produced by +fetch+.
def fh
  header_text = fetch('FH')
  header_text
end
returns contents in the feature table (FT) lines.
-
Bio::EMBL#ft
{} -> {|Bio::Feature| }
same as features method in bio/db/genbank.rb
FT Line; feature table data (>=0)
# File lib/bio/db/embl/embl.rb, line 337
#
# Returns the parsed FT (feature table) lines.
#
# On first call the raw text in @orig['FT'] is scanned line by line and
# grouped into one row per feature: [feature_key, position, qualifier, ...].
# Each row is then converted with +parse_qualifiers+, the resulting Array is
# extended with Bio::Features::BackwardCompatibility and cached in
# @data['FT'].
#
# With a block, yields each cached feature; otherwise returns the cached
# Array.
def ft
  @data['FT'] ||= begin
    rows = []
    quoted = false # true while inside a qualifier value whose quote is still open

    @orig['FT'].each_line do |ft_line|
      next if ft_line =~ /^FEATURES/

      # Columns 20..79 hold the feature value (position or /qualifier=...).
      value = ft_line[20,60].chomp

      if ft_line =~ /^FT {3}(\S+)/
        # A feature key in the key column starts a new row.
        rows << [$1, value]
      elsif value =~ /^ \// && !quoted
        # A new qualifier: /q="data..., /q=data, /q
        rows.last.push(value)
        quoted = true if value =~ /=" / && value !~ /"$/
      else
        # Continuation of the previous position or qualifier text.
        rows.last.last << value
        quoted = false if value =~ /"$/
      end
    end

    rows.map! { |row| parse_qualifiers(row) }
    rows.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FT'] unless block_given?
  @data['FT'].each { |feature| yield feature }
end
returns contents in the ID line.
-
Bio::EMBL#id_line
-> <ID Hash>
where <ID Hash> is:
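{'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String, 'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String}
('SEQUENCE_VERSION' and 'TOPOLOGY' are only set when the ID line carries an "SV" field.)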
ID Line
"ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
DATA_CLASS = [‘standard’]
MOLECULE_TYPE: DNA RNA XXX
Code ( DIVISION )
EST (ESTs) PHG (Bacteriophage) FUN (Fungi) GSS (Genome survey) HTC (High Throughput cDNAs) HTG (HTGs) HUM (Human) INV (Invertebrates) ORG (Organelles) MAM (Other Mammals) VRT (Other Vertebrates) PLN (Plants) PRO (Prokaryotes) ROD (Rodents) SYN (Synthetic) STS (STSs) UNC (Unclassified) VRL (Viruses)
Rel 89- ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP. ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
where:
<1> Primary accession number
<2> Sequence version number
<3> Topology: ‘circular’ or ‘linear’
<4> Molecule type (see note 1 below)
<5> Data class (see section 3.1)
<6> Taxonomic division (see section 3.2)
<7> Sequence length (see note 2 below)
# File lib/bio/db/embl/embl.rb, line 89
#
# Returns the parsed ID line.
#
# The ID field is split on "; " and its parts are consumed left to right.
# When the second part starts with "SV", the line is read as
# accession; SV n; topology; molecule type; data class; division; length.
# Otherwise it is read as "entry_name data_class; molecule type; division;
# length". The resulting Hash is cached in @data['ID'].
#
# key:: optional Hash key such as 'ENTRY_NAME' or 'SEQUENCE_LENGTH'.
#
# Returns the whole Hash when +key+ is nil/false, otherwise the value
# stored under +key+.
def id_line(key=nil)
  @data['ID'] ||= begin
    fields = fetch('ID').split(/; +/)
    parsed = {}

    parsed['ENTRY_NAME'], parsed['DATA_CLASS'] = fields.shift.split(/ +/)
    if fields.first =~ /^SV/
      parsed['SEQUENCE_VERSION'] = fields.shift.split(' ').last
      parsed['TOPOLOGY'] = fields.shift
      parsed['MOLECULE_TYPE'] = fields.shift
      parsed['DATA_CLASS'] = fields.shift
    else
      parsed['MOLECULE_TYPE'] = fields.shift
    end
    parsed['DIVISION'] = fields.shift
    # The last part looks like "500 BP."; keep only the leading number.
    parsed['SEQUENCE_LENGTH'] = fields.shift.strip.split(' ').first.to_i

    parsed
  end

  key ? @data['ID'][key] : @data['ID']
end
returns MOLECULE_TYPE in the ID line.
-
Bio::EMBL#molecule
-> String
# File lib/bio/db/embl/embl.rb, line 125
#
# Returns the 'MOLECULE_TYPE' value of the parsed ID line (see #id_line).
def molecule
  id_line['MOLECULE_TYPE']
end
returns contents in the OS line.
-
Bio::EMBL#os
-> Array of <OS Hash>
where <OS Hash> is:
[{'name'=>'Human', 'os'=>'Homo sapiens'}, {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
-
Bio::EMBL#os[0]['name'] => "Human"
-
Bio::EMBL#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}
OS Line; organism species (>=1)
OS Trifolium repens (white clover)
Typically, OS line shows “Genus species (name)” style:
OS Genus species (name)
Other examples:
OS uncultured bacterium OS xxxxxx metagenome OS Cloning vector xxxxxxxx
Complicated examples:
OS Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848)) OS Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark) OS Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias) OS Sicydiinae sp. 'Keith et al., 2010' OS Acanthopagrus sp. 'Jean & Lee, 2008' OS Gaussia princeps (T. Scott, 1894) OS Rana sp. 8 Hillis & Wilcox, 2005 OS Contracaecum rudolphii C D'Amelio et al., 2007 OS Partula sp. 'Mt. Marau, Tahiti' OS Leptocephalus sp. 'type II larva' (Smith, 1989) OS Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002 OS Non-A, non-B hepatitis virus OS Canidae (dog, coyote, wolf, fox) OS Salmonella enterica subsp. enterica serovar 4,[5],12:i:- OS Yersinia enterocolitica (type O:5,27) OS Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4)) OS Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2)) OS Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L
# File lib/bio/db/embl/embl.rb, line 266
#
# Returns the organism species information parsed from the OS line.
#
# On first call the OS field is matched against the "Genus species (name)"
# pattern. On a match the organism and the parenthesised name are stored
# separately; otherwise the whole field is stored as the organism and the
# name is nil. The one-element Array of Hashes is cached in @data['OS'].
#
# num:: optional index into the parsed Array.
#
# Without +num+, returns the Array, e.g.
#   [{'name' => 'white clover', 'os' => 'Trifolium repens'}]
# With +num+, returns that entry formatted as a String, e.g.
#   "Trifolium repens (white clover)"
# (only the organism when no name was parsed), or nil when there is no
# entry at that index.
#
# NOTE: previously the +num+ branch built a malformed string and discarded
# it, so the Array was returned regardless of +num+.
def os(num = nil)
  unless @data['OS']
    os_text = fetch('OS')
    parsed =
      if /([A-Z][a-z]* *[\w \:\'\+\-]+\w) *\(([\w ]+)\)\s*\z/ =~ os_text
        { 'name' => $2, 'os' => $1 }
      else
        { 'name' => nil, 'os' => os_text }
      end
    @data['OS'] = [parsed]
  end

  return @data['OS'] unless num

  selected = @data['OS'][num]
  return nil unless selected

  # EX. "Trifolium repens (white clover)"
  if selected['name']
    "#{selected['os']} (#{selected['name']})"
  else
    selected['os'].to_s
  end
end
release number when created
# File lib/bio/db/embl/embl.rb, line 472
#
# Passes the 'created' DT text to +parse_release_version+ and returns the
# element at index 0 of its result (the release number).
def release_created
  release_and_version = parse_release_version(dt('created'))
  release_and_version[0]
end
release number when last updated
# File lib/bio/db/embl/embl.rb, line 467
#
# Passes the 'updated' DT text to +parse_release_version+ and returns the
# element at index 0 of its result (the release number).
def release_modified
  release_and_version = parse_release_version(dt('updated'))
  release_and_version[0]
end
returns the nucleotide sequence in this entry.
@orig as sequence bb Line; (blanks) sequence data (>=1)
# File lib/bio/db/embl/embl.rb, line 446
#
# Builds a Bio::Sequence::NA from the field fetched under the empty tag,
# after removing every space character and every run of digits from it.
def seq
  residues = fetch('').delete(' ').gsub(/\d+/, '')
  Bio::Sequence::NA.new(residues)
end
returns SEQUENCE_LENGTH in the ID line.
-
Bio::EMBL#sequence_length -> Int
# File lib/bio/db/embl/embl.rb, line 146
#
# Returns the 'SEQUENCE_LENGTH' value of the parsed ID line (see #id_line),
# which #id_line stores as an Integer.
def sequence_length
  id_line['SEQUENCE_LENGTH']
end
species
# File lib/bio/db/embl/embl.rb, line 519
#
# Returns the OS (organism species) field as produced by +fetch+, without
# the name/organism parsing done by #os.
def species
  fetch('OS')
end
returns sequence header information in the sequence header (SQ) line.
-
Bio::EMBL#sq
-> <SQ Hash>
where <SQ Hash> is:
{'ntlen' => Int, 'other' => Int, 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
-
Bio::EMBL#sq(base)
-> <base content in Int> -
Bio::EMBL#sq[base]
-> <base content in Int>
SQ Line; sequence header (1/entry)
SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
# File lib/bio/db/embl/embl.rb, line 423
#
# Returns the sequence header information parsed from the SQ line, e.g.
#   SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
#
# On first call the counts are extracted with a regexp and cached in
# @data['SQ'] as
#   {'ntlen' => Int, 'other' => Int, 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
# A count that is not captured is stored as 0 (nil.to_i).
#
# base:: optional key; a String or Symbol in any letter case
#        ('a', 'C', :g, 'ntlen', ...).
#
# Returns the whole Hash when +base+ is nil/false, otherwise the Integer
# stored under the lower-cased key (nil for an unknown key).
def sq(base = nil)
  unless @data['SQ']
    fetch('SQ') =~ /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
    @data['SQ'] = {
      'ntlen' => $1.to_i, 'other' => $6.to_i,
      'a' => $2.to_i, 'c' => $3.to_i, 'g' => $4.to_i, 't' => $5.to_i
    }
  end

  return @data['SQ'] unless base

  # to_s lets Symbol keys (:a) behave like their String form ('a').
  @data['SQ'][base.to_s.downcase]
end
returns the version information in the sequence version (SV) line.
-
Bio::EMBL#sv
-> Accession.Version in String -
Bio::EMBL#version
-> version number in Int
SV Line; sequence version (1/entry)
SV Accession.Version
# File lib/bio/db/embl/embl.rb, line 162
#
# Returns the sequence version string ("Accession.Version").
#
# The SV field is read with +field_fetch+ and its first ';' is removed. If
# the result is empty, the value is rebuilt from the ID line as
# "ENTRY_NAME.SEQUENCE_VERSION" instead.
def sv
  sv_field = field_fetch('SV').sub(/;/, '')
  return sv_field unless sv_field == ""

  [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
end
converts the entry to Bio::Sequence
object
- Arguments
- Returns
-
Bio::Sequence
object
# File lib/bio/db/embl/embl.rb, line 531
#
# Converts this entry by calling Bio::Sequence.adapter with the entry
# itself and the Bio::Sequence::Adapter::EMBL adapter module, and returns
# the result.
def to_biosequence
  adapter_module = Bio::Sequence::Adapter::EMBL
  Bio::Sequence.adapter(self, adapter_module)
end
# File lib/bio/db/embl/embl.rb, line 134
#
# Returns the 'TOPOLOGY' value of the parsed ID line (see #id_line). That
# key is only set when the ID line carries an "SV" field; otherwise the
# lookup yields nil.
def topology
  id_line['TOPOLOGY']
end
# File lib/bio/db/embl/embl.rb, line 169
#
# Returns the sequence version as an Integer.
#
# Uses the part after the first '.' of #sv; when #sv has no such part,
# falls back to the ID line's 'SEQUENCE_VERSION'. The chosen value is
# converted with +to_i+.
def version
  version_part = sv.split(".")[1]
  version_part ||= id_line['SEQUENCE_VERSION']
  version_part.to_i
end