class Bio::EMBL

Public Instance Methods

cc() click to toggle source

returns comment text in the comments (CC) line.

CC Line; comments or notes (>=0)

    # File lib/bio/db/embl/embl.rb
# Returns the comment text of the CC lines.
#
# The value obtained from get('CC') is converted to a String and the
# leading "CC   " tag is removed from the start of every line.
def cc
  raw_comment = get('CC').to_s
  raw_comment.gsub(/^CC   /, '')
end
Also aliased as: comment
comment()
Alias for: cc
data_class() click to toggle source
    # File lib/bio/db/embl/embl.rb
# Returns the 'DATA_CLASS' entry of the parsed ID line (see id_line).
def data_class
  id_line['DATA_CLASS']
end
date_created() click to toggle source

created date. Returns Date object, String or nil.

    # File lib/bio/db/embl/embl.rb
# Created date of the entry.
#
# Hands the 'created' value of the DT lines (see dt) to parse_date and
# returns whatever parse_date returns.
def date_created
  created_field = dt('created')
  parse_date(created_field)
end
date_modified() click to toggle source

modified date. Returns Date object, String or nil.

    # File lib/bio/db/embl/embl.rb
# Last-modified date of the entry.
#
# Hands the 'updated' value of the DT lines (see dt) to parse_date and
# returns whatever parse_date returns.
def date_modified
  updated_field = dt('updated')
  parse_date(updated_field)
end
division() click to toggle source

returns DIVISION in the ID line.

    # File lib/bio/db/embl/embl.rb
# Returns the 'DIVISION' entry of the parsed ID line (see id_line).
def division
  id_line['DIVISION']
end
dt(key=nil) click to toggle source

returns contents in the date (DT) line.

where <DT Hash> is:

{}

keys: ‘created’ and ‘updated’

DT Line; date (2/entry)

    # File lib/bio/db/embl/embl.rb
# Returns the parsed contents of the DT (date) lines.
#
# Without +key+ the whole Hash is returned; with +key+ ('created' or
# 'updated') only that entry is returned. The Hash is built on the first
# call and cached in @data['DT'].
#
# The first DT line is stored under 'created' and the second under
# 'updated', each with its two-letter line tag removed and surrounding
# whitespace stripped. When a line is missing, the corresponding value is
# nil (previously this raised NoMethodError).
def dt(key=nil)
  unless @data['DT']
    dt_lines = get('DT').split(/\n/)
    parsed = {}
    ['created', 'updated'].each_with_index do |name, index|
      line = dt_lines[index]
      # A record with fewer than two DT lines leaves the slot empty.
      parsed[name] = line ? line.sub(/\w{2}   /,'').strip : nil
    end
    @data['DT'] = parsed
  end

  key ? @data['DT'][key] : @data['DT']
end
each_cds() { |cds_feature| ... } click to toggle source

iterates on CDS features in the FT lines.

    # File lib/bio/db/embl/embl.rb
# Iterates over the features returned by ft and yields only those whose
# +feature+ attribute equals 'CDS'.
def each_cds
  ft.each { |candidate| yield candidate if candidate.feature == 'CDS' }
end
each_gene() { |gene_feature| ... } click to toggle source

iterates on gene features in the FT lines.

    # File lib/bio/db/embl/embl.rb
# Iterates over the features returned by ft and yields only those whose
# +feature+ attribute equals 'gene'.
def each_gene
  ft.each { |candidate| yield candidate if candidate.feature == 'gene' }
end
entry() click to toggle source

returns ENTRY_NAME in the ID line.

    # File lib/bio/db/embl/embl.rb
# Returns the 'ENTRY_NAME' entry of the parsed ID line (see id_line).
def entry
  id_line['ENTRY_NAME']
end
Also aliased as: entry_name, entry_id
entry_id()
Alias for: entry
entry_name()
Alias for: entry
entry_version() click to toggle source

entry version number numbered by EMBL

    # File lib/bio/db/embl/embl.rb
# Entry version number.
#
# Passes the 'updated' value of the DT lines (see dt) to
# parse_release_version and returns element [1] of its result.
def entry_version
  updated_field = dt('updated')
  parse_release_version(updated_field)[1]
end
features()
Alias for: ft
fh() click to toggle source

returns feature table header (String) in the feature header (FH) line.

FH Line; feature table header (0 or 2)

    # File lib/bio/db/embl/embl.rb
# Returns the feature table header, i.e. the result of fetch('FH').
def fh
  feature_header = fetch('FH')
  feature_header
end
ft() { |feature| ... } click to toggle source

returns contents in the feature table (FT) lines.

same as features method in bio/db/genbank.rb

FT Line; feature table data (>=0)

    # File lib/bio/db/embl/embl.rb
# Returns the parsed feature table (FT lines).
#
# On the first call the raw text in @orig['FT'] is split into one array
# per feature of the form [feature_key, position, qualifier, ...]; each
# array is then converted by parse_qualifiers and the result is cached in
# @data['FT'] (extended with Bio::Features::BackwardCompatibility).
#
# With a block, every cached feature is yielded; the cached collection is
# returned in both cases.
def ft
  unless @data['FT']
    raw_features = Array.new
    inside_quote = false
    @orig['FT'].each_line do |ft_line|
      next if ft_line =~ /^FEATURES/

      # Characters from offset 20 hold the feature value
      # (position or /qualifier=...).
      value = ft_line[20,60].chomp
      if ft_line =~ /^FT {3}(\S+)/
        # A feature key right after the tag starts a new feature.
        raw_features.push([ $1, value ])
      elsif (value =~ /^ \//) && !inside_quote
        # A new qualifier: /q="data..., /q=data, /q
        raw_features.last.push(value)
        inside_quote = true if (value =~ /=" /) && (value !~ /"$/)
      else
        # Continuation of the previous position or qualifier value.
        raw_features.last.last << value
        inside_quote = false if value =~ /"$/
      end
    end

    raw_features.map! { |parts| parse_qualifiers(parts) }

    @data['FT'] = raw_features.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FT'] unless block_given?

  @data['FT'].each { |feature| yield feature }
end
Also aliased as: features
id_line(key=nil) click to toggle source

returns contents in the ID line.

where <ID Hash> is:

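{'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String,
 'DIVISION' => String, 'SEQUENCE_LENGTH' => Int,
 'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String}

'SEQUENCE_VERSION' and 'TOPOLOGY' are only set when the ID line contains an SV field (the newer format shown below).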

ID Line

"ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."

DATA_CLASS = [‘standard’]

MOLECULE_TYPE: DNA RNA XXX

Code ( DIVISION )

EST (ESTs)
PHG (Bacteriophage)
FUN (Fungi)
GSS (Genome survey)
HTC (High Throughput cDNAs) 
HTG (HTGs)
HUM (Human)
INV (Invertebrates)
ORG (Organelles)
MAM (Other Mammals)
VRT (Other Vertebrates)
PLN (Plants)
PRO (Prokaryotes)
ROD (Rodents)
SYN (Synthetic)
STS (STSs)
UNC (Unclassified)
VRL (Viruses)

Release 89 and later:

ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
ID   <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.

  1. Primary accession number

  2. Sequence version number

  3. Topology: ‘circular’ or ‘linear’

  4. Molecule type (see note 1 below)

  5. Data class (see section 3.1)

  6. Taxonomic division (see section 3.2)

  7. Sequence length (see note 2 below)

    # File lib/bio/db/embl/embl.rb
 # Returns the parsed contents of the ID line.
 #
 # Without +key+ the whole Hash is returned, with +key+ only that entry.
 # The Hash is built on the first call and cached in @data['ID'].
 #
 # The text from fetch('ID') is split on "; ". When the second field
 # starts with "SV", the fields are read as sequence version, topology,
 # molecule type and data class; otherwise the second field is taken as
 # the molecule type. Division and sequence length follow in both cases.
 def id_line(key=nil)
   unless @data['ID']
     fields = fetch('ID').split(/; +/)
     parsed = Hash.new
     # The first field holds the entry name, optionally followed by the
     # data class.
     parsed['ENTRY_NAME'], parsed['DATA_CLASS'] = fields.shift.split(/ +/)
     if fields.first =~ /^SV/
       parsed['SEQUENCE_VERSION'] = fields.shift.split(' ').last
       parsed['TOPOLOGY'] = fields.shift
       parsed['MOLECULE_TYPE'] = fields.shift
       parsed['DATA_CLASS'] = fields.shift
     else
       parsed['MOLECULE_TYPE'] = fields.shift
     end
     parsed['DIVISION'] = fields.shift
     # "500 BP." -> 500
     parsed['SEQUENCE_LENGTH'] = fields.shift.strip.split(' ').first.to_i

     @data['ID'] = parsed
   end

   key ? @data['ID'][key] : @data['ID']
 end
molecule() click to toggle source

returns MOLECULE_TYPE in the ID line.

    # File lib/bio/db/embl/embl.rb
# Returns the 'MOLECULE_TYPE' entry of the parsed ID line (see id_line).
def molecule
  id_line['MOLECULE_TYPE']
end
Also aliased as: molecule_type
molecule_type()
Alias for: molecule
naseq()
Alias for: seq
ntseq()
Alias for: seq
os(num = nil) click to toggle source

returns contents in the OS line.

where <OS Hash> is:

[{'name'=>'Human', 'os'=>'Homo sapiens'}, 
 {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]

OS Line; organism species (>=1)

OS   Trifolium repens (white clover)

Typically, OS line shows “Genus species (name)” style:

OS   Genus species (name)

Other examples:

OS   uncultured bacterium
OS   xxxxxx metagenome
OS   Cloning vector xxxxxxxx

Complicated examples:

OS   Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848))
OS   Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark)
OS   Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias)
OS   Sicydiinae sp. 'Keith et al., 2010'
OS   Acanthopagrus sp. 'Jean & Lee, 2008'
OS   Gaussia princeps (T. Scott, 1894)
OS   Rana sp. 8 Hillis & Wilcox, 2005
OS   Contracaecum rudolphii C D'Amelio et al., 2007
OS   Partula sp. 'Mt. Marau, Tahiti'
OS   Leptocephalus sp. 'type II larva' (Smith, 1989)
OS   Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002
OS   Non-A, non-B hepatitis virus
OS   Canidae (dog, coyote, wolf, fox)
OS   Salmonella enterica subsp. enterica serovar 4,[5],12:i:-
OS   Yersinia enterocolitica (type O:5,27)
OS   Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4))
OS   Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2))
OS   Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L
    # File lib/bio/db/embl/embl.rb
# Returns the contents of the OS (organism species) line.
#
# Without +num+, returns an Array of Hashes of the form
# {'name' => String or nil, 'os' => String}. The Array is built on the
# first call and cached in @data['OS'].
#
# With +num+, returns the entry at that index formatted as
# "Genus species (name)", or just the 'os' text when no name was parsed.
# Previously the formatted string was malformed and discarded, so the
# whole Array was returned regardless of +num+.
#
# The line is split into 'os' and 'name' only when it ends with a simple
# parenthesised name, e.g. "Trifolium repens (white clover)"; otherwise
# the whole text is stored under 'os' and 'name' is nil.
def os(num = nil)
  unless @data['OS']
    text = fetch('OS')
    parsed =
      if /([A-Z][a-z]* *[\w \:\'\+\-]+\w) *\(([\w ]+)\)\s*\z/ =~ text
        {'name' => $2, 'os' => $1}
      else
        {'name' => nil, 'os' => text}
      end
    @data['OS'] = [parsed]
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  selected = @data['OS'][num]
  if selected['name']
    "#{selected['os']} (#{selected['name']})"
  else
    selected['os']
  end
end
release_created() click to toggle source

release number when created

    # File lib/bio/db/embl/embl.rb
# Release number at creation.
#
# Passes the 'created' value of the DT lines (see dt) to
# parse_release_version and returns element [0] of its result.
def release_created
  created_field = dt('created')
  parse_release_version(created_field)[0]
end
release_modified() click to toggle source

release number when last updated

    # File lib/bio/db/embl/embl.rb
# Release number at the last update.
#
# Passes the 'updated' value of the DT lines (see dt) to
# parse_release_version and returns element [0] of its result.
def release_modified
  updated_field = dt('updated')
  parse_release_version(updated_field)[0]
end
seq() click to toggle source

returns the nucleotide sequence in this entry.

The sequence is taken from the lines fetched with the empty tag (fetch('')).

bb Line; (blanks) sequence data (>=1)

    # File lib/bio/db/embl/embl.rb
# Returns the sequence of this entry as a Bio::Sequence::NA.
#
# The text from fetch('') is stripped of spaces and of all digit runs
# before it is wrapped.
def seq
  without_spaces = fetch('').gsub(/ /,'')
  residues = without_spaces.gsub(/\d+/,'')
  Bio::Sequence::NA.new(residues)
end
Also aliased as: naseq, ntseq
seqlen()
Alias for: sequence_length
sequence_length() click to toggle source

returns SEQUENCE_LENGTH in the ID line.

  • Bio::EMBL#sequence_length -> Integer

    # File lib/bio/db/embl/embl.rb
# Returns the 'SEQUENCE_LENGTH' entry of the parsed ID line (see id_line).
def sequence_length
  id_line['SEQUENCE_LENGTH']
end
Also aliased as: seqlen
species() click to toggle source

species

    # File lib/bio/db/embl/embl.rb
# Returns the species, i.e. the unparsed result of fetch('OS').
def species
  organism_text = fetch('OS')
  organism_text
end
sq(base = nil) click to toggle source

returns sequence header information in the sequence header (SQ) line.

where <SQ Hash> is:

{'ntlen' => Int, 'other' => Int,
 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}

SQ Line; sequence header (1/entry)

SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
    # File lib/bio/db/embl/embl.rb
# Returns the sequence header information from the SQ line.
#
# Without +base+ the whole Hash is returned:
#   {'ntlen' => Int, 'other' => Int,
#    'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
# With +base+ the value stored under base.downcase is returned.
# The Hash is built on the first call and cached in @data['SQ'].
def sq(base = nil)
  unless @data['SQ']
    # EX. "Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;"
    fetch('SQ') =~ /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
    @data['SQ'] = {
      'ntlen' => $1.to_i,
      'other' => $6.to_i,
      'a' => $2.to_i,
      'c' => $3.to_i,
      'g' => $4.to_i,
      't' => $5.to_i
    }
  end

  base ? @data['SQ'][base.downcase] : @data['SQ']
end
sv() click to toggle source

returns the version information in the sequence version (SV) line.

SV Line; sequence version (1/entry)

SV    Accession.Version
    # File lib/bio/db/embl/embl.rb
# Returns the sequence version string ("Accession.Version").
#
# The SV field (with its first ';' removed) is returned when it is not
# empty; otherwise the value is assembled from the 'ENTRY_NAME' and
# 'SEQUENCE_VERSION' entries of the ID line, joined with '.'.
def sv
  sv_field = field_fetch('SV').sub(/;/,'')
  return sv_field unless sv_field == ""

  [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
end
to_biosequence() click to toggle source

converts the entry to Bio::Sequence object


Arguments
Returns

Bio::Sequence object

    # File lib/bio/db/embl/embl.rb
# Converts this entry by handing it, together with the
# Bio::Sequence::Adapter::EMBL adapter, to Bio::Sequence.adapter and
# returning the result.
def to_biosequence
  embl_adapter = Bio::Sequence::Adapter::EMBL
  Bio::Sequence.adapter(self, embl_adapter)
end
topology() click to toggle source
    # File lib/bio/db/embl/embl.rb
# Returns the 'TOPOLOGY' entry of the parsed ID line (see id_line).
def topology
  id_line['TOPOLOGY']
end
version() click to toggle source
    # File lib/bio/db/embl/embl.rb
# Returns the sequence version as an Integer.
#
# The part after the first '.' of sv is used when present; otherwise the
# 'SEQUENCE_VERSION' entry of the ID line is used.
def version
  from_sv = sv.split(".")[1]
  version_text = from_sv || id_line['SEQUENCE_VERSION']
  version_text.to_i
end