class Bio::EMBL - BioRuby API documentation

Source

    # File lib/bio/db/embl/embl.rb
# Returns the comment text held in the CC lines, with the leading
# "CC   " tag removed from the start of every line.
def cc
  raw_comment = get('CC').to_s
  raw_comment.gsub(/^CC   /, '')
end

returns comment text in the comments (CC) line.

CC Line; comments or notes (>=0)

Also aliased as: comment

comment ()

Alias for: cc

data_class ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns the DATA_CLASS field of the parsed ID line.
def data_class
  id_line['DATA_CLASS']
end

date_created ()

Source

    # File lib/bio/db/embl/embl.rb
# Created date: the 'created' entry of the DT line, passed through
# parse_date.
def date_created
  created_line = dt('created')
  parse_date(created_line)
end

created date. Returns Date object, String or nil.

date_modified ()

Source

    # File lib/bio/db/embl/embl.rb
# Modified date: the 'updated' entry of the DT line, passed through
# parse_date.
def date_modified
  updated_line = dt('updated')
  parse_date(updated_line)
end

modified date. Returns Date object, String or nil.

dblinks ()

Source

    # File lib/bio/db/embl/embl.rb
# Database references (DR). Each DR line is handed to
# Bio::Sequence::DBLink.parse_embl_DR_line and the results are returned
# as an Array, one element per line.
def dblinks
  dr_lines = get('DR').split(/\n/)
  dr_lines.map do |dr_line|
    Bio::Sequence::DBLink.parse_embl_DR_line(dr_line)
  end
end

database references (DR). Returns an array of Bio::Sequence::DBLink objects.

division ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns DIVISION in the ID line.
def division
  id_line['DIVISION']
end

returns DIVISION in the ID line.

Bio::EMBL#division -> String

dt (key=nil)

Source

    # File lib/bio/db/embl/embl.rb
# Returns the contents of the date (DT) lines.
#
# With no argument the whole Hash ('created' and 'updated' keys) is
# returned; with a key, only that entry. The Hash is built once from
# the first two DT lines and cached in @data['DT'].
def dt(key=nil)
  unless @data['DT']
    created_line, updated_line = self.get('DT').split(/\n/)
    # Drop the two-letter line tag plus its three-space separator.
    @data['DT'] = {
      'created' => created_line.sub(/\w{2}   /,'').strip,
      'updated' => updated_line.sub(/\w{2}   /,'').strip
    }
  end
  key ? @data['DT'][key] : @data['DT']
end

returns contents in the date (DT) line.

Bio::EMBL#dt -> <DT Hash>

where <DT Hash> is:

{'created' => String, 'updated' => String}

Bio::EMBL#dt(key) -> String

keys: ‘created’ and ‘updated’

DT Line; date (2/entry)

each_cds () { |cds_feature| ... }

Source

    # File lib/bio/db/embl/embl.rb
# Iterates over the features in the FT lines, yielding only those whose
# feature key is 'CDS'.
def each_cds
  ft.each do |candidate|
    next unless candidate.feature == 'CDS'
    yield candidate
  end
end

iterates on CDS features in the FT lines.

each_gene () { |gene_feature| ... }

Source

    # File lib/bio/db/embl/embl.rb
# Iterates over the features in the FT lines, yielding only those whose
# feature key is 'gene'.
def each_gene
  ft.each do |candidate|
    next unless candidate.feature == 'gene'
    yield candidate
  end
end

iterates on gene features in the FT lines.

entry ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns ENTRY_NAME in the ID line.
def entry
  id_line['ENTRY_NAME']
end

returns ENTRY_NAME in the ID line.

Bio::EMBL#entry -> String

Also aliased as: entry_name, entry_id

entry_id ()

Alias for: entry

entry_name ()

Alias for: entry

entry_version ()

Source

    # File lib/bio/db/embl/embl.rb
# Entry version number: the second element of what
# parse_release_version returns for the 'updated' DT line.
def entry_version
  updated_line = dt('updated')
  parse_release_version(updated_line)[1]
end

entry version number numbered by EMBL

features ()

Alias for: ft

fh ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns whatever fetch yields for the feature table header ('FH') tag.
def fh
  fetch('FH')
end

returns feature table header (String) in the feature header (FH) line.

FH Line; feature table header (0 or 2)

ft () { |feature| ... }

Source

    # File lib/bio/db/embl/embl.rb
# Returns the contents of the feature table (FT) lines.
#
# On the first call the raw text in @orig['FT'] is walked line by line
# and grouped into one array per feature: [feature_key, location_text,
# qualifier_text, ...]. Each array is then converted by parse_qualifiers
# (defined elsewhere), and the resulting collection is extended with
# Bio::Features::BackwardCompatibility and cached in @data['FT'].
#
# With a block, every cached feature is yielded in order; without one,
# the cached collection itself is returned.
#
# NOTE(review): line[20,60] is nil for a line shorter than 20 characters,
# so the following .chomp would raise NoMethodError -- confirm such lines
# cannot occur in @orig['FT'].
337 def ft
338   unless @data['FT']
339     ary = Array.new
        # true while inside a double-quoted qualifier value that has not
        # been closed yet, so following lines are treated as continuations
340     in_quote = false
341     @orig['FT'].each_line do |line|
342       next if line =~ /^FEATURES/
343 
344       #head = line[0,20].strip  # feature key (source, CDS, ...)
345       body = line[20,60].chomp # feature value (position, /qualifier=)
          # a feature key right after "FT   " starts a new feature
346       if line =~ /^FT {3}(\S+)/
347         ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
          # " /..." outside an open quote starts a new qualifier
348       elsif body =~ /^ \// and not in_quote
349         ary.last.push(body)    # /q="data..., /q=data, /q
350 
351         if body =~ /=" / and body !~ /"$/
352           in_quote = true
353         end
354 
          # anything else continues the last string pushed for this feature
355       else
356         ary.last.last << body # ...data..., ...data..."
357 
            # a trailing double quote closes the open quoted value
358         if body =~ /"$/
359           in_quote = false
360         end
361       end
362     end
363 
364     ary.map! do |subary|
365       parse_qualifiers(subary)
366     end
367 
368     @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
369   end
370   if block_given?
371     @data['FT'].each do |feature|
372       yield feature
373     end
374   else
375     @data['FT']
376   end
377 end

returns contents in the feature table (FT) lines.

Bio::EMBL#ft -> Bio::Features
Bio::EMBL#ft {} -> {|Bio::Feature| }

same as features method in bio/db/genbank.rb

FT Line; feature table data (>=0)

Also aliased as: features

id_line (key=nil)

Source

    # File lib/bio/db/embl/embl.rb
 89 def id_line(key=nil)
 90   unless @data['ID']
 91     tmp = Hash.new
 92     idline = fetch('ID').split(/; +/)         
 93     tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/)
 94     if idline.first =~ /^SV/
 95       tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last
 96       tmp['TOPOLOGY'] = idline.shift
 97       tmp['MOLECULE_TYPE'] = idline.shift
 98       tmp['DATA_CLASS'] = idline.shift
 99     else
100       tmp['MOLECULE_TYPE'] = idline.shift
101     end
102     tmp['DIVISION'] = idline.shift
103     tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i
104 
105     @data['ID'] = tmp
106   end
107   
108   if key
109     @data['ID'][key]
110   else
111     @data['ID']
112   end
113 end

returns contents in the ID line.

Bio::EMBL#id_line -> <ID Hash>

where <ID Hash> is:

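{'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String,
 'DIVISION' => String, 'SEQUENCE_LENGTH' => Int,
 'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String}

('SEQUENCE_VERSION' and 'TOPOLOGY' are only set for the Rel 89- ID line format.)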

ID Line

"ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."

DATA_CLASS = [‘standard’]

MOLECULE_TYPE: DNA RNA XXX

Code ( DIVISION )

EST (ESTs)
PHG (Bacteriophage)
FUN (Fungi)
GSS (Genome survey)
HTC (High Throughput cDNAs) 
HTG (HTGs)
HUM (Human)
INV (Invertebrates)
ORG (Organelles)
MAM (Other Mammals)
VRT (Other Vertebrates)
PLN (Plants)
PRO (Prokaryotes)
ROD (Rodents)
SYN (Synthetic)
STS (STSs)
UNC (Unclassified)
VRL (Viruses)

Rel 89-

ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.

1. Primary accession number
2. Sequence version number
3. Topology: ‘circular’ or ‘linear’
4. Molecule type (see note 1 below)
5. Data class (see section 3.1)
6. Taxonomic division (see section 3.2)
7. Sequence length (see note 2 below)

molecule ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns MOLECULE_TYPE in the ID line.
def molecule
  id_line['MOLECULE_TYPE']
end

returns MOLECULE_TYPE in the ID line.

Bio::EMBL#molecule -> String

Also aliased as: molecule_type

molecule_type ()

Alias for: molecule

naseq ()

Alias for: seq

ntseq ()

Alias for: seq

os (num = nil)

Source

    # File lib/bio/db/embl/embl.rb
# Returns the contents of the OS (organism species) line.
#
#   os      -> Array of <OS Hash>  ({'name' => String or nil, 'os' => String})
#   os(num) -> String such as "Trifolium repens (white clover)",
#              or nil when there is no entry at index num
#
# The OS line is parsed once and cached in @data['OS']. A line of the
# form "Genus species (name)" is split into 'os' and 'name'; any other
# line is stored whole under 'os' with a nil 'name'.
#
# Previously the num branch built its string with broken interpolation
# and then discarded it, so os(num) returned the whole Array. It now
# returns the formatted string that the in-code example describes.
def os(num = nil)
  unless @data['OS']
    line = fetch('OS')
    species =
      if /([A-Z][a-z]* *[\w \:\'\+\-]+\w) *\(([\w ]+)\)\s*\z/ =~ line
        {'name' => $2, 'os' => $1}
      else
        {'name' => nil, 'os' => line}
      end
    @data['OS'] = [species]
  end
  return @data['OS'] unless num

  entry = @data['OS'][num]
  return nil unless entry

  # EX. "Trifolium repens (white clover)"; omit the parentheses when
  # no common name was parsed.
  entry['name'] ? "#{entry['os']} (#{entry['name']})" : entry['os']
end

returns contents in the OS line.

Bio::EMBL#os -> Array of <OS Hash>

where <OS Hash> is:

[{'name'=>'Human', 'os'=>'Homo sapiens'}, 
 {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]

Bio::EMBL#os[0][‘name’] => “Human”
Bio::EMBL#os[0] => {‘name’=>“Human”, ‘os’=>‘Homo sapiens’}

OS Line; organism species (>=1)

OS   Trifolium repens (white clover)

Typically, OS line shows “Genus species (name)” style:

OS   Genus species (name)

Other examples:

OS   uncultured bacterium
OS   xxxxxx metagenome
OS   Cloning vector xxxxxxxx

Complicated examples:

OS   Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848))
OS   Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark)
OS   Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias)
OS   Sicydiinae sp. 'Keith et al., 2010'
OS   Acanthopagrus sp. 'Jean & Lee, 2008'
OS   Gaussia princeps (T. Scott, 1894)
OS   Rana sp. 8 Hillis & Wilcox, 2005
OS   Contracaecum rudolphii C D'Amelio et al., 2007
OS   Partula sp. 'Mt. Marau, Tahiti'
OS   Leptocephalus sp. 'type II larva' (Smith, 1989)
OS   Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002
OS   Non-A, non-B hepatitis virus
OS   Canidae (dog, coyote, wolf, fox)
OS   Salmonella enterica subsp. enterica serovar 4,[5],12:i:-
OS   Yersinia enterocolitica (type O:5,27)
OS   Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4))
OS   Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2))
OS   Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L

release_created ()

Source

    # File lib/bio/db/embl/embl.rb
# Release number when created: the first element of what
# parse_release_version returns for the 'created' DT line.
def release_created
  created_line = dt('created')
  parse_release_version(created_line)[0]
end

release number when created

release_modified ()

Source

    # File lib/bio/db/embl/embl.rb
# Release number when last updated: the first element of what
# parse_release_version returns for the 'updated' DT line.
def release_modified
  updated_line = dt('updated')
  parse_release_version(updated_line)[0]
end

release number when last updated

seq ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns the nucleotide sequence of this entry as a
# Bio::Sequence::NA, built from the blank-tagged lines with every
# space and digit removed.
def seq
  residues = fetch('').gsub(/[ \d]+/, '')
  Bio::Sequence::NA.new(residues)
end

returns the nucleotide sequence in this entry.

Bio::EMBL#seq -> Bio::Sequence::NA

The sequence is taken from the blank-tagged (bb) lines of the entry.

bb Line; (blanks) sequence data (>=1)

Also aliased as: naseq, ntseq

seqlen ()

Alias for: sequence_length

sequence_length ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns SEQUENCE_LENGTH in the ID line.
def sequence_length
  id_line['SEQUENCE_LENGTH']
end

returns SEQUENCE_LENGTH in the ID line.

Bio::EMBL#sequence_length -> Int

Also aliased as: seqlen

species ()

Source

    # File lib/bio/db/embl/embl.rb
# Species: returns whatever fetch yields for the 'OS' tag, without the
# name/os splitting that the os method performs.
def species
  self.fetch('OS')
end

species

sq (base = nil)

Source

    # File lib/bio/db/embl/embl.rb
# Returns sequence header information from the SQ line.
#
#   sq       -> {'ntlen' => Int, 'other' => Int,
#                'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
#   sq(base) -> count for that base; base may be a String or Symbol in
#               any case ('A', 'a', :a)
#
# The SQ line is parsed once and cached in @data['SQ']. When the line
# does not match the expected layout every count is 0.
def sq(base = nil)
  unless @data['SQ']
    layout = /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
    found = layout.match(fetch('SQ'))
    # A failed match yields all zeros, as nil.to_i did before.
    ntlen, a, c, g, t, other = found ? found.captures.map(&:to_i) : [0] * 6
    @data['SQ'] = {'ntlen' => ntlen, 'other' => other,
                   'a' => a, 'c' => c, 'g' => g, 't' => t}
  end

  # to_s lets Symbol keys such as :a work as well as 'A' / 'a'.
  base ? @data['SQ'][base.to_s.downcase] : @data['SQ']
end

returns sequence header information in the sequence header (SQ) line.

Bio::EMBL#sq -> <SQ Hash>

where <SQ Hash> is:

{'ntlen' => Int, 'other' => Int,
 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}

Bio::EMBL#sq(base) -> <base content in Int>
Bio::EMBL#sq[base] -> <base content in Int>

SQ Line; sequence header (1/entry)

SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;

sv ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns the version information as an "Accession.Version" String.
#
# The SV line is used when it is non-empty (after its first ';' is
# dropped); otherwise the value is assembled from the ENTRY_NAME and
# SEQUENCE_VERSION fields of the ID line.
def sv
  from_sv_line = field_fetch('SV').sub(/;/,'')
  return from_sv_line unless from_sv_line == ""

  id_line.values_at('ENTRY_NAME', 'SEQUENCE_VERSION').join('.')
end

returns the version information in the sequence version (SV) line.

Bio::EMBL#sv -> Accession.Version in String
Bio::EMBL#version -> version number in Int

SV Line; sequence version (1/entry)

SV    Accession.Version

to_biosequence ()

Source

    # File lib/bio/db/embl/embl.rb
# Converts the entry to a Bio::Sequence object by handing this entry
# and the EMBL adapter module to Bio::Sequence.adapter.
def to_biosequence
  embl_adapter = Bio::Sequence::Adapter::EMBL
  Bio::Sequence.adapter(self, embl_adapter)
end

converts the entry to Bio::Sequence object

Arguments
Returns: Bio::Sequence object

topology ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns TOPOLOGY in the ID line (nil when the ID line carries no
# topology field).
def topology
  id_line['TOPOLOGY']
end

version ()

Source

    # File lib/bio/db/embl/embl.rb
# Returns the sequence version as an Integer: the part of sv after the
# first '.', or the SEQUENCE_VERSION field of the ID line when sv has no
# such part.
def version
  from_sv = sv.split(".")[1]
  (from_sv || id_line['SEQUENCE_VERSION']).to_i
end