class Bio::EMBL

Public Instance Methods

cc() click to toggle source

returns comment text in the comments (CC) line.

CC Line; comments or notes (>=0)

# File lib/bio/db/embl/embl.rb, line 402
# Returns the comment (CC) text of the entry.
#
# The raw CC field is coerced to a String and the leading "CC   " tag is
# removed from the start of every line.
def cc
  raw_comment = get('CC').to_s
  raw_comment.gsub(/^CC   /, '')
end
Also aliased as: comment
comment()
Alias for: cc
data_class() click to toggle source
# File lib/bio/db/embl/embl.rb, line 130
# Returns DATA_CLASS from the parsed ID line.
def data_class
  id_line['DATA_CLASS']
end
date_created() click to toggle source

created date. Returns Date object, String or nil.

# File lib/bio/db/embl/embl.rb, line 462
# Returns the creation date of the entry.
#
# The 'created' value of the DT line is handed to parse_date and its
# result is returned unchanged.
def date_created
  created_text = dt('created')
  parse_date(created_text)
end
date_modified() click to toggle source

modified date. Returns Date object, String or nil.

# File lib/bio/db/embl/embl.rb, line 457
# Returns the last-modified date of the entry.
#
# The 'updated' value of the DT line is handed to parse_date and its
# result is returned unchanged.
def date_modified
  updated_text = dt('updated')
  parse_date(updated_text)
end
division() click to toggle source

returns DIVISION in the ID line.

# File lib/bio/db/embl/embl.rb, line 140
# Returns DIVISION from the parsed ID line.
def division
  id_line['DIVISION']
end
dt(key=nil) click to toggle source

returns contents in the date (DT) line.

  • #dt -> <DT Hash>

where <DT Hash> is:

{}

keys: 'created' and 'updated'

DT Line; date (2/entry)

# File lib/bio/db/embl/embl.rb, line 182
# Returns the contents of the date (DT) lines.
#
# The first DT line is stored under 'created' and the second under
# 'updated', each with its leading two-letter tag removed and surrounding
# whitespace stripped. A missing line yields nil for that key instead of
# raising, so entries with fewer than two DT lines can still be read.
# The parsed Hash is cached in @data['DT'].
#
# key:: optional, 'created' or 'updated'.
#
# Returns the whole Hash when +key+ is omitted, otherwise the value stored
# under +key+ (nil when absent).
def dt(key=nil)
  unless @data['DT']
    # to_s mirrors #cc: a nil DT field is treated as having no lines.
    lines = get('DT').to_s.split(/\n/)
    # Anchor the tag pattern so only a leading "DT   " is removed.
    created, updated = lines.first(2).map { |line| line.sub(/\A\w{2}   /, '').strip }
    @data['DT'] = { 'created' => created, 'updated' => updated }
  end
  key ? @data['DT'][key] : @data['DT']
end
each_cds() { |cds_feature| ... } click to toggle source

iterates on CDS features in the FT lines.

# File lib/bio/db/embl/embl.rb, line 381
# Iterates over the features of the FT lines whose feature key is 'CDS',
# yielding each one to the given block.
def each_cds
  ft.each do |candidate|
    next unless candidate.feature == 'CDS'
    yield candidate
  end
end
each_gene() { |gene_feature| ... } click to toggle source

iterates on gene features in the FT lines.

# File lib/bio/db/embl/embl.rb, line 390
# Iterates over the features of the FT lines whose feature key is 'gene',
# yielding each one to the given block.
def each_gene
  ft.each do |candidate|
    next unless candidate.feature == 'gene'
    yield candidate
  end
end
entry() click to toggle source

returns ENTRY_NAME in the ID line.

# File lib/bio/db/embl/embl.rb, line 117
# Returns ENTRY_NAME from the parsed ID line.
def entry
  id_line['ENTRY_NAME']
end
Also aliased as: entry_name, entry_id
entry_id()
Alias for: entry
entry_name()
Alias for: entry
entry_version() click to toggle source

entry version number numbered by EMBL

# File lib/bio/db/embl/embl.rb, line 477
# Returns the entry version number assigned by EMBL.
#
# This is the second element of what parse_release_version returns for the
# 'updated' DT value.
def entry_version
  release_and_version = parse_release_version(dt('updated'))
  release_and_version[1]
end
features()
Alias for: ft
fh() click to toggle source

returns feature table header (String) in the feature header (FH) line.

FH Line; feature table header (0 or 2)

# File lib/bio/db/embl/embl.rb, line 326
# Returns the feature table header text taken from the FH line(s).
def fh
  header_tag = 'FH'
  fetch(header_tag)
end
ft() { |feature| ... } click to toggle source

returns contents in the feature table (FT) lines.

same as features method in bio/db/genbank.rb

FT Line; feature table data (>=0)

# File lib/bio/db/embl/embl.rb, line 337
# Returns the features parsed from the feature table (FT) lines.
#
# The parsed list is cached in @data['FT'], so @orig['FT'] is only scanned
# on the first call. When a block is given, every cached feature is yielded
# to it; otherwise the cached list itself is returned.
#
# NOTE(review): parse_qualifiers and Bio::Features::BackwardCompatibility
# are defined outside this view; presumably parse_qualifiers converts each
# raw [feature, position, qualifier, ...] array into a feature object --
# confirm against their definitions.
def ft
  unless @data['FT']
    ary = Array.new
    # true while inside a quoted qualifier value that spans several lines
    in_quote = false
    @orig['FT'].each_line do |line|
      # lines beginning with FEATURES are skipped
      next if line =~ /^FEATURES/

      #head = line[0,20].strip  # feature key (source, CDS, ...)
      # up to 60 characters starting at offset 20 of the line
      body = line[20,60].chomp # feature value (position, /qualifier=)
      if line =~ /^FT {3}(\S+)/
        # a non-blank token right after "FT   " starts a new raw feature
        ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
      elsif body =~ /^ \// and not in_quote
        # body starting with " /" outside a quoted value: new qualifier
        ary.last.push(body)    # /q="data..., /q=data, /q

        # a value opening with '=" ' (quote then space) that is not closed
        # by a trailing quote on this line continues on following lines
        if body =~ /=" / and body !~ /"$/
          in_quote = true
        end

      else
        # continuation: append to the last element of the current feature
        ary.last.last << body # ...data..., ...data..."

        # a trailing quote ends the multi-line quoted value
        if body =~ /"$/
          in_quote = false
        end
      end
    end

    # replace each raw array with the result of parse_qualifiers
    ary.map! do |subary|
      parse_qualifiers(subary)
    end

    @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
  end
  if block_given?
    @data['FT'].each do |feature|
      yield feature
    end
  else
    @data['FT']
  end
end
Also aliased as: features
id_line(key=nil) click to toggle source

returns contents in the ID line.

where <ID Hash> is:

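{'ENTRY_NAME' => String, 'DATA_CLASS' => String, 'MOLECULE_TYPE' => String,
 'DIVISION' => String, 'SEQUENCE_LENGTH' => Int,
 'SEQUENCE_VERSION' => String, 'TOPOLOGY' => String}

('SEQUENCE_VERSION' and 'TOPOLOGY' are only set for ID lines that contain an SV field.)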

ID Line

"ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."

DATA_CLASS = ['standard']

MOLECULE_TYPE: DNA RNA XXX

Code ( DIVISION )

EST (ESTs)
PHG (Bacteriophage)
FUN (Fungi)
GSS (Genome survey)
HTC (High Throughput cDNAs) 
HTG (HTGs)
HUM (Human)
INV (Invertebrates)
ORG (Organelles)
MAM (Other Mammals)
VRT (Other Vertebrates)
PLN (Plants)
PRO (Prokaryotes)
ROD (Rodents)
SYN (Synthetic)
STS (STSs)
UNC (Unclassified)
VRL (Viruses)

Rel 89-

ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
ID   <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.

  1. Primary accession number

  2. Sequence version number

  3. Topology: 'circular' or 'linear'

  4. Molecule type (see note 1 below)

  5. Data class (see section 3.1)

  6. Taxonomic division (see section 3.2)

  7. Sequence length (see note 2 below)

# File lib/bio/db/embl/embl.rb, line 89
# Returns the contents of the ID line.
#
# The ID text is split on "; " and parsed into a Hash that is cached in
# @data['ID']. When the second field starts with "SV", SEQUENCE_VERSION,
# TOPOLOGY, MOLECULE_TYPE and DATA_CLASS are read from the following
# fields; otherwise DATA_CLASS is the second word of the first field and
# only MOLECULE_TYPE is read before DIVISION and SEQUENCE_LENGTH.
#
# key:: optional name of a single ID field, e.g. 'DIVISION'.
#
# Returns the whole Hash when +key+ is omitted, otherwise the value stored
# under +key+.
def id_line(key=nil)
  @data['ID'] ||= begin
    fields = fetch('ID').split(/; +/)
    name, klass = fields.shift.split(/ +/)
    parsed = { 'ENTRY_NAME' => name, 'DATA_CLASS' => klass }
    if fields.first =~ /^SV/
      # ID line with an SV field: the data class follows the molecule type
      parsed['SEQUENCE_VERSION'] = fields.shift.split(' ').last
      parsed['TOPOLOGY'] = fields.shift
      parsed['MOLECULE_TYPE'] = fields.shift
      parsed['DATA_CLASS'] = fields.shift
    else
      parsed['MOLECULE_TYPE'] = fields.shift
    end
    parsed['DIVISION'] = fields.shift
    # "500 BP." -> 500
    parsed['SEQUENCE_LENGTH'] = fields.shift.strip.split(' ').first.to_i
    parsed
  end

  return @data['ID'] unless key
  @data['ID'][key]
end
molecule() click to toggle source

returns MOLECULE_TYPE in the ID line.

# File lib/bio/db/embl/embl.rb, line 125
# Returns MOLECULE_TYPE from the parsed ID line.
def molecule
  id_line['MOLECULE_TYPE']
end
Also aliased as: molecule_type
molecule_type()
Alias for: molecule
naseq()
Alias for: seq
ntseq()
Alias for: seq
os(num = nil) click to toggle source

returns contents in the OS line.

  • #os -> Array of <OS Hash>

where <OS Hash> is:

[{'name'=>'Human', 'os'=>'Homo sapiens'},
 {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
  • #os[0]['name'] => "Human"

  • #os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}

OS Line; organism species (>=1)

OS   Trifolium repens (white clover)

Typically, OS line shows “Genus species (name)” style:

OS   Genus species (name)

Other examples:

OS   uncultured bacterium
OS   xxxxxx metagenome
OS   Cloning vector xxxxxxxx

Complicated examples:

OS   Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848))
OS   Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark)
OS   Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias)
OS   Sicydiinae sp. 'Keith et al., 2010'
OS   Acanthopagrus sp. 'Jean & Lee, 2008'
OS   Gaussia princeps (T. Scott, 1894)
OS   Rana sp. 8 Hillis & Wilcox, 2005
OS   Contracaecum rudolphii C D'Amelio et al., 2007
OS   Partula sp. 'Mt. Marau, Tahiti'
OS   Leptocephalus sp. 'type II larva' (Smith, 1989)
OS   Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002
OS   Non-A, non-B hepatitis virus
OS   Canidae (dog, coyote, wolf, fox)
OS   Salmonella enterica subsp. enterica serovar 4,[5],12:i:-
OS   Yersinia enterocolitica (type O:5,27)
OS   Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4))
OS   Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2))
OS   Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L
# File lib/bio/db/embl/embl.rb, line 266
# Returns the organism species parsed from the OS line.
#
# The OS text is split into the leading organism text ('os') and a
# trailing parenthesised name ('name'). When the text does not end with
# such a "(name)" part, 'name' is nil and 'os' holds the whole text.
# The parsed list is cached in @data['OS'].
#
# num:: optional index into the parsed list.
#
# Without +num+, returns the Array of {'name' => ..., 'os' => ...} hashes.
# With +num+, returns that element formatted as "Genus species (name)"
# (only the 'os' text when there is no name), or nil when the list has no
# element at that index.
def os(num = nil)
  unless @data['OS']
    line = fetch('OS')
    @data['OS'] =
      if /([A-Z][a-z]* *[\w \:\\+\-]+\w) *\(([\w ]+)\)\s*\z/ =~ line
        [{'name' => $2, 'os' => $1}]
      else
        [{'name' => nil, 'os' => line}]
      end
  end
  return @data['OS'] unless num

  organism = @data['OS'][num]
  return nil unless organism

  # EX. "Trifolium repens (white clover)"
  if organism['name']
    "#{organism['os']} (#{organism['name']})"
  else
    organism['os']
  end
end
release_created() click to toggle source

release number when created

# File lib/bio/db/embl/embl.rb, line 472
# Returns the release number in which the entry was created.
#
# This is the first element of what parse_release_version returns for the
# 'created' DT value.
def release_created
  release_and_version = parse_release_version(dt('created'))
  release_and_version[0]
end
release_modified() click to toggle source

release number when last updated

# File lib/bio/db/embl/embl.rb, line 467
# Returns the release number in which the entry was last updated.
#
# This is the first element of what parse_release_version returns for the
# 'updated' DT value.
def release_modified
  release_and_version = parse_release_version(dt('updated'))
  release_and_version[0]
end
seq() click to toggle source

returns the nucleotide sequence in this entry.

Uses the field stored under the empty tag ('') as the sequence.

bb Line; (blanks) sequence data (>=1)

# File lib/bio/db/embl/embl.rb, line 446
# Returns the nucleotide sequence of the entry as a Bio::Sequence::NA.
#
# The untagged ('') field is used as the sequence text, with every space
# and every run of digits removed.
def seq
  without_blanks = fetch('').delete(' ')
  residues = without_blanks.gsub(/\d+/, '')
  Bio::Sequence::NA.new(residues)
end
Also aliased as: naseq, ntseq
seqlen()
Alias for: sequence_length
sequence_length() click to toggle source

returns SEQUENCE_LENGTH in the ID line.

  • Bio::EMBL#sequence_length -> Integer

# File lib/bio/db/embl/embl.rb, line 146
# Returns SEQUENCE_LENGTH from the parsed ID line.
def sequence_length
  id_line['SEQUENCE_LENGTH']
end
Also aliased as: seqlen
species() click to toggle source

species

# File lib/bio/db/embl/embl.rb, line 519
# Returns the species text taken from the OS line.
def species
  fetch('OS')
end
sq(base = nil) click to toggle source

returns sequence header information in the sequence header (SQ) line.

  • #sq -> <SQ Hash>

where <SQ Hash> is:

{'ntlen' => Int, 'other' => Int,
 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
  • #sq(base) -> <base content in Int>

  • #sq[base] -> <base content in Int>

SQ Line; sequence header (1/entry)

SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
# File lib/bio/db/embl/embl.rb, line 423
# Returns the sequence header information from the SQ line.
#
# The total length and the A/C/G/T/other counts are parsed into a Hash of
# Integers that is cached in @data['SQ']; a count that cannot be matched
# becomes 0.
#
# base:: optional key ('a', 'c', 'g', 't', 'other' or 'ntlen'); it is
#        downcased before the lookup.
#
# Returns the whole Hash when +base+ is omitted, otherwise the value stored
# under the downcased +base+.
def sq(base = nil)
  @data['SQ'] ||= begin
    fetch('SQ') =~ /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
    {
      'ntlen' => Regexp.last_match(1).to_i,
      'other' => Regexp.last_match(6).to_i,
      'a' => Regexp.last_match(2).to_i,
      'c' => Regexp.last_match(3).to_i,
      'g' => Regexp.last_match(4).to_i,
      't' => Regexp.last_match(5).to_i
    }
  end

  base ? @data['SQ'][base.downcase] : @data['SQ']
end
sv() click to toggle source

returns the version information in the sequence version (SV) line.

SV Line; sequence version (1/entry)

SV    Accession.Version
# File lib/bio/db/embl/embl.rb, line 162
# Returns the sequence version string ("Accession.Version").
#
# The SV field is used with its first ';' removed. When that is empty, the
# value is built from ENTRY_NAME and SEQUENCE_VERSION of the ID line,
# joined with '.'.
def sv
  from_sv_line = field_fetch('SV').sub(/;/, '')
  return from_sv_line unless from_sv_line == ""

  parts = [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']]
  parts.join('.')
end
to_biosequence() click to toggle source

converts the entry to Bio::Sequence object


Arguments: (none)

Returns: Bio::Sequence object

# File lib/bio/db/embl/embl.rb, line 531
# Converts the entry to a Bio::Sequence object by passing this entry and
# the EMBL adapter module to Bio::Sequence.adapter.
def to_biosequence
  embl_adapter = Bio::Sequence::Adapter::EMBL
  Bio::Sequence.adapter(self, embl_adapter)
end
topology() click to toggle source
# File lib/bio/db/embl/embl.rb, line 134
# Returns TOPOLOGY from the parsed ID line.
def topology
  id_line['TOPOLOGY']
end
version() click to toggle source
# File lib/bio/db/embl/embl.rb, line 169
# Returns the sequence version as an Integer.
#
# The part of #sv after the first '.' is used; when #sv has no such part,
# SEQUENCE_VERSION of the ID line is used instead.
def version
  _accession, from_sv = sv.split(".")
  (from_sv || id_line['SEQUENCE_VERSION']).to_i
end