module Bio::EMBLDB::Common

Constants

DELIMITER
RS
TAGSIZE

Public Class Methods

new(entry) click to toggle source
Calls superclass method
# File lib/bio/db/embl/common.rb, line 86
# Builds the object by delegating to the superclass initializer, passing
# +entry+ through unchanged together with this module's TAGSIZE constant.
#
# entry:: forwarded as the first argument to +super+ (presumably the raw
#         flat-file entry text -- confirm against the superclass).
def initialize(entry)
  # NOTE(review): TAGSIZE is defined outside this method; presumably the
  # width of the leading line-code column -- confirm.
  super(entry, TAGSIZE)
end

Public Instance Methods

ac() click to toggle source

Returns an Array of accession numbers in the AC lines.

AC Line

"AC   A12345; B23456;"
AC [AC1;]+

Accession numbers format:

1       2     3          4          5          6
[O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9]
# File lib/bio/db/embl/common.rb, line 99
# Returns the accession numbers of the AC field as an Array of Strings.
#
# The text obtained from field_fetch('AC') is split on single spaces and
# the first ';' of every token is removed. The result is cached in
# @data['AC'], so the field is parsed only once.
def ac
  @data['AC'] ||= field_fetch('AC').split(/ /).map { |token| token.sub(/;/, '') }
end
Also aliased as: accessions
accession() click to toggle source

returns the first accession number in the AC lines

# File lib/bio/db/embl/common.rb, line 113
# Returns the first element of the Array produced by #ac, or nil when that
# Array is empty.
def accession
  ac.first
end
accessions()
Alias for: ac
de() click to toggle source

Returns a String in the DE line.

DE Line

# File lib/bio/db/embl/common.rb, line 121
# Returns the content of the DE field as delivered by fetch('DE').
#
# The value is cached in @data['DE']; fetch is only called while the cache
# slot is still empty.
def de
  @data['DE'] ||= fetch('DE')
end
Also aliased as: description, definition
definition()
Alias for: de
description()
Alias for: de
dr() { |k, v| ... } click to toggle source

returns contents in the DR line.

  • #dr -> [ <Database cross-reference Hash>* ]

where <Database cross-reference Hash> is:

DR Line; database cross-reference (>=0), one cross-reference per line

"DR  database_identifier; primary_identifier; secondary_identifier."
# File lib/bio/db/embl/common.rb, line 329
# Returns the database cross-references found in the DR lines.
#
# Without a block, returns a Hash that maps each database identifier to an
# Array of cross-references; every cross-reference is itself an Array of the
# remaining "; "-separated fields of its line.
#
#   "DR   EMBL; J04618; AAA60277.1."  #=> {'EMBL' => [['J04618', 'AAA60277.1']]}
#
# With a block, yields each (database identifier, cross-reference list)
# pair and returns the result of Hash#each.
#
# The parsed Hash is cached in @data['DR'].
def dr
  unless @data['DR']
    xrefs = {}
    get('DR').split(/\n/).each do |line|
      # Remove the line code, then only a trailing period. The previous
      # pattern /.$/ removed the last character whatever it was, which
      # truncated the final field of lines not ending in '.'.
      fields = line.sub(/^DR   /, '').strip.sub(/\.\z/, '').split(/;[ ]/)
      dbname = fields.shift
      (xrefs[dbname] ||= []).push(fields)
    end
    @data['DR'] = xrefs
  end
  if block_given?
    @data['DR'].each { |k, v| yield(k, v) }
  else
    @data['DR']
  end
end
keywords()
Alias for: kw
kw() click to toggle source

returns keywords in the KW line.

  • #kw -> [ <keyword>* ]

KW Line; keyword (>=1)

KW   [Keyword;]+
# File lib/bio/db/embl/common.rb, line 220
# Returns the keywords of the KW field as an Array of Strings.
#
# When get('KW') is empty the result is an empty Array. Otherwise the text
# from fetch('KW') has its terminating period removed, is split on ';' and
# each piece is stripped of surrounding whitespace.
#
# The result is cached in @data['KW'].
def kw
  unless @data['KW']
    @data['KW'] =
      if get('KW').size > 0
        # Strip only a real trailing period. The previous pattern /.$/
        # dropped the last character unconditionally and so truncated the
        # final keyword whenever the period was absent.
        fetch('KW').sub(/\.\s*\z/, '').split(/;/).map { |word| word.strip }
      else
        []
      end
  end
  @data['KW']
end
Also aliased as: keywords
oc() click to toggle source

returns contents in the OC line.

  • #oc -> [ <organism class String>* ]

OC Line; organism classification (>=1)

OC   Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;
OC   Theileria.
# File lib/bio/db/embl/common.rb, line 203
# Returns the organism classification of the OC field as an Array of
# Strings.
#
# The text from fetch('OC') has its terminating period removed, is split on
# ';' and each piece is stripped of surrounding whitespace. The result is
# cached in @data['OC'].
#
# A NameError (which includes NoMethodError) raised while reading or
# parsing the field is swallowed on purpose; in that case nothing is cached
# and nil is returned.
def oc
  unless @data['OC']
    begin
      # Strip only a real trailing period. The previous pattern /.$/
      # dropped the last character unconditionally and so truncated the
      # final taxon whenever the period was absent.
      @data['OC'] = fetch('OC').sub(/\.\s*\z/, '').split(/;/).map { |taxon| taxon.strip }
    rescue NameError
      # Best-effort: leave @data['OC'] unset so the method returns nil.
      nil
    end
  end
  @data['OC']
end
og() click to toggle source

returns contents in the OG line.

  • #og -> [ <organelle String>* ]

OG Line; organelle (0 or 1/entry)

OG   Plastid; Chloroplast.
OG   Mitochondrion.
OG   Plasmid sym pNGR234a.
OG   Plastid; Cyanelle.
OG   Plasmid pSymA (megaplasmid 1).
OG   Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.
# File lib/bio/db/embl/common.rb, line 180
# Returns the organelle entries of the OG field as an Array of Strings.
#
# When get('OG') is empty the result is an empty Array. Otherwise the text
# from fetch('OG') is cleaned (trailing period removed, the word "and"
# removed, every ';' treated like ','), split on ',' and each piece is
# stripped of surrounding whitespace.
#
#   "Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1."
#     #=> ["Plasmid pNRC100", "Plasmid pNRC200", "Plasmid pHH1"]
#
# The result is cached in @data['OG'].
def og
  unless @data['OG']
    names = []
    if get('OG').size > 0
      # Non-destructive calls: the string handed back by fetch is left
      # untouched. Every ';' is converted (previously only the first one
      # was), and " and" is removed only as a whole word so that names
      # beginning with "and" are not clipped.
      text = fetch('OG').sub(/\.$/, '').gsub(/ and\b/, '').tr(';', ',')
      names = text.split(',').map { |part| part.strip }
    end
    @data['OG'] = names
  end
  @data['OG']
end
os(num = nil) click to toggle source

returns contents in the OS line.

  • Bio::EMBLDB#os -> Array of <OS Hash>

where <OS Hash> is:

[{'name'=>'Human', 'os'=>'Homo sapiens'}, 
 {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
  • Bio::SPTR#os['name'] => “Human”

  • Bio::SPTR#os => {'name'=>“Human”, 'os'=>'Homo sapiens'}

  • Bio::SPTR#os(0) => “Homo sapiens (Human)”

OS Line; organism species (>=1)

"OS   Trifolium repens (white clover)"

OS   Genus species (name).
OS   Genus species (name0) (name1).
OS   Genus species (name0) (name1).
OS   Genus species (name0), G s0 (name0), and G s (name1).
# File lib/bio/db/embl/common.rb, line 148
# Returns the organism species information of the OS field.
#
# Without an argument, returns an Array of Hashes, one per organism, each
# with the keys 'os' (the species text) and 'name' (the parenthesised part
# of the entry including its parentheses, or nil when there is none):
#
#   [{'name' => '(Human)', 'os' => 'Homo sapiens'}]
#
# With +num+, returns a String for the organism at that index, built from
# its 'os' and 'name' values, e.g. "Trifolium repens (white clover)".
#
# num:: optional index into the parsed organism Array.
#
# Raises RuntimeError when a piece of the OS text does not match the
# species pattern. The parsed Array is cached in @data['OS'].
def os(num = nil)
  unless @data['OS']
    organisms = []
    fetch('OS').split(/, and|, /).each do |chunk|
      if chunk =~ /([A-Z][a-z]* *[\w \:\\+\-]+\w)/
        species = $1
        # nil when the chunk carries no parenthesised name.
        name = chunk[/(\(.+\))/, 1]
        organisms.push({'name' => name, 'os' => species})
      else
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
    end
    @data['OS'] = organisms
  end
  return @data['OS'] unless num

  # The indexed form used to build a malformed string and then discard it,
  # so the whole Array was returned regardless of +num+.
  entry = @data['OS'][num]
  # compact: skip the name when the organism has none, avoiding a trailing
  # space.
  [entry['os'], entry['name']].compact.join(' ')
end
ref() click to toggle source

returns contents in the R lines.

  • #ref -> [ <reference information Hash>* ]

where <reference information Hash> is:

{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 
 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}

R Lines

  • RN RC RP RX RA RT RL RG

# File lib/bio/db/embl/common.rb, line 242
# Returns the reference blocks of the R lines as an Array of Hashes.
#
# The text from get('R') is cut into one chunk per reference at every
# "\nRN   " boundary. Each chunk yields a Hash with the keys 'RN', 'RC',
# 'RP', 'RX', 'RA', 'RT', 'RL' and 'RG'; the content of all lines carrying
# the same line code is joined with single spaces. Afterwards every value
# is stripped and loses a leading '"', a trailing ';' and a trailing '"'
# (in that order).
#
# Raises RuntimeError for a line that does not start with one of the eight
# line codes followed by three spaces and some content.
#
# The result is cached in @data['R'].
def ref
  return @data['R'] if @data['R']

  records = get('R').split(/\nRN   /).map do |chunk|
    fields = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 
              'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
    # Every chunk but the first lost its "RN   " prefix to the split.
    chunk = 'RN   ' + chunk unless chunk =~ /^RN   /
    chunk.split("\n").each do |line|
      found = /^(R[NPXARLCTG])   (.+)/.match(line)
      raise "Invalid format in R lines, \n[#{line}]\n" unless found
      fields[found[1]] += found[2] + ' '
    end
    fields.keys.each do |code|
      fields[code] = fields[code].strip.sub(/^"/,'').sub(/;$/,'').sub(/"$/,'')
    end
    fields
  end
  @data['R'] = records
end
references() click to toggle source

returns Bio::Reference object from #ref.

# File lib/bio/db/embl/common.rb, line 271
# Returns the references of the entry as an Array of Reference objects,
# extended with Bio::References::BackwardCompatibility.
#
# Every Hash delivered by #ref is translated into an attribute Hash that is
# handed to Reference.new:
#
# RN:: the number in square brackets -> 'embl_gb_record_number' (Integer)
# RC:: non-blank value appended to the 'comments' Array
# RP:: 'sequence_position'
# RA:: split on ", "; a comma is inserted before the last space-separated
#      word of each author -> 'authors'
# RT:: 'title'
# RL:: when the value matches the journal citation pattern: 'journal',
#      'volume', 'issue', 'pages' and 'year'; otherwise the whole value is
#      stored as 'journal'
# RX:: each ". "-separated item "TAG; id" is stored under the downcased tag
#      (PUBMED, DOI, AGRICOLA)
#
# The result is cached in @data['references'].
def references
  return @data['references'] if @data['references']

  list = ref.map do |ent|
    attrs = {}
    ent.each do |key, value|
      case key
      when 'RN'
        attrs['embl_gb_record_number'] = $1.to_i if value.to_s =~ /\[(\d+)\]/
      when 'RC'
        (attrs['comments'] ||= []).push(value) unless value.to_s.strip.empty?
      when 'RP'
        attrs['sequence_position'] = value
      when 'RA'
        attrs['authors'] = value.split(/\, /).map do |author|
          author.sub(/( [^ ]+)\z/, ",\\1")
        end
      when 'RT'
        attrs['title'] = value
      when 'RL'
        cite = /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/.match(value.to_s)
        if cite
          attrs['journal'] = cite[1].rstrip
          attrs['volume']  = cite[2]
          attrs['issue']   = cite[4]
          attrs['pages']   = cite[6]
          attrs['year']    = cite[7]
        else
          attrs['journal'] = value
        end
      when 'RX'  # PUBMED, DOI, (AGRICOLA)
        value.split(/\. /).each do |item|
          tag, xref = item.split(/\; /).map { |part| part.strip.sub(/\.\z/, '') }
          attrs[tag.downcase] = xref
        end
      end
    end
    Reference.new(attrs)
  end
  @data['references'] = list.extend(Bio::References::BackwardCompatibility)
end