module Bio::NCBIDB::Common

Description

This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.

References

Constants

DELIMITER
TAGSIZE

Public Class Methods

new(entry) click to toggle source
Calls superclass method
# File lib/bio/db/genbank/common.rb, line 30
# Forwards the given +entry+ together with this module's TAGSIZE constant
# to the superclass initializer.
# NOTE(review): what the superclass does with the two arguments is not
# visible from here -- presumably TAGSIZE is the width of the tag column.
def initialize(entry)
  super(entry, TAGSIZE)
end

Public Instance Methods

acc_version() click to toggle source

Returns the first part of the VERSION record as “ACCESSION.VERSION” String.

# File lib/bio/db/genbank/common.rb, line 57
# Gives the leading element of #versions converted with +to_s+, so an
# empty VERSION list yields an empty String instead of nil.
def acc_version
  primary = versions.first
  primary.to_s
end
accession() click to toggle source

Returns the ACCESSION part of the acc_version.

# File lib/bio/db/genbank/common.rb, line 62
# Splits #acc_version on "." and returns the first piece as a String
# (an empty String when there is nothing before the first dot).
def accession
  pieces = acc_version.split(/\./)
  pieces.first.to_s
end
accessions() click to toggle source

ACCESSION – Returns contents of the ACCESSION record as an Array.

# File lib/bio/db/genbank/common.rb, line 46
# Reads the ACCESSION field through field_fetch, trims surrounding
# whitespace and splits what remains on runs of whitespace.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end
comment() click to toggle source

COMMENT – Returns contents of the COMMENT record as a String.

# File lib/bio/db/genbank/common.rb, line 199
# Returns the COMMENT text with the leading "COMMENT" tag column, the
# 12-space indent of every following line, and one trailing line break
# removed.
def comment
  text = get('COMMENT').to_s
  # drop the tag that opens the record
  text = text.sub(/\ACOMMENT     /, '')
  # drop the indentation of the continuation lines
  text = text.gsub(/^ {12}/, '')
  text.chomp
end
common_name() click to toggle source
# File lib/bio/db/genbank/common.rb, line 120
# Returns the 'common_name' entry of the Hash produced by #source.
def common_name
  source['common_name']
end
Also aliased as: vernacular_name
definition() click to toggle source

DEFINITION – Returns contents of the DEFINITION record as a String.

# File lib/bio/db/genbank/common.rb, line 40
# Returns whatever field_fetch yields for the 'DEFINITION' tag.
# NOTE(review): field_fetch is defined outside this view; it presumably
# strips the tag and normalizes whitespace -- confirm there.
def definition
  field_fetch('DEFINITION')
end
features() { |f| ... } click to toggle source

FEATURES – Returns contents of the FEATURES record as an array of Bio::Feature objects.

# File lib/bio/db/genbank/common.rb, line 209
# FEATURES -- Parses the FEATURES record once, caches the result in
# @data['FEATURES'] and returns it (or yields each element when a block
# is given).
#
# Every line is cut into a key column (first 20 characters) and a value
# column (next 60 characters).  Lines are grouped into
# [ feature type, position, qualifier, ... ] arrays, each group is handed
# to parse_qualifiers, and the resulting Array is extended with
# Bio::Features::BackwardCompatibility.
#
# Quoted qualifier values may span several lines and may contain '"'
# written as a doubled quote ("").  A value is therefore treated as
# closed only when the text ends with an odd-length run of quotes; a
# line ending in "" (an escaped quote) keeps the value open, so a
# following continuation line that happens to start with "/" is not
# mistaken for a new qualifier.
def features
  unless @data['FEATURES']
    ary = []
    in_quote = false

    # Matches text that ends with an odd number of '"' characters, i.e.
    # zero or more escaped pairs ("") followed by the real closing quote.
    closing_quote = /(?:\A|[^"])(?:"")*"\z/

    get('FEATURES').each_line do |line|
      next if line =~ /^FEATURES/

      # feature type  (source, CDS, ...)
      head = line[0,20].to_s.strip

      # feature value (position or /qualifier=)
      body = line[20,60].to_s.chomp

      # sub-array [ feature type, position, /q="data", ... ]
      if line =~ /^ {5}\S/
        ary.push([ head, body ])

      # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
      elsif body =~ /^ \// && !in_quote                 # gb:IRO125195
        ary.last.push(body)

        # flag for open quote (/q="data...): look only at the text after
        # the opening quote so that /q="" counts as closed while /q=" and
        # /q="data"" (escaped quote at the line end) stay open
        if body =~ /="/
          value_start = $'
          in_quote = true unless value_start =~ closing_quote
        end

      # feature qualifier continued (...data..., ...data...")
      else
        ary.last.last << body

        # flag for closing quote (/q="data... lines  ...")
        in_quote = false if body =~ closing_quote
      end
    end

    ary.collect! do |subary|
      parse_qualifiers(subary)
    end

    @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
  end
  if block_given?
    @data['FEATURES'].each do |f|
      yield f
    end
  else
    @data['FEATURES']
  end
end
gi() click to toggle source

Returns the second part of the VERSION record as a “GI:#######” String.

# File lib/bio/db/genbank/common.rb, line 72
# Returns the "GI:#######" token of the VERSION record as a String, or
# nil when the record carries no such token.
#
# The VERSION line is not guaranteed to contain a GI number (it may hold
# only "ACCESSION.VERSION", or be empty).  Picking the last token
# unconditionally would then hand back the accession instead, so the
# token is selected by its "GI:" prefix.
def gi
  versions.find { |token| token.start_with?('GI:') }
end
keywords() click to toggle source

KEYWORDS – Returns contents of the KEYWORDS record as an Array of Strings.

# File lib/bio/db/genbank/common.rb, line 84
# KEYWORDS -- Returns the keywords as an Array of Strings, computing and
# caching them in @data['KEYWORDS'] on first use: the fetched text loses
# one trailing "." and is split on "; ".
def keywords
  unless @data['KEYWORDS']
    line = fetch('KEYWORDS').chomp('.')
    @data['KEYWORDS'] = line.split(/; /)
  end
  @data['KEYWORDS']
end
locus() click to toggle source

LOCUS – Locus class must be defined in child classes.

# File lib/bio/db/genbank/common.rb, line 35
# Placeholder for the LOCUS accessor.  The body is empty, so this
# implementation returns nil.
def locus
  # must be overridden in each subclass
end
nid() click to toggle source

NID – Returns contents of the NID record as a String.

# File lib/bio/db/genbank/common.rb, line 78
# Returns whatever field_fetch yields for the 'NID' tag.
# NOTE(review): field_fetch is defined outside this view; its exact
# normalization of the field text should be confirmed there.
def nid
  field_fetch('NID')
end
organism() click to toggle source
# File lib/bio/db/genbank/common.rb, line 125
# Returns the 'organism' entry of the Hash produced by #source.
def organism
  source['organism']
end
origin() click to toggle source

ORIGIN – Returns contents of the ORIGIN record as a String.

# File lib/bio/db/genbank/common.rb, line 263
# ORIGIN -- Returns the text of the first ORIGIN line (after tag_cut and
# truncate).  On first use the remaining lines are also stored in
# @data['SEQUENCE'] with digits, blanks, tabs, line breaks and "/"
# removed.
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  # first line = ORIGIN header, everything after it = numbered sequence
  header, residues = get('ORIGIN').split("\n", 2)
  residues = "" unless residues
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = residues.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
references() { |r| ... } click to toggle source

REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.

# File lib/bio/db/genbank/common.rb, line 136
# REFERENCE -- Builds (once) and returns the list of Reference objects
# for the REFERENCE records; with a block, yields each of them instead.
#
# Each record is split into sub-fields and collected into a Hash that is
# passed to Reference.new:
#   REFERENCE -> 'embl_gb_record_number', optionally 'sequence_position'
#   AUTHORS   -> 'authors' (Array of names)
#   TITLE     -> 'title'
#   JOURNAL   -> 'journal' plus 'volume', 'issue', 'pages', 'year' when
#                the citation matches "name vol (issue), from-to (year)"
#   MEDLINE   -> 'medline'
#   PUBMED    -> 'pubmed'
#   REMARK    -> appended to 'comments'
# The resulting Array is extended with
# Bio::References::BackwardCompatibility and cached in @data['REFERENCE'].
def references
  unless @data['REFERENCE']
    parsed = toptag2array(get('REFERENCE')).map do |record|
      attrs = {}
      subtag2array(record).each do |field|
        case tag_get(field)
        when /REFERENCE/
          m = /(\d+)(\s*\((.+)\))?/m.match(tag_cut(field))
          if m
            attrs['embl_gb_record_number'] = m[1].to_i
            span = m[3]
            if span && span != 'sites'
              # "bases 1 to 10; 20 to 30" -> "1-10, 20-30"
              span.sub!(/\A\s*bases\s+/, '')
              span.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              span.gsub!(/\s*\;\s*/, ', ')
              attrs['sequence_position'] = span
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the final item may hold two names joined by "and"
          tail = names.pop
          names.concat(tail.split(/\s+and\s+/)) if tail
          attrs['authors'] = names.map { |n| n.sub(/,/, ', ') }
        when /TITLE/
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          attrs['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          citation = truncate(tag_cut(field))
          jm = /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/.match(citation)
          if jm
            attrs['journal'] = jm[1]
            attrs['volume']  = jm[2]
            attrs['issue']   = jm[3]
            attrs['pages']   = jm[4]
            attrs['year']    = jm[5]
          else
            attrs['journal'] = citation
          end
        when /MEDLINE/
          attrs['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          attrs['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          attrs['comments'] ||= []
          attrs['comments'].push truncate(tag_cut(field))
        end
      end
      Reference.new(attrs)
    end
    @data['REFERENCE'] = parsed.extend(Bio::References::BackwardCompatibility)
  end

  return @data['REFERENCE'] unless block_given?

  @data['REFERENCE'].each { |r| yield r }
end
segment() click to toggle source

SEGMENT – Returns contents of the SEGMENT record as a “m/n” form String.

# File lib/bio/db/genbank/common.rb, line 90
# SEGMENT -- Returns the digit groups found in the fetched SEGMENT text
# joined with "/" (e.g. "1/6"), caching the String in @data['SEGMENT'].
def segment
  unless @data['SEGMENT']
    numbers = fetch('SEGMENT').scan(/\d+/)
    @data['SEGMENT'] = numbers.join("/")
  end
  @data['SEGMENT']
end
source() click to toggle source

SOURCE – Returns contents of the SOURCE record as a Hash.

# File lib/bio/db/genbank/common.rb, line 96
# SOURCE -- Returns a Hash with the keys 'common_name', 'organism' and
# 'taxonomy' (any other key reads as ''), built once and cached in
# @data['SOURCE'].
#
# The record is cut at the literal 'ORGANISM'.  The part before it
# becomes the common name.  In the part after it, the taxonomy starts at
# the first word ending in ";" -- or, failing that, the first word
# ending in "." -- and everything before that word is the organism name.
def source
  return @data['SOURCE'] if @data['SOURCE']

  common, rest = get('SOURCE').split('ORGANISM')
  rest = "" unless rest

  # ";" normally opens the lineage; a lone "." covers rs:NC_001741
  hit = rest.match(/\S+;/) || rest.match(/\S+\./)
  if hit
    species = hit.pre_match
    lineage = hit[0] + hit.post_match
  else
    species = rest
    lineage = ''
  end

  info = {
    'common_name'   => truncate(tag_cut(common)),
    'organism'      => truncate(species),
    'taxonomy'      => truncate(lineage)
  }
  info.default = ''
  @data['SOURCE'] = info
  @data['SOURCE']
end
taxonomy() click to toggle source
# File lib/bio/db/genbank/common.rb, line 129
# Returns the 'taxonomy' entry of the Hash produced by #source.
def taxonomy
  source['taxonomy']
end
vernacular_name()
Alias for: common_name
version() click to toggle source

Returns the VERSION part of the #acc_version as an Integer.

# File lib/bio/db/genbank/common.rb, line 67
# Splits #acc_version on "." and converts the last piece with +to_i+
# (0 when that piece does not start with digits or there is no piece).
def version
  pieces = acc_version.split(/\./)
  pieces.last.to_i
end
versions() click to toggle source

VERSION – Returns contents of the VERSION record as an Array of Strings.

# File lib/bio/db/genbank/common.rb, line 52
# VERSION -- Returns the fetched VERSION text split on runs of whitespace
# as an Array of Strings, cached in @data['VERSION'] after the first call.
def versions
  unless @data['VERSION']
    @data['VERSION'] = fetch('VERSION').split(/\s+/)
  end
  @data['VERSION']
end