class Bio::KEGG::Keggtab

Description

Parses the 'keggtab' KEGG database definition file, which also includes the taxonomic categories of the KEGG organisms.

References

The 'keggtab' file is included in

Format

File format is something like

# KEGGTAB
#
# name            type            directory                    abbreviation
#
enzyme            enzyme          $BIOROOT/db/ideas/ligand     ec
ec                alias           enzyme
(snip)
# Human
h.sapiens         genes           $BIOROOT/db/kegg/genes       hsa
H.sapiens         alias           h.sapiens
hsa               alias           h.sapiens
(snip)
#
# Taxonomy
#
(snip)
animals           alias           hsa+mmu+rno+dre+dme+cel
eukaryotes        alias           animals+plants+protists+fungi
genes             alias           eubacteria+archaea+eukaryotes

Attributes

bioroot[R]

Returns a string of the BIOROOT path prefix.

db_names[R]

Public Class Methods

new(file_path, bioroot = nil) click to toggle source

Takes the path to the keggtab file and, optionally, the bioroot top directory. The environment variable BIOROOT, when set, overrides the bioroot argument.

# File lib/bio/db/kegg/keggtab.rb, line 54
# Sets up empty lookup tables and loads the keggtab file at +file_path+.
#
# +bioroot+ is the optional BIOROOT top directory; when the BIOROOT
# environment variable is set, its value is used instead of the argument.
# The whole file is read and handed to parse_keggtab.
def initialize(file_path, bioroot = nil)
  @db_names = {}
  @database = {}
  @taxonomy = {}
  @bioroot  = ENV['BIOROOT'] || bioroot
  File.open(file_path) { |io| parse_keggtab(io.read) }
end

Public Instance Methods

alias_list(db_name) click to toggle source

deprecated

# File lib/bio/db/kegg/keggtab.rb, line 141
# Deprecated. Returns the +aliases+ of the entry stored in @db_names under
# +db_name+, or nil when no entry is stored under that key.
def alias_list(db_name)
  entry = @db_names[db_name]
  entry.aliases if entry
end
aliases(db_abbrev) click to toggle source

Returns an Array containing all alias names for the database. (e.g. 'hsa' -> [“H.sapiens”, “hsa”], 'hpj' -> [“H.pylori_J99”, “hpj”])

# File lib/bio/db/kegg/keggtab.rb, line 112
# Returns the +aliases+ of the entry stored in @database under +db_abbrev+,
# or nil when no entry is stored under that key.
def aliases(db_abbrev)
  db = @database[db_abbrev]
  return nil unless db

  db.aliases
end
child_nodes(node = 'genes') click to toggle source
# File lib/bio/db/kegg/keggtab.rb, line 196
# Returns the value stored in @taxonomy under +node+ (default 'genes').
# Performs a plain hash lookup, so a missing label yields whatever
# @taxonomy returns for an absent key.
def child_nodes(node = 'genes')
  @taxonomy[node]
end
database(db_abbrev = nil) click to toggle source

Returns a hash containing DB definition section of the keggtab file. If database name is given as an argument, returns a Keggtab::DB object.

# File lib/bio/db/kegg/keggtab.rb, line 102
# With no argument (or a nil/false one) returns the whole @database hash;
# otherwise returns the entry stored under +db_abbrev+.
def database(db_abbrev = nil)
  db_abbrev ? @database[db_abbrev] : @database
end
db_by_abbrev(db_abbrev) click to toggle source

deprecated

# File lib/bio/db/kegg/keggtab.rb, line 157
# Deprecated. Scans the entries of @db_names and returns the first one whose
# +abbrev+ equals +db_abbrev+, or nil when none matches.
def db_by_abbrev(db_abbrev)
  @db_names.values.find { |db| db.abbrev == db_abbrev }
end
db_path(db_name) click to toggle source

deprecated

# File lib/bio/db/kegg/keggtab.rb, line 148
# Deprecated. Returns "<directory>/<db_name>" for the entry stored in
# @db_names under +db_name+, or nil when no entry is stored under that key.
#
# When @bioroot is set, the first "$BIOROOT" in the entry's path is replaced
# by it.
def db_path(db_name)
  db = @db_names[db_name]
  return nil unless db

  dir = db.path
  # Block form of sub: the replacement is inserted verbatim. With a string
  # replacement, backslash sequences in @bioroot (e.g. a UNC prefix "\\\\" or
  # "\\0") would be interpreted as escapes/backreferences and corrupt the path.
  dir = dir.sub(/\$BIOROOT/) { @bioroot } if @bioroot
  "#{dir}/#{db_name}"
end
db_path_by_abbrev(db_abbrev) click to toggle source

deprecated

# File lib/bio/db/kegg/keggtab.rb, line 170
# Deprecated. Resolves +db_abbrev+ to a database name via name_by_abbrev and
# returns the result of db_path for that name.
def db_path_by_abbrev(db_abbrev)
  db_path(name_by_abbrev(db_abbrev))
end
keggorg2taxo(keggorg)
Alias for: korg2taxo
keggorg2taxonomy(keggorg)
Alias for: korg2taxo
korg2taxo(keggorg) click to toggle source

Returns an array of taxonomy names the organism belongs to. (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes']) This method has the aliases keggorg2taxo, korg2taxonomy, and keggorg2taxonomy.

# File lib/bio/db/kegg/keggtab.rb, line 225
# Returns the chain of taxonomy labels above +keggorg+, nearest first.
#
# Starting from +keggorg+, repeatedly looks up the first @taxonomy entry
# whose member list includes the current label, records that entry's key and
# continues from it. Returns an empty array when no entry includes +keggorg+.
def korg2taxo(keggorg)
  lineage = []
  current = keggorg
  loop do
    parent, _members = @taxonomy.find { |_label, members| members.include?(current) }
    # Stop at the top of the chain, or when the walk would revisit a label:
    # a cyclic or self-referencing taxonomy would otherwise never terminate.
    break if parent.nil? || lineage.include?(parent)

    lineage.push(parent)
    current = parent
  end
  lineage
end
korg2taxonomy(keggorg)
Alias for: korg2taxo
name(db_abbrev) click to toggle source

Returns a canonical database name for the abbreviation. (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapiens', …)

# File lib/bio/db/kegg/keggtab.rb, line 120
# Returns the +name+ of the entry stored in @database under +db_abbrev+,
# or nil when no entry is stored under that key.
def name(db_abbrev)
  db = @database[db_abbrev]
  db ? db.name : nil
end
name_by_abbrev(db_abbrev) click to toggle source

deprecated

# File lib/bio/db/kegg/keggtab.rb, line 165
# Deprecated. Looks the entry up with db_by_abbrev and returns its +name+.
# No nil check is made on the lookup result, so an abbreviation that
# db_by_abbrev cannot resolve raises NoMethodError.
def name_by_abbrev(db_abbrev)
  db = db_by_abbrev(db_abbrev)
  db.name
end
path(db_abbrev) click to toggle source

Returns an absolute path for the flat file database. (e.g. '/bio/db/kegg/genes', …)

# File lib/bio/db/kegg/keggtab.rb, line 128
# Returns "<directory>/<name>" for the entry stored in @database under
# +db_abbrev+, or nil when no entry is stored under that key.
#
# When @bioroot is set, the first "$BIOROOT" in the entry's path is replaced
# by it.
def path(db_abbrev)
  db = @database[db_abbrev]
  return nil unless db

  dir = db.path
  # Block form of sub: the replacement is inserted verbatim. With a string
  # replacement, backslash sequences in @bioroot (e.g. a UNC prefix "\\\\" or
  # "\\0") would be interpreted as escapes/backreferences and corrupt the path.
  dir = dir.sub(/\$BIOROOT/) { @bioroot } if @bioroot
  "#{dir}/#{db.name}"
end
taxa_list() click to toggle source

Returns a sorted list of all node labels from the Taxonomy section. (e.g. [“actinobacteria”, “animals”, “archaea”, “bacillales”, …])

# File lib/bio/db/kegg/keggtab.rb, line 192
# Returns the keys of @taxonomy as a sorted array.
def taxa_list
  labels = @taxonomy.keys
  labels.sort
end
taxo2keggorgs(node = 'genes')
Alias for: taxo2korgs
taxo2korgs(node = 'genes') click to toggle source

Returns an array of organism names included in the specified taxon label. (e.g. 'proteobeta' -> [“nme”, “nma”, “rso”]) This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.

# File lib/bio/db/kegg/keggtab.rb, line 203
# Expands the taxonomy label +node+ (default 'genes') into its members.
#
# * A +node+ of length 3 is returned unchanged (the String itself, not an
#   Array).
# * Otherwise, if @taxonomy has an entry for +node+, each child is expanded
#   recursively and the results are collected in an Array; children that are
#   themselves taxa therefore appear as nested Arrays.
# * Otherwise nil is returned.
def taxo2korgs(node = 'genes')
  return node if node.length == 3

  children = @taxonomy[node]
  return nil unless children

  children.map { |child| taxo2korgs(child) }
end
taxon2keggorgs(node = 'genes')
Alias for: taxo2korgs
taxon2korgs(node = 'genes')
Alias for: taxo2korgs
taxonomy(node = nil) click to toggle source

Returns a hash containing the Taxonomy section of the keggtab file. If an argument is given, returns a list of all child nodes belonging to that label node. (e.g. “eukaryotes” -> [“animals”, “plants”, “protists”, “fungi”], …)

# File lib/bio/db/kegg/keggtab.rb, line 182
# With no argument (or a nil/false one) returns the whole @taxonomy hash;
# otherwise returns the value stored under +node+.
def taxonomy(node = nil)
  return @taxonomy unless node

  @taxonomy[node]
end