class Bio::KEGG::Keggtab

Description

Parses the ‘keggtab’ KEGG database definition file, which also includes the taxonomic categories of the KEGG organisms.

References

The ‘keggtab’ file is included in the KEGG GENES distribution archives (e.g. genes.tar.gz from the KEGG FTP site).

Format

The file format looks like this:

# KEGGTAB
#
# name            type            directory                    abbreviation
#
enzyme            enzyme          $BIOROOT/db/ideas/ligand     ec
ec                alias           enzyme
(snip)
# Human
h.sapiens         genes           $BIOROOT/db/kegg/genes       hsa
H.sapiens         alias           h.sapiens
hsa               alias           h.sapiens
(snip)
#
# Taxonomy
#
(snip)
animals           alias           hsa+mmu+rno+dre+dme+cel
eukaryotes        alias           animals+plants+protists+fungi
genes             alias           eubacteria+archaea+eukaryotes

Attributes

bioroot[R]

Returns the BIOROOT path prefix as a string.

db_names[R]

Public Class Methods

new(file_path, bioroot = nil) click to toggle source

Takes the path to the keggtab file and, optionally, the bioroot top directory. The environment variable BIOROOT, when set, overrides the bioroot argument.

   # File lib/bio/db/kegg/keggtab.rb
54 def initialize(file_path, bioroot = nil)
55   @bioroot = ENV['BIOROOT'] || bioroot
56   @db_names = Hash.new
57   @database = Hash.new
58   @taxonomy = Hash.new
59   File.open(file_path) do |f|
60     parse_keggtab(f.read)
61   end
62 end

Public Instance Methods

alias_list(db_name) click to toggle source

deprecated

    # File lib/bio/db/kegg/keggtab.rb
141 def alias_list(db_name)
142   if @db_names[db_name]
143     @db_names[db_name].aliases
144   end
145 end
aliases(db_abbrev) click to toggle source

Returns an Array containing all alias names for the database. (e.g. ‘hsa’ -> [“H.sapiens”, “hsa”], ‘hpj’ -> [“H.pylori_J99”, “hpj”])

    # File lib/bio/db/kegg/keggtab.rb
112 def aliases(db_abbrev)
113   if @database[db_abbrev]
114     @database[db_abbrev].aliases
115   end
116 end
child_nodes(node = 'genes') click to toggle source
    # File lib/bio/db/kegg/keggtab.rb
196 def child_nodes(node = 'genes')
197   return @taxonomy[node]
198 end
database(db_abbrev = nil) click to toggle source

Returns a hash containing the DB definition section of the keggtab file. If a database abbreviation is given as an argument, returns the corresponding Keggtab::DB object.

    # File lib/bio/db/kegg/keggtab.rb
102 def database(db_abbrev = nil)
103   if db_abbrev
104     @database[db_abbrev]
105   else
106     @database
107   end
108 end
db_by_abbrev(db_abbrev) click to toggle source

deprecated

    # File lib/bio/db/kegg/keggtab.rb
157 def db_by_abbrev(db_abbrev)
158   @db_names.each do |k, db|
159     return db if db.abbrev == db_abbrev
160   end
161   return nil
162 end
db_path(db_name) click to toggle source

deprecated

    # File lib/bio/db/kegg/keggtab.rb
148 def db_path(db_name)
149   if @bioroot
150     "#{@db_names[db_name].path.sub(/\$BIOROOT/,@bioroot)}/#{db_name}"
151   else
152     "#{@db_names[db_name].path}/#{db_name}"
153   end
154 end
db_path_by_abbrev(db_abbrev) click to toggle source

deprecated

    # File lib/bio/db/kegg/keggtab.rb
170 def db_path_by_abbrev(db_abbrev)
171   db_name = name_by_abbrev(db_abbrev)
172   db_path(db_name)
173 end
keggorg2taxo(keggorg)
Alias for: korg2taxo
keggorg2taxonomy(keggorg)
Alias for: korg2taxo
korg2taxo(keggorg) click to toggle source

Returns an array of the taxonomy names the organism belongs to. (e.g. ‘eco’ -> [‘proteogamma’,‘proteobacteria’,‘eubacteria’,‘genes’]) This method is also available under the aliases keggorg2taxo, korg2taxonomy, and keggorg2taxonomy.

    # File lib/bio/db/kegg/keggtab.rb
225 def korg2taxo(keggorg)
226   tmp = Array.new
227   traverse = Proc.new {|keggorg|
228     @taxonomy.each do |k,v|
229       if v.include?(keggorg)
230         tmp.push(k)
231         traverse.call(k)
232         break
233       end
234     end
235   }
236   traverse.call(keggorg)
237   return tmp
238 end
korg2taxonomy(keggorg)
Alias for: korg2taxo
name(db_abbrev) click to toggle source

Returns the canonical database name for the abbreviation. (e.g. ‘ec’ -> ‘enzyme’, ‘hsa’ -> ‘h.sapiens’, …)

    # File lib/bio/db/kegg/keggtab.rb
120 def name(db_abbrev)
121   if @database[db_abbrev]
122     @database[db_abbrev].name
123   end
124 end
name_by_abbrev(db_abbrev) click to toggle source

deprecated

    # File lib/bio/db/kegg/keggtab.rb
165 def name_by_abbrev(db_abbrev)
166   db_by_abbrev(db_abbrev).name
167 end
path(db_abbrev) click to toggle source

Returns an absolute path for the flat file database. (e.g. ‘/bio/db/kegg/genes’, …)

    # File lib/bio/db/kegg/keggtab.rb
128 def path(db_abbrev)
129   if @database[db_abbrev]
130     file = @database[db_abbrev].name
131     if @bioroot
132       "#{@database[db_abbrev].path.sub(/\$BIOROOT/,@bioroot)}/#{file}"
133     else
134       "#{@database[db_abbrev].path}/#{file}"
135     end
136   end
137 end
taxa_list() click to toggle source

Returns a sorted list of all node labels from the Taxonomy section. (e.g. [“actinobacteria”, “animals”, “archaea”, “bacillales”, …])

    # File lib/bio/db/kegg/keggtab.rb
192 def taxa_list
193   @taxonomy.keys.sort
194 end
taxo2keggorgs(node = 'genes')
Alias for: taxo2korgs
taxo2korgs(node = 'genes') click to toggle source

Returns an array of the organism names included in the specified taxon label. (e.g. ‘proteobeta’ -> [“nme”, “nma”, “rso”]) This method is also available under the aliases taxo2keggorgs, taxon2korgs, and taxon2keggorgs.

    # File lib/bio/db/kegg/keggtab.rb
203 def taxo2korgs(node = 'genes')
204   if node.length == 3
205     return node
206   else
207     if @taxonomy[node]
208       tmp = Array.new
209       @taxonomy[node].each do |x|
210         tmp.push(taxo2korgs(x))
211       end
212       return tmp
213     else
214       return nil
215     end
216   end
217 end
taxon2keggorgs(node = 'genes')
Alias for: taxo2korgs
taxon2korgs(node = 'genes')
Alias for: taxo2korgs
taxonomy(node = nil) click to toggle source

Returns a hash containing the Taxonomy section of the keggtab file. If an argument is given, returns a list of all child nodes belonging to the label node. (e.g. “eukaryotes” -> [“animals”, “plants”, “protists”, “fungi”], …)

    # File lib/bio/db/kegg/keggtab.rb
182 def taxonomy(node = nil)
183   if node
184     @taxonomy[node]
185   else
186     @taxonomy
187   end
188 end