class Bio::Tree

This is the class for phylogenetic tree. It stores a phylogenetic tree.

Internally, it is based on Bio::Pathway class. However, users cannot handle Bio::Pathway object directly.

This is alpha version. Incompatible changes may be made frequently.

Constants

DEFAULT_OPTIONS

default options

Attributes

options[RW]

tree options; mainly used for tree output

root[RW]

root node of this tree (even if unrooted tree, it is used by some methods)

Public Class Methods

new(tree = nil) click to toggle source

Creates a new phylogenetic tree. When no arguments are given, it creates a new empty tree. When a Tree object is given, it copies the tree. Note that the new tree shares Node and Edge objects with the given tree.

# File lib/bio/tree.rb, line 258
# Sets up an empty tree and, when +tree+ is given, merges that tree in.
#
# tree:: optional source tree; its contents are added through #concat.
def initialize(tree = nil)
  @root = nil
  @options = {}
  # the backing store is an undirected adjacency list graph
  @pathway = Bio::Pathway.new([], true)
  _init_cache
  concat(tree) if tree
end

Public Instance Methods

add_edge(source, target, edge = Edge.new) click to toggle source

Adds a new edge to the tree. Returns the newly added edge. If the edge already exists, it is overwritten with new one.

# File lib/bio/tree.rb, line 380
# Connects +source+ and +target+ with +edge+ and hands the edge back.
#
# The relation is wrapped in a Bio::Relation and appended to the
# underlying pathway; cached data is dropped first.
def add_edge(source, target, edge = Edge.new)
  _clear_cache
  relation = Bio::Relation.new(source, target, edge)
  @pathway.append(relation)
  edge
end
add_node(node) click to toggle source

Adds a node to the tree. Returns self. If the node already exists, it does nothing.

# File lib/bio/tree.rb, line 402
# Registers +node+ in the graph with an empty adjacency table unless an
# entry is already present. Cached data is dropped. Returns self.
def add_node(node)
  _clear_cache
  adjacency = @pathway.graph
  adjacency[node] = {} unless adjacency[node]
  self
end
adjacency_matrix(nodes = nil, default_value = nil, diagonal_value = nil) { |source, target, edge| ... } click to toggle source

Shows the adjacency matrix representation of the tree. It shows matrix only for given nodes. If nodes is nil or is omitted, it acts the same as tree.adjacency_matrix(tree.nodes). If a block is given, for each edge, it yields source, target, and edge, and uses the returned value of the block. Without blocks, it uses edge. Returns a matrix object.

# File lib/bio/tree.rb, line 822
# Builds a symmetric adjacency matrix restricted to +nodes+.
#
# nodes::          nodes to include (defaults to all nodes of the tree)
# default_value::  cell value for pairs without an edge
# diagonal_value:: cell value on the diagonal
#
# With a block, each edge whose two ends are both in +nodes+ is yielded
# as (source, target, edge) and the block result is stored; without a
# block the edge itself is stored. Returns the result of Matrix.rows.
def adjacency_matrix(nodes = nil,
                     default_value = nil,
                     diagonal_value = nil) #:yields: source, target, edge
  nodes ||= self.nodes
  dim = nodes.size
  # node => row/column position
  index_of = {}
  nodes.each_with_index { |n, idx| index_of[n] = idx }
  rows = Array.new(dim) do |r|
    Array.new(dim) { |c| r == c ? diagonal_value : default_value }
  end
  self.each_edge do |source, target, edge|
    row = index_of[source]
    col = index_of[target]
    # edges touching a node outside the selection are skipped
    next unless row && col
    cell = block_given? ? (yield source, target, edge) : edge
    rows[row][col] = cell
    rows[col][row] = cell
  end
  Matrix.rows(rows, false)
end
adjacent_nodes(node) click to toggle source

Returns an array of adjacent nodes of the given node.

# File lib/bio/tree.rb, line 331
# Lists the nodes directly connected to +node+.
# Returns an empty array when +node+ has no entry in the graph.
def adjacent_nodes(node)
  neighbours = @pathway.graph[node]
  return [] unless neighbours
  neighbours.keys
end
ancestors(node, root = nil) click to toggle source

Gets all ancestral nodes of the node. If root isn't specified or root is nil, @root is used. Returns an array of Nodes. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 757
# Returns the ancestors of +node+, nearest first and root last.
#
# The path from the root (+root+, or @root when not given) down to
# +node+ is computed, +node+ itself is dropped, and the rest is reversed.
def ancestors(node, root = nil)
  route = self.path(root || @root, node)
  (route - [ node ]).reverse
end
children(node, root = nil) click to toggle source

Gets the adjacent children nodes of the node. If root isn't specified or root is nil, @root is used. Returns an array of Nodes. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 701
# Returns the nodes adjacent to +node+ except its parent.
#
# The parent is determined relative to +root+ (@root when not given).
def children(node, root = nil)
  root ||= @root
  neighbours = self.adjacent_nodes(node)
  upstream = self.parent(node, root)
  neighbours.delete(upstream)
  neighbours
end
clear() click to toggle source

Clears all nodes and edges. Returns self. Note that options and root are also cleared.

# File lib/bio/tree.rb, line 289
# Resets the tree by re-running #initialize without arguments.
# Returns self.
def clear
  tap { initialize }
end
clear_node(node) click to toggle source

Removes all edges connected with the node. Returns self. If the node does not exist, raises IndexError.

# File lib/bio/tree.rb, line 417
# Detaches +node+ from all of its neighbours while keeping the node
# itself in the graph. Returns self.
#
# Raises IndexError when the node is not part of the tree.
def clear_node(node)
  raise IndexError, 'the node does not exist' unless self.include?(node)
  _clear_cache
  # drop every relation that mentions the node ...
  @pathway.relations.delete_if { |rel| rel.node.include?(node) }
  # ... then remove the back-references held by each neighbour
  @pathway.graph[node].each_key do |neighbour|
    @pathway.graph[neighbour].delete(node)
  end
  @pathway.graph[node].clear
  self
end
collect_edge!() { |source, target, edge| ... } click to toggle source

Replaces each edge by each block's return value. Returns self.

# File lib/bio/tree.rb, line 527
# Replaces every edge with the value returned by the block.
#
# The block receives (source, target, edge) for each relation. The new
# edge is stored on the relation, which is then re-appended to the graph
# (second argument false) so the adjacency data picks up the new edge.
# Returns self.
def collect_edge! #:yields: source, target, edge
  _clear_cache
  @pathway.relations.each do |rel|
    from, to = rel.node[0], rel.node[1]
    rel.edge = yield(from, to, rel.relation)
    @pathway.append(rel, false)
  end
  self
end
collect_node!() { |node| ... } click to toggle source

Replaces each node by each block's return value. Returns self.

# File lib/bio/tree.rb, line 506
# Replaces every node with the value returned by the block.
#
# All replacements are computed first, then applied to the relations,
# after which the graph is regenerated from the relations. Nodes that
# take part in no relation are re-added afterwards. Returns self.
def collect_node! #:yields: node
  _clear_cache
  # old node => replacement
  mapping = {}
  self.each_node { |old| mapping[old] = yield(old) }
  @pathway.relations.each do |rel|
    rel.node.collect! { |old| mapping[old] }
  end
  # rebuild the adjacency data from the rewritten relations
  @pathway.to_list
  # nodes without any edge are not produced by the rebuild
  mapping.each_value { |fresh| @pathway.graph[fresh] ||= {} }
  self
end
concat(other) click to toggle source

Concatenates the other tree. If the same edge exists, the edge in other is used. Returns self. The result is unspecified if other isn't a Tree object. Note that the Node and Edge objects in the other tree are shared in the concatenated tree.

# File lib/bio/tree.rb, line 595
# Merges +other+ into this tree: every node of +other+ is added first,
# then every edge. The node and edge objects are passed through as-is,
# so they are shared with +other+. Returns self.
#
# No type check is performed on +other+; it only needs to respond to
# each_node and each_edge.
def concat(other)
  _clear_cache
  other.each_node { |member| self.add_node(member) }
  other.each_edge { |from, to, link| self.add_edge(from, to, link) }
  self
end
descendents(node, root = nil) click to toggle source

Gets all descendent nodes of the node. If root isn't specified or root is nil, @root is used. Returns an array of Nodes. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 712
# Collects every node that lies below +node+ when the tree is viewed
# from +root+ (@root when not given).
#
# A breadth-first search from the root supplies each node's depth and
# predecessor. A node deeper than +node+ is a descendant when walking
# its predecessor chain reaches +node+; the walk stops once it climbs
# to the depth of +node+ or higher.
def descendents(node, root = nil)
  root ||= @root
  depth_of, pred_of = @pathway.breadth_first_search(root)
  base = depth_of[node]
  found = []
  depth_of.each do |candidate, depth|
    next unless depth > base
    cursor = candidate
    while (cursor = pred_of[cursor])
      if cursor == node
        found << candidate
        break
      end
      # already at or above the level of +node+: different branch
      break if depth_of[cursor] <= base
    end
  end
  found
end
distance(node1, node2) click to toggle source

Returns distance between node1 and node2. It would raise error if the edges didn't contain distance values. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 640
# Sums the distance of every edge on the path from +node1+ to +node2+.
# The sum starts at 0, so an empty path gives 0.
def distance(node1, node2)
  total = 0
  self.each_edge_in_path(node1, node2) do |_from, _to, edge|
    total += get_edge_distance(edge)
  end
  total
end
distance_matrix(nodes = nil) click to toggle source

Calculates distance matrix of given nodes. If nodes is nil, or is omitted, it acts the same as tree.distance_matrix(tree.leaves). Returns a matrix object. The result is unspecified for cyclic trees. Note 1: The diagonal values of the matrix are 0. Note 2: If the distance cannot be calculated, nil will be set.

# File lib/bio/tree.rb, line 793
# Computes the pairwise distance matrix for +nodes+ (all leaves when
# +nodes+ is nil). Returns the result of Matrix.rows.
#
# The diagonal is 0. A value already computed for the mirrored cell is
# reused; otherwise #distance is called and any StandardError it raises
# turns the cell into nil.
def distance_matrix(nodes = nil)
  nodes ||= self.leaves
  rows = []
  nodes.each_index do |i|
    row = nodes.each_index.map do |j|
      if i == j
        0
      elsif (earlier = rows[j]) && (known = earlier[i])
        # symmetric cell filled in by a previous row
        known
      else
        begin
          self.distance(nodes[i], nodes[j])
        rescue StandardError
          nil
        end
      end
    end
    rows << row
  end
  Matrix.rows(rows, false)
end
each_edge() { |source, target, edge| ... } click to toggle source

Iterates over each edge of this tree.

# File lib/bio/tree.rb, line 311
# Yields (source, target, edge) for every relation stored in the
# pathway, in storage order. Returns self.
def each_edge #:yields: source, target, edge
  @pathway.relations.each do |rel|
    from, to = rel.node[0], rel.node[1]
    yield from, to, rel.relation
  end
  self
end
each_edge_in_path(node1, node2) { |source, target, edge| ... } click to toggle source

Iterates over each edge from node1 to node2. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 626
# Walks the path from +node1+ to +node2+ and yields
# (source, target, edge) for each consecutive pair of nodes on it.
# Returns self.
def each_edge_in_path(node1, node2)
  route = self.path(node1, node2)
  previous = route.shift
  route.each do |current|
    yield previous, current, self.get_edge(previous, current)
    previous = current
  end
  self
end
each_node() { |node| ... } click to toggle source

Iterates over each node of this tree.

# File lib/bio/tree.rb, line 305
# Passes every node (every key of the adjacency table) to the block.
# Returns self.
def each_node(&block) #:yields: node
  adjacency = @pathway.graph
  adjacency.each_key(&block)
  self
end
each_out_edge(source) { |source, target, edge| ... } click to toggle source

Iterates over each connected edges of the given node. Returns self.

The reason why the method name is “each_out_edge” is that it comes from the Boost Graph Library.

# File lib/bio/tree.rb, line 355
# Yields (source, target, edge) for every edge attached to +source+.
# Nothing is yielded when +source+ has no entry in the graph.
# Returns self.
def each_out_edge(source) #:yields: source, target, edge
  neighbours = @pathway.graph[source]
  return self unless neighbours
  neighbours.each { |target, edge| yield source, target, edge }
  self
end
edges() click to toggle source

Returns all edges as an array of [ node0, node1, edge ]

# File lib/bio/tree.rb, line 319
# Returns one [ node0, node1, edge ] triple per stored relation.
def edges
  @pathway.relations.map do |rel|
    first, second = rel.node[0], rel.node[1]
    [ first, second, rel.relation ]
  end
end
get_edge(source, target) click to toggle source

Returns an edge from source to target. If source and target are not adjacent nodes, returns nil.

# File lib/bio/tree.rb, line 372
# Looks up the edge stored between +source+ and +target+.
# Returns nil when +source+ is unknown or the two are not adjacent.
def get_edge(source, target)
  neighbours = @pathway.graph[source]
  return nil unless neighbours
  neighbours[target]
end
get_edge_distance(edge) click to toggle source

Gets distance value from the given edge. Returns float or any other numeric value or nil.

# File lib/bio/tree.rb, line 100
# Extracts the distance from +edge+.
#
# Returns edge.distance; when that call raises NoMethodError, the edge
# itself is returned instead.
def get_edge_distance(edge)
  edge.distance
rescue NoMethodError
  edge
end
get_edge_distance_string(edge) click to toggle source

Gets distance string from the given edge. Returns a string or nil.

# File lib/bio/tree.rb, line 111
# Extracts the textual distance from +edge+.
#
# Returns edge.distance_string; when that call raises NoMethodError,
# returns edge.to_s for a truthy edge and nil otherwise.
def get_edge_distance_string(edge)
  edge.distance_string
rescue NoMethodError
  edge ? edge.to_s : nil
end
get_edge_merged(edge1, edge2) click to toggle source

Returns edge1 + edge2

# File lib/bio/tree.rb, line 121
# Builds a new Edge that stands for +edge1+ followed by +edge2+.
#
# When both distances are available the new edge carries their sum;
# when only one is available it carries that one; otherwise the new
# edge is created without a distance.
def get_edge_merged(edge1, edge2)
  available = [ get_edge_distance(edge1), get_edge_distance(edge2) ].select { |d| d }
  case available.size
  when 2 then Edge.new(available[0] + available[1])
  when 1 then Edge.new(available[0])
  else Edge.new
  end
end
get_node_bootstrap(node) click to toggle source
# File lib/bio/tree.rb, line 237
# Returns node.bootstrap, or nil when that call raises NoMethodError.
def get_node_bootstrap(node)
  node.bootstrap
rescue NoMethodError
  nil
end
get_node_bootstrap_string(node) click to toggle source
# File lib/bio/tree.rb, line 245
# Returns node.bootstrap_string, or nil when that call raises
# NoMethodError.
def get_node_bootstrap_string(node)
  node.bootstrap_string
rescue NoMethodError
  nil
end
get_node_by_name(str) click to toggle source

Finds a node in the tree by given name and returns the node. If the node is not found, returns nil. If multiple nodes with the same name exist, the result would be one of those (unspecified).

# File lib/bio/tree.rb, line 390
# Returns the first node, in each_node order, whose name equals +str+.
# Returns nil when no node matches.
def get_node_by_name(str)
  self.each_node do |candidate|
    return candidate if get_node_name(candidate) == str
  end
  nil
end
get_node_name(node) click to toggle source

Gets node name

# File lib/bio/tree.rb, line 229
# Returns node.name; when that call raises NoMethodError, falls back
# to node.to_s.
def get_node_name(node)
  node.name
rescue NoMethodError
  node.to_s
end
include?(node) click to toggle source

If the node exists, returns true. Otherwise, returns false.

# File lib/bio/tree.rb, line 410
# Tells whether +node+ has an entry in the adjacency table.
# Always returns exactly true or false.
def include?(node)
  return true if @pathway.graph[node]
  false
end
insert_node(node1, node2, new_node, new_distance = nil) click to toggle source

Inserts a new node between adjacent nodes node1 and node2. The old edge between node1 and node2 is changed to the edge between new_node and node2. The edge between node1 and new_node is newly created.

If new_distance is specified, the distance between node1 and new_node is set to new_distance, and distance between new_node and node2 is set to tree.get_edge(node1, node2).distance - new_distance.

Returns self. If node1 and node2 are not adjacent, raises IndexError.

If new_node already exists in the tree, the tree would become circular. In addition, if the edge between new_node and node1 (or node2) already exists, it will be erased.

# File lib/bio/tree.rb, line 890
# Splits the edge between +node1+ and +node2+ by placing +new_node+ on it.
#
# node1, node2:: adjacent nodes whose connecting edge is split
# new_node::     node to place between them
# new_distance:: optional distance for the new node1--new_node edge
#
# Returns self. Raises IndexError when get_edge(node1, node2) finds no edge.
def insert_node(node1, node2, new_node, new_distance = nil)
  # the existing edge object is kept and later reused for new_node--node2
  unless edge = self.get_edge(node1, node2) then
    raise IndexError, 'nodes not found or two nodes are not adjacent'
  end
  _clear_cache
  new_edge = Edge.new(new_distance)
  self.remove_edge(node1, node2)
  self.add_edge(node1, new_node, new_edge)
  # only adjust the old edge when both distances are known
  if new_distance and old_distance = get_edge_distance(edge) then
    old_distance -= new_distance
    begin
      # updates the original edge object in place
      edge.distance = old_distance
    rescue NoMethodError
      # the edge has no distance= writer: use the remaining distance
      # itself as the edge
      edge = old_distance
    end
  end
  self.add_edge(new_node, node2, edge)
  self
end
leaves(node = nil, root = nil) click to toggle source

If node is nil, returns an array of all leaves (nodes connected with one edge). Otherwise, gets all descendent leaf nodes of the node. If root isn't specified or root is nil, @root is used. Returns an array of Nodes. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 738
# Returns leaf nodes.
#
# Without +node+: every node of the tree whose out_degree is 1.
# With +node+: those descendants of +node+ (relative to +root+, or @root
# when not given) that have exactly one adjacent node.
def leaves(node = nil, root = nil)
  if node
    root ||= @root
    self.descendents(node, root).find_all do |candidate|
      self.adjacent_nodes(candidate).size == 1
    end
  else
    tips = []
    self.each_node { |candidate| tips << candidate if self.out_degree(candidate) == 1 }
    tips
  end
end
lowest_common_ancestor(node1, node2, root = nil) click to toggle source

Gets the lowest common ancestor of the two nodes. If root isn't specified or root is nil, @root is used. Returns a Node object or nil. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 766
# Finds the deepest node shared by the root-ward chains of +node1+ and
# +node2+, relative to +root+ (@root when not given).
#
# Each chain starts with the node itself and follows the predecessor
# table from a breadth-first search until no predecessor is left. The
# first element of the chains' intersection is returned (nil when the
# chains share nothing).
def lowest_common_ancestor(node1, node2, root = nil)
  root ||= @root
  _, pred_of = @pathway.breadth_first_search(root)
  lineage = lambda do |start|
    chain = [ start ]
    cursor = start
    chain << cursor while (cursor = pred_of[cursor])
    chain
  end
  (lineage.call(node1) & lineage.call(node2)).first
end
newick(options = {})
Alias for: output_newick
nodes() click to toggle source

Returns all nodes as an array.

# File lib/bio/tree.rb, line 295
# Returns the keys of the adjacency table, i.e. all nodes, as an array.
def nodes
  adjacency = @pathway.graph
  adjacency.keys
end
number_of_edges() click to toggle source

Returns number of edges in the tree.

# File lib/bio/tree.rb, line 326
# Returns how many relations (edges) the pathway stores.
def number_of_edges
  stored = @pathway.relations
  stored.size
end
number_of_nodes() click to toggle source

Number of nodes.

# File lib/bio/tree.rb, line 300
# Returns whatever @pathway.nodes reports (presumably the node count;
# confirm against the pathway class).
def number_of_nodes
  count = @pathway.nodes
  count
end
out_degree(source) click to toggle source

Returns number of edges in the given node.

The reason why the method name is “out_degree” is that it comes from the Boost Graph Library.

# File lib/bio/tree.rb, line 365
# Returns the number of edges attached to +source+, or 0 when +source+
# has no entry in the graph.
def out_degree(source)
  neighbours = @pathway.graph[source]
  return 0 unless neighbours
  neighbours.size
end
out_edges(source) click to toggle source

Returns all connected edges with adjacent nodes. Returns an array of the array [ source, target, edge ].

The reason why the method name is “out_edges” is that it comes from the Boost Graph Library.

# File lib/bio/tree.rb, line 341
# Returns one [ source, target, edge ] triple for every edge attached
# to +source+, or an empty array when +source+ has no entry in the graph.
def out_edges(source)
  neighbours = @pathway.graph[source]
  return [] unless neighbours
  neighbours.map { |target, edge| [ source, target, edge ] }
end
output(format, *arg, &block) click to toggle source

Returns formatted text (or something) of the tree. Currently supported formats are: :newick, :nhx, :phylip_distance_matrix

# File lib/bio/tree/output.rb, line 230
# Dispatches to the formatter selected by +format+, forwarding the
# remaining arguments and the block unchanged.
#
# Recognised formats: :newick, :nhx, :phylip_distance_matrix.
# Any other value raises a RuntimeError with message 'Unknown format'.
def output(format, *arg, &block)
  if :newick == format
    output_newick(*arg, &block)
  elsif :nhx == format
    output_nhx(*arg, &block)
  elsif :phylip_distance_matrix == format
    output_phylip_distance_matrix(*arg, &block)
  else
    raise 'Unknown format'
  end
end
output_newick(options = {}) { |node1, node2| ... } click to toggle source

Returns a newick formatted string. If block is given, the order of the node is sorted (as the same manner as Enumerable#sort).

Available options:

:indent

indent string; set false to disable (default: ' ')

:bootstrap_style

:disabled disables bootstrap representations. :traditional for traditional style. :molphy for Molphy style (default).

# File lib/bio/tree/output.rb, line 198
# Renders the tree as a Newick string.
#
# Rendering starts at @root, or at the first node when no root is set.
# An empty tree is rendered as '();'. +options+ and the block are passed
# through to the internal formatter.
def output_newick(options = {}, &block) #:yields: node1, node2
  start = @root || self.nodes.first
  return '();' unless start
  body = __to_newick([], start, 0, :__to_newick_format_leaf, options, &block)
  # the starting node is formatted with a fresh edge
  tail = __to_newick_format_leaf(start, Edge.new, options)
  body + tail + ";\n"
end
Also aliased as: newick
output_nhx(options = {}) { |node1, node2| ... } click to toggle source

Returns a NHX (New Hampshire eXtended) formatted string. If block is given, the order of the node is sorted (as the same manner as Enumerable#sort).

Available options:

:indent

indent string; set false to disable (default: ' ')

# File lib/bio/tree/output.rb, line 218
# Renders the tree as an NHX (New Hampshire eXtended) string.
#
# Rendering starts at @root, or at the first node when no root is set.
# An empty tree is rendered as '();'. +options+ and the block are passed
# through to the internal formatter.
def output_nhx(options = {}, &block) #:yields: node1, node2
  start = @root || self.nodes.first
  return '();' unless start
  body = __to_newick([], start, 0,
                     :__to_newick_format_leaf_NHX, options, &block)
  # the starting node is formatted with a fresh edge
  tail = __to_newick_format_leaf_NHX(start, Edge.new, options)
  body + tail + ";\n"
end
output_phylip_distance_matrix(nodes = nil, options = {}) click to toggle source

Generates phylip-style distance matrix as a string. If nodes is not given, all leaves in the tree are used. If the names of some of the given (or default) nodes are not defined or are empty, the names are automatically generated.

# File lib/bio/tree/output.rb, line 251
# Generates a phylip-style distance matrix as a string.
#
# nodes::   nodes to include (all leaves when nil)
# options:: passed through to Bio::Phylip::DistanceMatrix.generate
#
# A node whose name is nil or empty gets a generated name: the
# hexadecimal form of its object id.
def output_phylip_distance_matrix(nodes = nil, options = {})
  nodes = self.leaves unless nodes
  names = nodes.collect do |x|
    y = get_node_name(x)
    # the name may be missing entirely (nil), not only empty, so check
    # for nil before calling empty?
    y = sprintf("%x", x.__id__.abs) if y.nil? || y.empty?
    y
  end
  m = self.distance_matrix(nodes)
  Bio::Phylip::DistanceMatrix.generate(m, names, options)
end
parent(node, root = nil) click to toggle source

Gets the parent node of the node. If root isn't specified or root is nil, @root is used. Returns an Node object or nil. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 687
# Returns the parent of +node+ relative to +root+ (@root when not
# given): the second-to-last node on the path from the root to +node+.
#
# A truthy result is served from the parent cache on later calls.
# Raises IndexError when neither +root+ nor @root is set.
def parent(node, root = nil)
  root ||= @root
  raise IndexError, 'can not get parent for unrooted tree' unless root
  cached = _get_cached_parent(node, root)
  return cached if cached
  found = self.path(root, node)[-2]
  _cache_parent(node, found, root)
  found
end
path(node1, node2) click to toggle source

Gets path from node1 to node2. Returns an array of nodes, including node1 and node2. If node1 and/or node2 do not exist, IndexError is raised. If node1 and node2 are not connected, NoPathError is raised. The result is unspecified for cyclic trees.

# File lib/bio/tree.rb, line 612
# Returns the nodes on the path from +node1+ to +node2+, both included.
#
# Identical and directly adjacent nodes are answered immediately;
# otherwise the pathway's bfs_shortest_path is used.
#
# Raises IndexError when either node is missing from the graph, and
# NoPathError when the search result does not run from +node1+ to
# +node2+.
def path(node1, node2)
  adjacency = @pathway.graph
  raise IndexError, 'node1 not found' unless adjacency[node1]
  raise IndexError, 'node2 not found' unless adjacency[node2]
  if node1 == node2
    [ node1 ]
  elsif adjacency[node1][node2]
    [ node1, node2 ]
  else
    _, route = @pathway.bfs_shortest_path(node1, node2)
    unless route[0] == node1 && route[-1] == node2
      raise NoPathError, 'node1 and node2 are not connected'
    end
    route
  end
end
remove_edge(source, target) click to toggle source

Removes an edge between source and target. Returns self. If the edge does not exist, raises IndexError.

# File lib/bio/tree.rb, line 465
# Removes the edge between +source+ and +target+. Returns self.
#
# Relations stored in either direction are deleted, and each node is
# removed from the other's adjacency table.
# Raises IndexError when get_edge finds no edge between the two nodes.
def remove_edge(source, target)
  raise IndexError, 'edge not found' unless self.get_edge(source, target)
  _clear_cache
  # forward and reverse orientation of the same edge
  orientations = [ [ source, target ], [ target, source ] ]
  @pathway.relations.delete_if do |rel|
    orientations.any? { |pair| rel.node == pair }
  end
  orientations.each do |from, to|
    neighbours = @pathway.graph[from]
    neighbours.delete(to) if neighbours
  end
  self
end
remove_edge_if() { |source, target, edge| ... } click to toggle source

Removes each edge if the block returns not nil. Returns self.

# File lib/bio/tree.rb, line 484
# Removes every edge for which the block returns a truthy value.
#
# The block receives (source, target, edge). Matching relations are
# deleted first; afterwards the two end nodes of each deleted relation
# are removed from each other's adjacency table. Returns self.
def remove_edge_if #:yields: source, target, edge
  _clear_cache
  dropped = []
  @pathway.relations.delete_if do |rel|
    doomed = yield(rel.node[0], rel.node[1], rel.edge)
    dropped << rel if doomed
    doomed
  end
  dropped.each do |rel|
    a, b = rel.node[0], rel.node[1]
    [ [ a, b ], [ b, a ] ].each do |from, to|
      neighbours = @pathway.graph[from]
      neighbours.delete(to) if neighbours
    end
  end
  self
end
remove_node(node) click to toggle source

Removes the given node from the tree. All edges connected with the node are also removed. Returns self. If the node does not exist, raises IndexError.

# File lib/bio/tree.rb, line 436
# Removes +node+ from the tree. Returns self.
#
# The node's edges are removed through #clear_node (which also drops
# cached data), then its entry is deleted from the adjacency table.
def remove_node(node)
  self.clear_node(node)
  adjacency = @pathway.graph
  adjacency.delete(node)
  self
end
remove_node_if() { |node| ... } click to toggle source

Removes each node if the block returns not nil. All edges connected with the removed nodes are also removed. Returns self.

# File lib/bio/tree.rb, line 446
# Removes every node for which the block returns a truthy value,
# together with the edges attached to it. Returns self.
#
# The node list is taken once, before any node is removed.
def remove_node_if #:yields: node
  self.nodes.each do |candidate|
    next unless yield(candidate)
    # clear_node drops the edges (and cached data) first
    self.clear_node(candidate)
    @pathway.graph.delete(candidate)
  end
  self
end
remove_nonsense_nodes() click to toggle source

Removes all nodes that are neither branches nor leaves. That is, removes nodes connected with exactly two edges. For each removed node, two adjacent edges are merged and a new edge is created. Returns removed nodes. Note that orphan nodes are still kept unchanged.

# File lib/bio/tree.rb, line 852
# Removes every node that has exactly two edges and joins its two
# neighbours with a single merged edge.
#
# Returns an array of the removed nodes.
def remove_nonsense_nodes
  _clear_cache
  # the candidates are chosen up front, before the graph is modified
  hash = {}
  self.each_node do |node|
    hash[node] = true if @pathway.graph[node].size == 2
  end
  hash.each_key do |node|
    # neighbours and edges are re-read here because earlier removals in
    # this loop may already have rewired the node
    adjs = @pathway.graph[node].keys
    edges = @pathway.graph[node].values
    new_edge = get_edge_merged(edges[0], edges[1])
    @pathway.graph[adjs[0]].delete(node)
    @pathway.graph[adjs[1]].delete(node)
    @pathway.graph.delete(node)
    # connects the two former neighbours directly
    @pathway.append(Bio::Relation.new(adjs[0], adjs[1], new_edge))
  end
  #@pathway.to_relations
  # drops every relation that still mentions a removed node
  @pathway.relations.reject! do |rel|
    hash[rel.node[0]] or hash[rel.node[1]]
  end
  return hash.keys
end
subtree(nodes) click to toggle source

Gets the sub-tree consisting of given nodes. nodes must be an array of nodes. Nodes that do not exist in the original tree are ignored. Returns a Tree object. Note that the sub-tree shares Node and Edge objects with the original tree.

# File lib/bio/tree.rb, line 543
# Builds a new tree (an instance of the receiver's class) made of those
# of the given +nodes+ that exist in this tree, plus every edge whose
# two ends were both kept. Node and edge objects are passed through
# as-is, so they are shared with this tree.
def subtree(nodes)
  known = nodes.find_all { |x| @pathway.graph[x] }
  result = self.class.new
  return result if known.empty?
  known.each { |x| result.add_node(x) }
  self.each_edge do |from, to, link|
    result.add_edge(from, to, link) if result.include?(from) && result.include?(to)
  end
  result
end
subtree_with_all_paths(nodes) click to toggle source

Gets the sub-tree consisting of given nodes and all internal nodes connected between given nodes. nodes must be an array of nodes. Nodes that do not exist in the original tree are ignored. Returns a Tree object. The result is unspecified for cyclic trees. Note that the sub-tree shares Node and Edge objects with the original tree.

# File lib/bio/tree.rb, line 569
# Builds the sub-tree made of the given +nodes+ plus every node on the
# path between each pair of them.
#
# Each node is paired with every node that precedes it in +nodes+.
# Pairs for which #path raises IndexError or NoPathError contribute
# nothing. The collected nodes are handed to #subtree.
def subtree_with_all_paths(nodes)
  wanted = {}
  nodes.each { |x| wanted[x] = true }
  nodes.each_with_index do |later, i|
    nodes[0, i].each do |earlier|
      next if later == earlier
      hops = begin
        self.path(later, earlier)
      rescue IndexError, NoPathError
        []
      end
      hops.each { |x| wanted[x] = true }
    end
  end
  self.subtree(wanted.keys)
end
total_distance() click to toggle source

Returns total distance of all edges. It would raise error if some edges didn't contain distance values.

# File lib/bio/tree.rb, line 778
# Sums the distance of every edge in the tree. The sum starts at 0, so
# a tree without edges gives 0.
def total_distance
  total = 0
  self.each_edge { |_from, _to, edge| total += get_edge_distance(edge) }
  total
end