class Bio::FlatFileIndex::Flat_1::FlatMappingFile

FlatMappingFile class.

Internal use only.

Attributes

filename[R]
mode[RW]

Public Class Methods

external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 923
# Builds a proc that merges already-sorted files using an external
# sort command.
#
# sort_program:: Array of Strings: the argv prefix of the sort command.
#
# The returned proc takes (out, in1, *files) and runs
# "<sort_program> -m -o out in1 files...". in1 and every entry of
# files must already be sorted. The proc returns whatever
# Kernel#system returns for that command.
def self.external_merge_proc(sort_program = %w[/usr/bin/env LC_ALL=C /usr/bin/sort])
  Proc.new do |out, in1, *files|
    merge_args = ['-m', '-o', out, in1] + files
    argv = sort_program + merge_args
    system(*argv)
  end
end
external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 900
# Builds a proc that sorts each extra input with an external sort
# command and then merges the results with in1.
#
# sort_program:: Array of Strings: the argv prefix of the sort command.
#
# The returned proc takes (out, in1, *files):
# * every entry of files is sorted into its own temporary file with
#   "<sort_program> -o <tmp> <file>";
# * in1 (which is expected to be sorted already) and the temporary
#   files are merged into out with "<sort_program> -m -o out ...".
#
# The temporary files are removed in an ensure clause, so they are
# cleaned up even when one of the steps raises. The proc returns the
# Array of Tempfile objects it created, as it always has.
#
# Tempfile must already be loaded (require 'tempfile') when the proc
# is called.
def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env',
                                                   'LC_ALL=C',
                                                   '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    # (in1 may be sorted)
    tf_all = []
    begin
      files.each do |fn|
        tf = Tempfile.open('sort')
        # Register the temp file before doing anything that may raise,
        # so the ensure clause below always gets a chance to remove it.
        tf_all << tf
        tf.close(false)
        cmd = sort_program + [ '-o', tf.path, fn ]
        system(*cmd)
      end
      # Paths must be collected before the files are unlinked.
      tfn_all = tf_all.map { |tf| tf.path }
      cmd_fin = sort_program + [ '-m', '-o', out, in1, *tfn_all ]
      system(*cmd_fin)
    ensure
      tf_all.each { |tf| tf.close(true) }
    end
    tf_all
  end
end
external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ]) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 891
# Builds a proc that sorts files using an external sort command.
#
# sort_program:: Array of Strings: the argv prefix of the sort command.
#
# The returned proc takes (out, in1, *files), runs
# "<sort_program> -o out in1 files..." and returns whatever
# Kernel#system returns for that command.
def self.external_sort_proc(sort_program = %w[/usr/bin/env LC_ALL=C /usr/bin/sort])
  Proc.new do |out, in1, *files|
    argv = sort_program + ['-o', out, in1] + files
    system(*argv)
  end
end
internal_sort_proc() click to toggle source
# File lib/bio/io/flatfile/index.rb, line 933
# Builds a proc that sorts files in memory, without any external
# command.
#
# The returned proc takes (out, in1, *files): it reads every line of
# in1 and of each entry of files, sorts the lines with String#<=> and
# writes them to out. It returns nil.
#
# A final line that lacks its newline gets one appended before sorting;
# otherwise it would be glued to whichever line follows it in the
# sorted output.
#
# The output file is written through the block form of File.open so the
# handle is closed even when writing raises.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    lines = []
    [in1, *files].each do |fn|
      chunk = IO.readlines(fn)
      # Only the last line of a file can be missing its terminator.
      if !chunk.empty? && !chunk[-1].end_with?("\n")
        chunk[-1] = chunk[-1] + "\n"
      end
      lines.concat(chunk)
    end
    lines.sort!
    File.open(out, 'w') do |of|
      lines.each { |x| of << x }
    end
    nil
  end
end
new(filename, mode = 'rb') click to toggle source
# File lib/bio/io/flatfile/index.rb, line 734
# Remembers the file name and open mode; nothing is opened here.
#
# filename:: path of the flat mapping file.
# mode::     mode string later handed to File.open (default 'rb').
#
# @file, @record_size and @records start out as nil and are filled in
# on demand by #open, #record_size and #records.
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  @file = @record_size = @records = nil
end
open(*arg) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 730
# Convenience constructor: forwards every argument to .new and returns
# the new instance. The underlying file is not opened by this call.
def self.open(*arg)
  new(*arg)
end

Public Instance Methods

add_record(str) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 806
# Appends one record at the end of the file and returns the new record
# count.
#
# str:: record contents; padding to the record size is done by
#       #write_record.
def add_record(str)
  # Called only for their side effects: they make sure the record count
  # and the record size have been loaded before anything is written.
  records
  record_size
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end
close() click to toggle source
# File lib/bio/io/flatfile/index.rb, line 755
# Closes the underlying file if it is open and forgets the handle.
# Calling it on an already-closed object is a no-op. Always returns nil.
def close
  return nil unless @file

  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
end
each() { |record(get_record(i))| ... } click to toggle source

export/import/edit data

# File lib/bio/io/flatfile/index.rb, line 841
# Yields every record of the file, in file order, wrapped in a Record.
# Returns self.
def each
  total = records
  seek(0)
  total.times do |index|
    yield Record.new(get_record(index))
  end
  self
end
export_tsv(stream) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 850
# Writes every record to stream, one per line, using the record's to_s
# followed by a newline.
#
# stream:: any object that accepts << with a String.
#
# Returns stream.
def export_tsv(stream)
  each do |record|
    line = "#{record.to_s}\n"
    stream << line
  end
  stream
end
get_record(i) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 776
# Reads the i-th record (0-based) and returns it as a raw String of at
# most record_size bytes, or whatever IO#read returns at end of file.
def get_record(i)
  length = record_size
  seek(i)
  @file.read(length)
end
import_tsv_files(flag_primary, mode, sort_proc, *files) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 948
# Rebuilds this mapping file from its current contents plus the given
# TSV files.
#
# flag_primary:: passed through to #init_with_sorted_tsv_file.
# mode::         when :new, the current contents are NOT exported, so
#                the result is built from files only.
# sort_proc::    called as sort_proc.call(out_path, in_path, *files);
#                it must leave the sorted result in out_path.
# files::        paths of the TSV files to import.
#
# The new index is built in "<filename>.<pid>.tmp~" and then renamed
# over the real file; the previous file is parked as
# "<filename>.<pid>.bak~" during the switch and deleted afterwards.
#
# Both temporary files are removed in an ensure clause, so a failure in
# sorting, index building or renaming no longer leaves them behind.
#
# Returns self.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  require 'tempfile'

  tmpfile0 = nil
  tmpfile1 = Tempfile.open('flat')
  begin
    self.export_tsv(tmpfile1) unless mode == :new
    tmpfile1.close(false)

    tmpfile0 = Tempfile.open('sorted')
    tmpfile0.close(false)

    sort_proc.call(tmpfile0.path, tmpfile1.path, *files)

    tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
    tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
    tmpmap.close
    self.close

    # The current file may not exist yet, hence the ENOENT rescues.
    begin
      File.rename(self.filename, self.filename + ".#{$$}.bak~")
    rescue Errno::ENOENT
    end
    File.rename(tmpmap.filename, self.filename)
    begin
      File.delete(self.filename + ".#{$$}.bak~")
    rescue Errno::ENOENT
    end
  ensure
    # close(true) closes (if still open) and unlinks the temp file.
    tmpfile0.close(true) if tmpfile0
    tmpfile1.close(true)
  end
  self
end
init(rs) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 827
# Resets the file to an empty index with the given record size.
#
# rs:: record size; must satisfy 0 < rs < 10 ** @@recsize_width.
#
# Opens the file if necessary, truncates it, and writes rs as a
# zero-padded decimal header of @@recsize_width digits. The cached
# record count becomes 0, which is also the return value.
#
# Raises RuntimeError ('record size out of range') for an invalid rs.
def init(rs)
  in_range = (0 < rs) && (rs < 10 ** @@recsize_width)
  raise 'record size out of range' unless in_range

  open
  @record_size = rs
  header = format("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end
init_with_sorted_tsv_file(filename, flag_primary = false) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 857
# Re-initializes this mapping file from an already-sorted TSV file.
#
# filename::     path of the sorted TSV file.
# flag_primary:: when true, consecutive lines with the same Record#key
#                are collapsed and only the LAST of them is stored (a
#                debug warning is printed for each overwritten one);
#                when false, a line is skipped only when its Record is
#                equal to the previous one.
#
# The file is read twice: once to find the longest line (which becomes
# the record size, minimum 1) and once to add the records.
#
# The input is opened with the block form of File.open so the handle is
# closed even when #init or #add_record raises.
#
# Returns self.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  File.open(filename) do |f|
    # Pass 1: the widest line decides the record size.
    rec_size = 1
    f.each do |y|
      len = y.chomp.length
      rec_size = len if rec_size < len
    end
    self.init(rec_size)

    # Pass 2: append the records, dropping duplicates.
    prev = nil
    f.rewind
    if flag_primary then
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        if prev then
          if x.key == prev.key
            DEBUG.print "Warining: overwrote unique id #{x.key.inspect}\n"
          else
            self.add_record(prev.to_s)
          end
        end
        prev = x
      end
      # The last pending record has not been written yet.
      self.add_record(prev.to_s) if prev
    else
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        self.add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end
open() click to toggle source
# File lib/bio/io/flatfile/index.rb, line 745
# Opens the underlying file with the stored mode unless it is already
# open.
#
# Returns true when the file was opened by this call, nil when it was
# already open.
def open
  return nil if @file

  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
put_record(i, str) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 814
# Writes str as the i-th record (0-based).
#
# When i is inside the file the existing record is overwritten. When i
# is at or past the end, the gap (if any) is filled with blank records,
# the record is appended, and the cached record count becomes i + 1.
#
# Returns the result of #write_record.
def put_record(i, str)
  count = records
  width = record_size
  if i < count
    seek(i)
  else
    @file.seek(0, IO::SEEK_END)
    blank = sprintf("%-*s", width, '')
    @file.write(blank * (i - count))
    @records = i + 1
  end
  write_record(str)
end
record_size() click to toggle source
# File lib/bio/io/flatfile/index.rb, line 764
# Returns the record size, reading it from the file on first use.
#
# The first @@recsize_width bytes of the file are read and must match
# @@recsize_regex; their integer value is cached in @record_size.
# The file is opened if necessary.
#
# Raises RuntimeError ('strange record size') when the header does not
# match.
def record_size
  return @record_size if @record_size

  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
records() click to toggle source
# File lib/bio/io/flatfile/index.rb, line 789
# Returns the number of records, computing it on first use as
# (file size - @@recsize_width) / record_size (integer division) and
# caching the result in @records.
def records
  return @records if @records

  width = record_size
  @records = (@file.stat.size - @@recsize_width) / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
Also aliased as: size
seek(i) click to toggle source
# File lib/bio/io/flatfile/index.rb, line 784
# Moves the file position to the start of the i-th record (0-based),
# i.e. to byte offset @@recsize_width + record_size * i.
# Returns the result of the underlying seek call.
def seek(i)
  body_offset = record_size * i
  @file.seek(@@recsize_width + body_offset)
end
size()
Alias for: records
write_record(str) click to toggle source

methods for writing file

# File lib/bio/io/flatfile/index.rb, line 800
# Writes str at the current file position as exactly one record.
#
# str is left-justified and space-padded to record_size characters; a
# longer str is truncated to record_size characters. (The slice uses
# start/length form: an inclusive 0..rs range would keep rs + 1
# characters and shift every following record.)
#
# Returns the result of the underlying write call.
def write_record(str)
  rs = record_size
  rec = sprintf("%-*s", rs, str)[0, rs]
  @file.write(rec)
end