class Bio::FlatFileIndex::Flat_1::FlatMappingFile
FlatMappingFile class.
Internal use only.
Attributes
filename[R]
mode[RW]
Public Class Methods
external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 923
#
# Builds a Proc that merges already-sorted files with an external sort
# program (invoked with the -m and -o options).
#
# sort_program:: argument vector of the sort command; the merge options
#                and the file names are appended to it.
#
# The returned Proc takes (out, in1, *files): +out+ is the output path,
# +in1+ and +files+ are the inputs, all of which must already be sorted.
# It returns whatever Kernel#system returns.
def self.external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
  proc do |out, in1, *files|
    merge_args = [ '-m', '-o', out, in1 ] + files
    system(*(sort_program + merge_args))
  end
end
external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 900
#
# Builds a Proc that sorts each extra input into its own temporary file
# with an external sort program and then merges the results.
#
# sort_program:: argument vector of the sort command.
#
# The returned Proc takes (out, in1, *files). Every entry of +files+ is
# sorted into a Tempfile; +in1+ is handed to the merge step (-m) without
# being sorted here. The temporary files are removed after the merge and
# the Proc returns the array of (closed) Tempfile objects.
def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
  proc do |out, in1, *files|
    sorted_tmps = files.map do |path|
      tmp = Tempfile.open('sort')
      # Close without unlinking: only the path is needed by the sort command.
      tmp.close(false)
      system(*(sort_program + [ '-o', tmp.path, path ]))
      tmp
    end
    sorted_paths = sorted_tmps.map { |tmp| tmp.path }
    merge_cmd = sort_program + [ '-m', '-o', out, in1 ] + sorted_paths
    system(*merge_cmd)
    sorted_tmps.each { |tmp| tmp.close(true) }
  end
end
external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 891
#
# Builds a Proc that sorts files with an external sort program.
#
# sort_program:: argument vector of the sort command; "-o out" and the
#                input file names are appended to it.
#
# The returned Proc takes (out, in1, *files) and returns whatever
# Kernel#system returns for the sort command.
def self.external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
  proc do |out, in1, *files|
    sort_args = [ '-o', out, in1 ] + files
    system(*(sort_program + sort_args))
  end
end
internal_sort_proc()
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 933
#
# Builds a Proc that sorts files in memory, without any external program.
#
# The returned Proc takes (out, in1, *files): it reads every line of
# +in1+ and of each entry of +files+, sorts the lines with Array#sort!
# and writes them to +out+. It returns nil.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    # File.readlines is used instead of IO.readlines so that a path that
    # starts with "|" is never interpreted as a command to run.
    lines = File.readlines(in1)
    files.each { |fn| lines.concat(File.readlines(fn)) }
    lines.sort!
    # Block form guarantees the output file is closed even if a write fails.
    File.open(out, 'w') do |of|
      lines.each { |x| of << x }
    end
    nil
  end
end
new(filename, mode = 'rb')
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 734
#
# Remembers the file name and open mode. The file itself is not opened
# here; @file, @record_size and @records start out as nil.
#
# filename:: path of the mapping file
# mode::     mode string later passed to File.open (default 'rb')
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  @file = @record_size = @records = nil
end
open(*arg)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 730
#
# Same as new: forwards every argument to the constructor and returns
# the new object.
def self.open(*arg)
  new(*arg)
end
Public Instance Methods
add_record(str)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 806
#
# Appends +str+ as a new record at the end of the file and returns the
# updated record count.
def add_record(str)
  # Called for their side effects only: they make sure @records and
  # @record_size are loaded before the counter is incremented.
  records
  record_size
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end
close()
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 755
#
# Closes the underlying file if it is open and forgets the handle.
# Does nothing when the file is not open. Always returns nil.
def close
  return nil unless @file

  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
  nil
end
each() { |record(get_record(i))| ... }
click to toggle source
Methods for exporting, importing, and editing data.
# File lib/bio/io/flatfile/index.rb, line 841
#
# Yields a Record built from every stored record, in file order
# (index 0 up to records - 1). Returns self.
def each
  total = records
  seek(0)
  total.times do |idx|
    yield Record.new(get_record(idx))
  end
  self
end
export_tsv(stream)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 850
#
# Appends the string form of every record, followed by a newline, to
# +stream+ using <<. Returns +stream+.
def export_tsv(stream)
  each { |rec| stream << "#{rec}\n" }
  stream
end
get_record(i)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 776
#
# Reads the i-th record (0-based) and returns it as the raw string of
# record_size bytes read from the file.
def get_record(i)
  length = record_size
  seek(i)
  @file.read(length)
end
import_tsv_files(flag_primary, mode, sort_proc, *files)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 948
#
# Rebuilds this mapping file from TSV files.
#
# flag_primary:: passed through to init_with_sorted_tsv_file
# mode::         when :new, the current contents are NOT exported and
#                merged in; any other value keeps the existing records
# sort_proc::    callable invoked as sort_proc.call(out, in1, *files);
#                it must leave the sorted result in +out+
# files::        paths of the TSV files to import
#
# The new data is written to a temporary map file next to the original,
# which then replaces the original by renaming. Returns self.
#
# Temporary files and the temporary map are released in an ensure
# clause, so they are cleaned up even when sorting or writing fails.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  require 'tempfile'
  tmpfile0 = tmpfile1 = tmpmap = nil

  tmpfile1 = Tempfile.open('flat')
  self.export_tsv(tmpfile1) unless mode == :new
  # Close without unlinking: sort_proc reads the file by path.
  tmpfile1.close(false)

  tmpfile0 = Tempfile.open('sorted')
  tmpfile0.close(false)

  sort_proc.call(tmpfile0.path, tmpfile1.path, *files)

  tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
  tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
  tmpmap.close
  self.close

  backup = self.filename + ".#{$$}.bak~"
  begin
    File.rename(self.filename, backup)
  rescue Errno::ENOENT
    # No existing mapping file to back up.
  end
  File.rename(tmpmap.filename, self.filename)
  begin
    File.delete(backup)
  rescue Errno::ENOENT
    # No backup was created above.
  end

  self
ensure
  # close is a no-op on a map that is already closed.
  tmpmap.close if tmpmap
  tmpfile0.close(true) if tmpfile0
  tmpfile1.close(true) if tmpfile1
end
init(rs)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 827
#
# Resets the file to an empty mapping with record size +rs+: the file is
# truncated and +rs+ is written at the start as a zero-padded decimal
# number of @@recsize_width digits. Returns 0 (the new record count).
#
# Raises RuntimeError ('record size out of range') unless
# 0 < rs < 10 ** @@recsize_width.
def init(rs)
  upper_limit = 10 ** @@recsize_width
  raise 'record size out of range' unless 0 < rs && rs < upper_limit

  open
  @record_size = rs
  header = format("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0)
  @file.write(header)
  @records = 0
end
init_with_sorted_tsv_file(filename, flag_primary = false)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 857
#
# Re-initializes this mapping from an already sorted TSV file.
#
# filename::     path of the sorted TSV file
# flag_primary:: when true, consecutive lines with the same key are
#                collapsed and only the last one is stored (a debug
#                message is printed for each overwritten key); when
#                false, a line is skipped only if its Record equals the
#                previous Record
#
# The file is read twice: first to find the longest line (which becomes
# the record size, minimum 1), then to add the records. Returns self.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  # Block form guarantees the input is closed even if init, add_record
  # or Record.new raises.
  File.open(filename) do |f|
    rec_size = 1
    f.each do |y|
      len = y.chomp.length
      rec_size = len if rec_size < len
    end
    init(rec_size)

    prev = nil
    f.rewind
    if flag_primary then
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        if prev then
          if x.key == prev.key
            DEBUG.print "Warining: overwrote unique id #{x.key.inspect}\n"
          else
            add_record(prev.to_s)
          end
        end
        prev = x
      end
      # The last pending record has not been written by the loop.
      add_record(prev.to_s) if prev
    else
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end
open()
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 745
#
# Opens the underlying file with the stored name and mode unless it is
# already open. Returns true when the file was opened by this call and
# nil when it was already open.
def open
  return nil if @file

  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
put_record(i, str)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 814
#
# Writes +str+ as the i-th record (0-based).
#
# When +i+ is inside the current range the record is overwritten in
# place. When +i+ is at or beyond the end, the gap (i - records) is
# filled with blank records first and the record count becomes i + 1.
# Returns the result of write_record.
def put_record(i, str)
  count = records
  width = record_size
  if i < count
    seek(i)
  else
    blank = format("%-*s", width, '')
    @file.seek(0, IO::SEEK_END)
    @file.write(blank * (i - count))
    @records = i + 1
  end
  write_record(str)
end
record_size()
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 764
#
# Returns the size of one record. On first use the file is opened if
# necessary and the value is parsed from the first @@recsize_width
# characters of the file; afterwards the cached value is returned.
#
# Raises RuntimeError ('strange record size') when the header does not
# match @@recsize_regex.
def record_size
  return @record_size if @record_size

  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
records()
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 789
#
# Returns the number of records. On first use it is computed from the
# file size: (size - @@recsize_width) / record_size; afterwards the
# cached value is returned.
def records
  return @records if @records

  # record_size is evaluated first: it also makes sure the file is open.
  width = record_size
  payload_bytes = @file.stat.size - @@recsize_width
  @records = payload_bytes / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
Also aliased as: size
search(key)
click to toggle source
Methods for searching.
# File lib/bio/io/flatfile/index.rb, line 982
#
# Looks up +key+ with a binary-style search over the records and returns
# an array with the values of all records whose key equals +key+, in
# file order. Returns an empty array when nothing matches or when the
# file holds no records.
def search(key)
  upper = records
  return [] if upper <= 0

  pos = upper / 2
  DEBUG.print "binary search starts...\n"
  loop do
    rec = Record.new(get_record(pos))
    last_pos = pos
    if key < rec.key
      # Everything from pos upwards is too large.
      upper = pos
      pos = pos / 2
    elsif key > rec.key
      pos = (pos + upper) / 2
    else
      # Hit: collect neighbouring records that share the same key.
      hits = [ rec.val ]
      j = pos - 1
      while j >= 0 && (rec = Record.new(get_record(j))).key == key
        hits << rec.val
        j -= 1
      end
      # Earlier records were collected back to front.
      hits.reverse!
      j = pos + 1
      while j < upper && (rec = Record.new(get_record(j))).key == key
        hits << rec.val
        j += 1
      end
      DEBUG.print "#{hits.size} hits found!!\n"
      return hits
    end
    # The probe position stopped moving: the key is not present.
    break if last_pos == pos
  end
  DEBUG.print "no hits found\n"
  []
end
seek(i)
click to toggle source
# File lib/bio/io/flatfile/index.rb, line 784
#
# Moves the file position to the start of the i-th record (0-based),
# i.e. to byte offset @@recsize_width + record_size * i.
def seek(i)
  width = record_size
  offset = @@recsize_width + width * i
  @file.seek(offset)
end
write_record(str)
click to toggle source
Methods for writing to the file.
# File lib/bio/io/flatfile/index.rb, line 800
#
# Writes +str+ at the current file position as one fixed-width record:
# the string is left-justified and space-padded to record_size
# characters, and cut to record_size characters when it is longer.
# Returns the result of the underlying write call.
def write_record(str)
  rs = record_size
  # [0, rs] keeps exactly rs characters. The inclusive range [0..rs]
  # kept rs + 1 and let an over-long string spill into the next record.
  rec = sprintf("%-*s", rs, str)[0, rs]
  @file.write(rec)
end