class Bio::FlatFileIndex::Flat_1::FlatMappingFile
FlatMappingFile class.
Internal use only.
Attributes
filename[R]
mode[RW]
Public Class Methods
external_merge_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Returns a Proc that merges files with an external sort program, which is
# invoked with the "-m" (merge) and "-o" (output file) options.
#
# sort_program:: the command, as an array of arguments, that is put in
#                front of the merge options.
#
# The Proc takes (out, in1, *files) and returns whatever Kernel#system
# returns for the command.
def self.external_merge_proc(sort_program = [ '/usr/bin/env',
                                              'LC_ALL=C',
                                              '/usr/bin/sort' ])
  proc do |out, in1, *files|
    # every input (in1 as well as files) has to be sorted beforehand
    merge_args = [ '-m', '-o', out, in1 ].concat(files)
    system(*(sort_program + merge_args))
  end
end
external_merge_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Returns a Proc that sorts every extra file into its own temporary file
# with an external sort program, then merges those temporary files together
# with in1 into out ("-m -o out in1 tmp...").
#
# sort_program:: the command, as an array of arguments, that is put in
#                front of the sort/merge options.
#
# The Proc takes (out, in1, *files). in1 is handed to the merge step as-is,
# i.e. it is not sorted here. The Proc returns the Kernel#system result of
# the final merge command, like the other external_* procs do.
# The temporary files are removed even when a step raises.
def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env',
                                                   'LC_ALL=C',
                                                   '/usr/bin/sort' ])
  # Tempfile is used below; load it here so the Proc also works when it is
  # called before anything else has required it.
  require 'tempfile'
  Proc.new do |out, in1, *files|
    tf_all = []
    begin
      tfn_all = files.map do |fn|
        tf = Tempfile.open('sort')
        tf_all << tf       # register first so that ensure can clean it up
        tf.close(false)    # keep the file on disk, the sort program writes it
        cmd = sort_program + [ '-o', tf.path, fn ]
        system(*cmd)
        tf.path
      end
      cmd_fin = sort_program + [ '-m', '-o', out, in1, *tfn_all ]
      system(*cmd_fin)
    ensure
      tf_all.each { |tf| tf.close(true) }
    end
  end
end
external_sort_proc(sort_program = [ '/usr/bin/env', 'LC_ALL=C', '/usr/bin/sort' ])
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Returns a Proc that sorts in1 and all extra files into out in a single
# run of an external sort program ("-o out in1 files...").
#
# sort_program:: the command, as an array of arguments, that is put in
#                front of the sort options.
#
# The Proc takes (out, in1, *files) and returns whatever Kernel#system
# returns for the command.
def self.external_sort_proc(sort_program = [ '/usr/bin/env',
                                             'LC_ALL=C',
                                             '/usr/bin/sort' ])
  proc do |out, in1, *files|
    sort_args = [ '-o', out, in1 ].concat(files)
    system(*(sort_program + sort_args))
  end
end
internal_sort_proc()
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Returns a Proc that sorts in pure Ruby: all lines of in1 and of the extra
# files are read into memory, sorted with Array#sort!, and written to out.
#
# The Proc takes (out, in1, *files) and returns nil.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    # File.readlines (unlike IO.readlines / IO.foreach) never treats a name
    # starting with "|" as a command to run.
    a = File.readlines(in1)
    files.each { |fn| a.concat(File.readlines(fn)) }
    a.sort!
    # block form closes the output file even if writing raises
    File.open(out, 'w') do |of|
      a.each { |x| of << x }
    end
    nil
  end
end
new(filename, mode = 'rb')
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Remembers the file name and the open mode. Nothing is opened here; the
# file handle, the record size and the record count all start out as nil.
#
# filename:: path of the mapping file
# mode::     mode string later given to File.open (default 'rb')
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  @file = @record_size = @records = nil
end
open(*arg)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Same as new: every argument is forwarded to new and the resulting object
# is returned.
def self.open(*args)
  new(*args)
end
Public Instance Methods
add_record(str)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Appends one record at the end of the file and increments the cached
# record count. Returns the new record count.
#
# str:: record contents, written through write_record
def add_record(str)
  # Called only for their effect: both accessors load their cached value
  # on first use, and @records must be set before it is incremented below.
  records
  record_size
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end
close()
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Closes the underlying file if it is open and forgets the handle.
# Does nothing when no file is open. Always returns nil.
def close
  return nil unless @file

  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
  nil
end
each() { |record(get_record(i))| ... }
click to toggle source
export/import/edit data
# File lib/bio/io/flatfile/index.rb
# Yields every record of the file, in file order, wrapped in a Record
# object. Returns self.
def each
  total = records
  seek(0)
  total.times do |idx|
    yield Record.new(get_record(idx))
  end
  self
end
export_tsv(stream)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Appends every record, converted with to_s and followed by a newline, to
# stream using <<. Returns stream.
def export_tsv(stream)
  each do |record|
    stream << (record.to_s + "\n")
  end
  stream
end
get_record(i)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Reads the i-th record: positions the file with seek(i) and returns the
# next record_size bytes as read by the file object.
def get_record(i)
  length = record_size
  seek(i)
  @file.read(length)
end
import_tsv_files(flag_primary, mode, sort_proc, *files)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Rebuilds this mapping file from TSV data.
#
# The current contents are exported to a temporary file (skipped when mode
# is :new), sort_proc combines that file and the given files into a second
# temporary file, a new mapping file is built from the result next to the
# current one, and finally the new file is renamed over the current one.
#
# flag_primary:: handed on to init_with_sorted_tsv_file
# mode::         :new skips exporting the existing records
# sort_proc::    called as sort_proc.call(out_path, in1_path, *files)
# files::        additional TSV files handed to sort_proc
#
# Returns self. The temporary files and the handle of the temporary mapping
# file are released even when one of the steps raises.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  require 'tempfile'

  tmpfile0 = nil
  tmpfile1 = nil
  tmpmap = nil
  begin
    tmpfile1 = Tempfile.open('flat')
    self.export_tsv(tmpfile1) unless mode == :new
    tmpfile1.close(false)

    tmpfile0 = Tempfile.open('sorted')
    tmpfile0.close(false)

    sort_proc.call(tmpfile0.path, tmpfile1.path, *files)

    tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
    tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
    tmpmap.close
    self.close

    backup = self.filename + ".#{$$}.bak~"
    begin
      File.rename(self.filename, backup)
    rescue Errno::ENOENT
      # there was no previous file to keep
    end
    File.rename(tmpmap.filename, self.filename)
    begin
      File.delete(backup)
    rescue Errno::ENOENT
      # no backup had been made
    end
  ensure
    tmpfile0.close(true) if tmpfile0
    tmpfile1.close(true) if tmpfile1
    # already closed on the normal path; only matters after an error
    tmpmap.close if tmpmap
  end
  self
end
init(rs)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Re-initializes the file as an empty mapping file with record size rs:
# opens the file, truncates it, and writes rs as a zero-padded decimal
# header that is @@recsize_width characters wide. The cached record count
# becomes 0.
#
# Raises RuntimeError ('record size out of range') unless rs is positive
# and smaller than 10 ** @@recsize_width.
def init(rs)
  in_range = (0 < rs) && (rs < 10 ** @@recsize_width)
  raise 'record size out of range' unless in_range

  open
  @record_size = rs
  header = format("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end
init_with_sorted_tsv_file(filename, flag_primary = false)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Initializes this mapping file from a TSV file.
#
# The file is read twice: the first pass finds the longest line (without
# its line terminator, at least 1) which becomes the record size handed to
# init; the second pass adds the records.
#
# filename::     path of the TSV file; consecutive duplicates are detected
#                by comparing each line with the previous one only
# flag_primary:: when true, of consecutive lines whose Record#key is equal
#                only the last one is kept; otherwise a line is skipped
#                only when its Record is equal to the previous Record
#
# Returns self. The TSV file is closed even when an error is raised.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  File.open(filename) do |f|
    rec_size = 1
    f.each do |y|
      len = y.chomp.length
      rec_size = len if rec_size < len
    end
    self.init(rec_size)

    prev = nil
    f.rewind
    if flag_primary then
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        if prev then
          if x.key == prev.key
            DEBUG.print "Warining: overwrote unique id #{x.key.inspect}\n"
          else
            self.add_record(prev.to_s)
          end
        end
        prev = x
      end
      # the last record is still pending
      self.add_record(prev.to_s) if prev
    else
      f.each do |y|
        x = Record.new(y.chomp, rec_size)
        self.add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end
open()
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Opens the file with the stored file name and mode unless it is already
# open. Returns true when the file was opened by this call, nil when it
# was open already.
def open
  return nil if @file

  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
put_record(i, str)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Writes str as the i-th record.
#
# When i is an existing index the record is overwritten in place. When i
# is at or past the current record count, the gap between the current end
# and index i is filled with blank (space-only) records, the record is
# written after them, and the cached record count becomes i + 1.
#
# Returns the result of write_record.
def put_record(i, str)
  count = records
  width = record_size
  if i < count
    seek(i)
  else
    @file.seek(0, IO::SEEK_END)
    blank = sprintf("%-*s", width, '')
    @file.write(blank * (i - count))
    @records = i + 1
  end
  write_record(str)
end
record_size()
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Returns the record size. On first use the file is opened (if needed),
# the first @@recsize_width characters are read and converted to an
# integer, and the value is cached.
#
# Raises RuntimeError ('strange record size') when that header does not
# match @@recsize_regex.
def record_size
  return @record_size if @record_size

  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
records()
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Returns the number of records. On first use it is computed as the file
# size minus the @@recsize_width header, divided (integer division) by the
# record size, and then cached.
def records
  return @records if @records

  width = record_size
  body_bytes = @file.stat.size - @@recsize_width
  @records = body_bytes / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
Also aliased as: size
search(key)
click to toggle source
methods for searching
# File lib/bio/io/flatfile/index.rb
# Looks up key by bisecting the records, which compares key against
# Record#key with < and >.
#
# Returns an array with the Record#val of every record whose key equals
# key, in file order, or an empty array when nothing matches.
def search(key)
  upper = records
  return [] if upper <= 0

  pos = upper / 2
  DEBUG.print "binary search starts...\n"
  loop do
    rec = Record.new(get_record(pos))
    probed = pos
    if key < rec.key
      # the key can only be before pos: pos becomes the exclusive upper bound
      upper = pos
      pos /= 2
    elsif key > rec.key
      pos = (pos + upper) / 2
    else
      hits = [ rec.val ]
      # collect equal keys before pos (gathered backwards, hence reverse!)
      (pos - 1).downto(0) do |j|
        neighbour = Record.new(get_record(j))
        break unless neighbour.key == key
        hits << neighbour.val
      end
      hits.reverse!
      # collect equal keys after pos, below the upper bound
      (pos + 1).upto(upper - 1) do |j|
        neighbour = Record.new(get_record(j))
        break unless neighbour.key == key
        hits << neighbour.val
      end
      DEBUG.print "#{hits.size} hits found!!\n"
      return hits
    end
    # the probe position did not move any more: key is absent
    break if probed == pos
  end
  DEBUG.print "no hits found\n"
  []
end
seek(i)
click to toggle source
# File lib/bio/io/flatfile/index.rb
# Moves the file position to the start of the i-th record, i.e. to
# @@recsize_width + record_size * i.
def seek(i)
  offset = @@recsize_width + record_size * i
  @file.seek(offset)
end
write_record(str)
click to toggle source
methods for writing file
# File lib/bio/io/flatfile/index.rb
# Writes str at the current file position as one fixed-width record:
# shorter strings are padded with spaces on the right, longer strings are
# cut to exactly record_size characters.
#
# Returns the result of the underlying write.
def write_record(str)
  rs = record_size
  # [0, rs] keeps exactly rs characters; the inclusive range 0..rs would
  # keep rs + 1 and let an over-long record spill into the next slot.
  rec = sprintf("%-*s", rs, str)[0, rs]
  @file.write(rec)
end