module Bio::FlatFileIndex::Indexer

Constants

DEFAULT_ENV

Default env program (used to run a program in a modified environment).

DEFAULT_ENV_ARGS

Default arguments passed to the env program.

DEFAULT_SORT

Default external sort program.

Public Class Methods

addindex_bdb(db, flag, need_update, parser, options) click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 476
# Reads the flat files selected by +need_update+ and stores their entries
# into the primary and secondary index files of +db+.
#
# db::          databank object; db.primary, db.secondary and db.fileids
#               are used here
# flag::        value assigned to the +flag+ attribute of every index file
#               before any entry is written
# need_update:: enumerable of file ids (used as indexes into db.fileids)
# parser::      object responding to open_flatfile, each, parse_primary,
#               parse_secondary and close_flatfile
# options::     not used by this method
#
# Returns true. Exceptions raised while reading or storing entries are
# propagated, but the flat file opened by the parser is closed first.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  pn = db.primary
  pn.file.close
  pn.file.flag = flag

  # Every secondary index file is reopened once with the new flag and
  # closed again before any entry is added.
  db.secondary.each_files do |x|
    x.file.close
    x.file.flag = flag
    x.file.open
    x.file.close
  end

  need_update.each do |fileid|
    filename = db.fileids[fileid].filename
    parser.open_flatfile(fileid, filename)
    begin
      parser.each do |pos, len|
        key = parser.parse_primary
        pn.file.add_overwrite(key, [ fileid, pos, len ])
        parser.parse_secondary do |sn, sp|
          db.secondary[sn].file.add_nr(sp, key)
        end
      end
    ensure
      # Close the flat file even when parsing or insertion fails, so an
      # error does not leave it open.
      parser.close_flatfile
    end
  end
  true
end
addindex_flat(db, mode, need_update, parser, options) click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 525
# Reads the flat files selected by +need_update+, writes their index
# entries as tab-separated lines into temporary files, and imports those
# files into the primary and secondary index files of +db+.
#
# db::          databank object; db.primary, db.secondary and db.fileids
#               are used here
# mode::        passed through to chose_sort_proc and import_tsv_files
# need_update:: enumerable of file ids (used as indexes into db.fileids)
# parser::      object responding to primary, secondary, open_flatfile,
#               each, parse_primary, parse_secondary and close_flatfile
# options::     hash; the keys 'sort_program', 'env_program' and
#               'env_program_arguments' are read
#
# Returns false without doing anything when +need_update+ is empty,
# otherwise true. The temporary files are removed and the parser's flat
# file is closed even when an exception is raised.
def self.addindex_flat(db, mode, need_update, parser, options)
  require 'tempfile'
  prog = options['sort_program']
  env = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.size == 0

  # Every temporary file created below is registered here so the ensure
  # clause can remove it regardless of where a failure occurs.
  tempfiles = []
  begin
    DEBUG.print "prepare temporary files...\n"
    tempbase = "bioflat#{rand(10000)}-"
    pfile = Tempfile.open(tempbase + 'primary-')
    tempfiles << pfile
    DEBUG.print "open temporary file #{pfile.path.inspect}\n"
    sfiles = {}
    parser.secondary.names.each do |x|
      sfiles[x] = Tempfile.open(tempbase + 'secondary-')
      tempfiles << sfiles[x]
      DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
    end

    DEBUG.print "reading files...\n"
    need_update.each do |fileid|
      filename = db.fileids[fileid].filename
      parser.open_flatfile(fileid, filename)
      begin
        parser.each do |pos, len|
          key = parser.parse_primary
          pfile << "#{key}\t#{fileid}\t#{pos}\t#{len}\n"
          parser.parse_secondary do |sn, sp|
            sfiles[sn] << "#{sp}\t#{key}\n"
          end
        end
      ensure
        parser.close_flatfile
      end
    end

    sort_proc = chose_sort_proc(prog, mode, env, env_args)
    # Close without unlinking so the data is on disk for the import,
    # then unlink once the import has finished.
    pfile.close(false)
    DEBUG.print "sorting primary (#{parser.primary.name})...\n"
    db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
    pfile.close(true)

    parser.secondary.names.each do |x|
      DEBUG.print "sorting secondary (#{x})...\n"
      sfiles[x].close(false)
      db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                            sfiles[x].path)
      sfiles[x].close(true)
    end
  ensure
    # Repeating close(true) on an already removed file is harmless.
    tempfiles.each { |t| t.close(true) }
  end
  true
end
chose_sort_proc(prog, mode = :new, env = nil, env_args = nil) click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 585
# Selects the sort procedure used when importing index data.
#
# prog::     'builtin', 'hs' or 'lm' (case-insensitive) selects the
#            internal sort routine; nil or '' selects DEFAULT_SORT when it
#            is executable and the internal routine otherwise; any other
#            value is used as the external sort program
# mode::     :new selects the external sort procedure, any other value the
#            external merge-sort procedure (external programs only)
# env::      '' or false disables the env program, another true value is
#            used as the env program, nil selects DEFAULT_ENV when it is
#            executable
# env_args:: arguments placed between the env program and the sort
#            program; DEFAULT_ENV_ARGS when nil
#
# Returns whatever the selected Flat_1::FlatMappingFile factory returns.
def self.chose_sort_proc(prog, mode = :new,
                         env = nil, env_args = nil)
  builtin = lambda do
    DEBUG.print "sort: internal sort routine\n"
    Flat_1::FlatMappingFile::internal_sort_proc
  end

  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    return builtin.call
  when nil, ''
    return builtin.call unless FileTest.executable?(DEFAULT_SORT)
    return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
  end

  # From here on an external sort program is used.
  env_args ||= DEFAULT_ENV_ARGS
  command =
    if env == '' || env == false
      # use of the env program is inhibited
      [ prog ]
    elsif env
      # the given env program
      [ env ] + env_args + [ prog ]
    elsif FileTest.executable?(DEFAULT_ENV)
      # env is nil: the default env program is available
      [ DEFAULT_ENV ] + env_args + [ prog ]
    else
      [ prog ]
    end
  DEBUG.print "sort: #{command.join(' ')}\n"

  if mode == :new
    Flat_1::FlatMappingFile::external_sort_proc(command)
  else
    Flat_1::FlatMappingFile::external_merge_sort_proc(command)
  end
end
makeindexBDB(name, parser, options, *files) click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 451
# Creates a new databank of type MAGIC_BDB named +name+ and indexes
# +files+ into it through addindex_bdb.
#
# name::    passed to DataBank.new
# parser::  supplies the format, the primary namespace name and the
#           secondary namespace names
# options:: not interpreted here; only forwarded to addindex_bdb
# files::   flat files to register and index
#
# Returns true. Raises RuntimeError when the BDB constant is not defined.
def self.makeindexBDB(name, parser, options, *files)
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  databank = DataBank.new(name, MAGIC_BDB)
  databank.format = parser.format
  databank.fileids.add(*files)
  databank.fileids.recalc

  databank.primary = parser.primary.name
  databank.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  databank.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # All of the given files are new, so every file id has to be indexed.
  every_fileid = (0...files.size)
  addindex_bdb(databank, BDBdefault::flag_write, every_fileid,
               parser, options)
  databank.close
  true
end
makeindexFlat(name, parser, options, *files) click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 508
# Creates a new databank named +name+ (DataBank.new is called with nil as
# its second argument) and indexes +files+ into it through addindex_flat
# in :new mode.
#
# name::    passed to DataBank.new
# parser::  supplies the format, the primary namespace name and the
#           secondary namespace names
# options:: forwarded to addindex_flat
# files::   flat files to register and index
#
# Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  databank = DataBank.new(name, nil)
  databank.format = parser.format
  databank.fileids.add(*files)
  databank.primary = parser.primary.name
  databank.secondary = parser.secondary.names
  databank.fileids.recalc
  DEBUG.print "writing DabaBank...\n"
  databank.write('wb')

  # All of the given files are new, so every file id has to be indexed.
  every_fileid = (0...files.size)
  addindex_flat(databank, :new, every_fileid, parser, options)
  databank.close
  true
end
update_index(name, parser, options, *files) click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 621
# Updates the existing databank +name+ with +files+.
#
# name::    passed to DataBank.open
# parser::  parser to use, or nil/false to build one from the databank's
#           format and the detected class of the indexed or new files
# options:: hash; 'renew' is read here, and the hash is forwarded to the
#           makeindex*/addindex_* methods
# files::   flat files to add or re-index
#
# When options['renew'] is true the index is rebuilt from the already
# registered files that still exist plus +files+. Otherwise only files
# whose +check+ fails and files not yet registered are indexed. If
# recalculating the file ids raises Errno::ENOENT, the method retries
# itself with 'renew' enabled.
#
# Returns true. Raises RuntimeError for a format mismatch, an
# undeterminable or unsupported format, or an unsupported index type.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser then
    raise 'file format mismatch' if db.format != parser.format
  else
    # Detection failures are tolerated: the corresponding variable simply
    # stays nil and is handled by the format dispatch below.
    begin
      dbclass_orig =
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
    rescue TypeError, Errno::ENOENT
    end
    begin
      dbclass_new =
        Bio::FlatFile.autodetect_file(files[0])
    rescue TypeError, Errno::ENOENT
    end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # `||` is required here: with `or` the assignment binds first and
      # the class detected from the new file would never be used.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank || dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass then
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew'] then
    newfiles = db.fileids.filenames.select do |x|
      FileTest.exist?(x)
    end
    newfiles.concat(files)
    # Drop duplicates while keeping the first occurrence of each name.
    unique_files = newfiles.uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *unique_files)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *unique_files)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Collect the ids of registered files whose check fails, and remove
  # already registered names from the list of files to add.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    # Work on a copy so the caller's options hash is not modified.
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # The newly added files receive the ids following the existing ones.
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end