module Bio::FlatFileIndex::Indexer

Constants

DEFAULT_ENV

Default path of the env program (used to run a program in a modified environment).

DEFAULT_ENV_ARGS

Default arguments passed to the env program.

DEFAULT_SORT

Default path of the external sort program.

Public Class Methods

addindex_bdb(db, flag, need_update, parser, options) click to toggle source
    # File lib/bio/io/flatfile/indexer.rb
# Reads the flat files listed in +need_update+ and stores their entries
# into the primary and secondary index files of +db+.
#
# db::          databank object; its primary and secondary namespaces
#               provide the index files that are written.
# flag::        open flag assigned to every index file before writing.
# need_update:: enumerable of file ids (indexes into db.fileids).
# parser::      parser used to open each flat file and to extract the
#               primary and secondary keys of every entry.
# options::     accepted for interface compatibility; not used here.
#
# Always returns true.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  primary_ns = db.primary
  primary_ns.file.close
  primary_ns.file.flag = flag

  # Each secondary file is opened once with the new flag and closed again.
  db.secondary.each_files do |x|
    x.file.close
    x.file.flag = flag
    x.file.open
    x.file.close
  end

  need_update.each do |fileid|
    filename = db.fileids[fileid].filename
    parser.open_flatfile(fileid, filename)
    begin
      parser.each do |pos, len|
        key = parser.parse_primary
        # An existing record with the same primary key is overwritten.
        primary_ns.file.add_overwrite(key, [ fileid, pos, len ])
        parser.parse_secondary do |sn, sp|
          db.secondary[sn].file.add_nr(sp, key)
        end
      end
    ensure
      # Close the flat file even when parsing or an index write fails.
      parser.close_flatfile
    end
  end
  true
end
addindex_flat(db, mode, need_update, parser, options) click to toggle source
    # File lib/bio/io/flatfile/indexer.rb
# Reads the flat files listed in +need_update+, writes their keys into
# temporary tab-separated files, and imports those files into the primary
# and secondary index files of +db+.
#
# db::          databank object whose index files receive the data.
# mode::        passed to chose_sort_proc and to import_tsv_files
#               (callers in this file use :new or :add).
# need_update:: enumerable of file ids (indexes into db.fileids).
# parser::      parser used to open each flat file and to extract the
#               primary and secondary keys of every entry.
# options::     hash; the keys 'sort_program', 'env_program' and
#               'env_program_arguments' select the sort command.
#
# Returns false when +need_update+ is empty, true otherwise.
def self.addindex_flat(db, mode, need_update, parser, options)
  require 'tempfile'
  prog = options['sort_program']
  env = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.empty?

  DEBUG.print "prepare temporary files...\n"
  tempbase = "bioflat#{rand(10000)}-"
  pfile = nil
  sfiles = {}
  begin
    pfile = Tempfile.open(tempbase + 'primary-')
    DEBUG.print "open temporary file #{pfile.path.inspect}\n"
    parser.secondary.names.each do |x|
      sfiles[x] = Tempfile.open(tempbase + 'secondary-')
      DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
    end

    DEBUG.print "reading files...\n"
    need_update.each do |fileid|
      filename = db.fileids[fileid].filename
      parser.open_flatfile(fileid, filename)
      begin
        parser.each do |pos, len|
          key = parser.parse_primary
          pfile << "#{key}\t#{fileid}\t#{pos}\t#{len}\n"
          parser.parse_secondary do |sn, sp|
            sfiles[sn] << "#{sp}\t#{key}\n"
          end
        end
      ensure
        # Close the flat file even when parsing fails.
        parser.close_flatfile
      end
    end

    sort_proc = chose_sort_proc(prog, mode, env, env_args)
    # Close without unlinking so that buffered data is flushed and the
    # file can still be read through its path during the import.
    pfile.close(false)
    DEBUG.print "sorting primary (#{parser.primary.name})...\n"
    db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
    pfile.close(true)

    parser.secondary.names.each do |x|
      DEBUG.print "sorting secondary (#{x})...\n"
      sfiles[x].close(false)
      db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                            sfiles[x].path)
      sfiles[x].close(true)
    end
  ensure
    # Remove whatever temporary files are still around, e.g. after an
    # exception. Closing an already removed Tempfile again is harmless.
    [ pfile, *sfiles.values ].compact.each { |tmp| tmp.close(true) }
  end
  true
end
chose_sort_proc(prog, mode = :new, env = nil, env_args = nil) click to toggle source
    # File lib/bio/io/flatfile/indexer.rb
# Selects the sort procedure used when building or updating an index.
#
# prog::     sort program; 'builtin', 'hs' or 'lm' (case-insensitive)
#            select the internal sort routine. nil or '' selects
#            DEFAULT_SORT when that file is executable and the internal
#            routine otherwise. Any other value is used as an external
#            command.
# mode::     :new selects external_sort_proc; any other value selects
#            external_merge_sort_proc (external commands only).
# env::      '' or false: do not use an env program; nil: use
#            DEFAULT_ENV when it is executable; otherwise the env
#            program to put in front of the sort command.
# env_args:: arguments for the env program (DEFAULT_ENV_ARGS when nil).
#
# Returns whatever the chosen Flat_1::FlatMappingFile factory returns.
def self.chose_sort_proc(prog, mode = :new,
                         env = nil, env_args = nil)
  internal = lambda do
    DEBUG.print "sort: internal sort routine\n"
    Flat_1::FlatMappingFile::internal_sort_proc
  end

  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    return internal.call
  when nil, ''
    unless FileTest.executable?(DEFAULT_SORT)
      return internal.call
    end
    return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
  end

  # From here on prog names an external sort command.
  env_args ||= DEFAULT_ENV_ARGS
  command =
    if env == '' || env == false
      # the caller explicitly disabled the env program
      [ prog ]
    elsif env
      # the caller supplied an env program
      [ env ] + env_args + [ prog ]
    elsif FileTest.executable?(DEFAULT_ENV)
      # env is nil: use the default env program when it is available
      [ DEFAULT_ENV ] + env_args + [ prog ]
    else
      [ prog ]
    end
  DEBUG.print "sort: #{command.join(' ')}\n"

  if mode == :new
    Flat_1::FlatMappingFile::external_sort_proc(command)
  else
    Flat_1::FlatMappingFile::external_merge_sort_proc(command)
  end
end
makeindexBDB(name, parser, options, *files) click to toggle source
    # File lib/bio/io/flatfile/indexer.rb
# Creates a new Berkeley DB based databank called +name+ and indexes
# +files+ into it.
#
# name::    passed to DataBank.new together with MAGIC_BDB.
# parser::  supplies the format, the primary namespace name and the
#           secondary namespace names, and is handed on to addindex_bdb.
# options:: not interpreted here; only passed through to addindex_bdb.
# files::   flat files to register and index.
#
# Raises RuntimeError when the BDB constant is not defined.
# Returns true.
def self.makeindexBDB(name, parser, options, *files)
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  db = DataBank.new(name, MAGIC_BDB)
  db.format = parser.format
  db.fileids.add(*files)
  db.fileids.recalc

  db.primary = parser.primary.name
  db.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  db.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # Every registered file is new, so all file ids need indexing.
  addindex_bdb(db, BDBdefault::flag_write, 0...files.length,
               parser, options)
  db.close
  true
end
makeindexFlat(name, parser, options, *files) click to toggle source
    # File lib/bio/io/flatfile/indexer.rb
# Creates a new databank called +name+ (DataBank.new is given nil as the
# index type) and indexes +files+ into it through addindex_flat.
#
# name::    passed to DataBank.new.
# parser::  supplies the format, the primary namespace name and the
#           secondary namespace names, and is handed on to addindex_flat.
# options:: passed through to addindex_flat.
# files::   flat files to register and index.
#
# Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  db = DataBank.new(name, nil)
  db.format = parser.format
  db.fileids.add(*files)
  db.primary = parser.primary.name
  db.secondary = parser.secondary.names
  db.fileids.recalc

  DEBUG.print "writing DabaBank...\n"
  db.write('wb')

  # Every registered file is new, so all file ids need indexing.
  addindex_flat(db, :new, 0...files.length, parser, options)
  db.close
  true
end
update_index(name, parser, options, *files) click to toggle source
    # File lib/bio/io/flatfile/indexer.rb
# Updates the existing databank +name+: re-indexes registered files
# whose check fails and indexes the files in +files+ that are not yet
# registered.
#
# name::    databank name passed to DataBank.open.
# parser::  parser to use, or nil/false to choose one from the format
#           stored in the databank ('swiss', 'embl' or 'genbank').
# options:: hash; when options['renew'] is true the index is rebuilt
#           from scratch from the registered files that still exist plus
#           +files+. It is also passed on to the indexing methods.
# files::   flat files to add to the index.
#
# Raises RuntimeError when the formats do not match, the format cannot
# be determined or is unsupported, or the index type is unsupported.
# Returns true.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser then
    raise 'file format mismatch' if db.format != parser.format
  else
    # Best-effort autodetection: an undetectable or missing file simply
    # leaves the corresponding class as nil.
    begin
      dbclass_orig =
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
    rescue TypeError, Errno::ENOENT
      dbclass_orig = nil
    end
    begin
      dbclass_new =
        Bio::FlatFile.autodetect_file(files[0])
    rescue TypeError, Errno::ENOENT
      dbclass_new = nil
    end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # "||" is required here: with "or" the assignment binds first and
      # the class detected from the new file would never be used.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank or dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass then
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew'] then
    # Rebuild from scratch: registered files that still exist, followed
    # by the given files, without duplicates and in first-seen order.
    existing = db.fileids.filenames.find_all do |x|
      FileTest.exist?(x)
    end
    all_files = existing.concat(files).uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *all_files)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *all_files)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Collect the ids of registered files whose check fails, and drop the
  # already registered names from the list of files to add.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # A registered file has disappeared: fall back to a full rebuild.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # The ids b...(b + newfiles.size) are assumed to be the ones given to
  # the files just added.
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end