module Bio::FlatFileIndex::Indexer
Constants
- DEFAULT_ENV
default env program (run a program in a modified environment)
- DEFAULT_ENV_ARGS
default arguments for env program
- DEFAULT_SORT
default sort program
Public Class Methods
addindex_bdb(db, flag, need_update, parser, options)
click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 476
#
# Reads the flat files selected by +need_update+ and stores their
# entries in the Berkeley DB backed index +db+.
#
# db          :: the databank whose primary and secondary index files
#                are written.
# flag        :: assigned to the +flag+ attribute of every index file
#                before records are added.
# need_update :: enumerable of file ids (indexes into +db.fileids+).
# parser      :: yields [pos, len] for each entry and extracts the
#                primary and secondary keys.
# options     :: accepted for interface symmetry; not read here.
#
# Always returns true.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  primary = db.primary
  primary.file.close
  primary.file.flag = flag

  # Each secondary index file is closed, given the new flag, and then
  # opened and closed once more before any record is added.
  db.secondary.each_files do |secondary|
    secondary.file.close
    secondary.file.flag = flag
    secondary.file.open
    secondary.file.close
  end

  need_update.each do |fileid|
    parser.open_flatfile(fileid, db.fileids[fileid].filename)
    parser.each do |pos, len|
      primary_key = parser.parse_primary
      primary.file.add_overwrite(primary_key, [ fileid, pos, len ])
      parser.parse_secondary do |namespace, secondary_key|
        db.secondary[namespace].file.add_nr(secondary_key, primary_key)
      end
    end
    parser.close_flatfile
  end

  true
end
addindex_flat(db, mode, need_update, parser, options)
click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 525
#
# Reads the flat files selected by +need_update+, writes their keys as
# tab-separated lines into temporary files, and imports those files
# into the flat index files of +db+.
#
# db          :: the databank whose primary and secondary index files
#                receive the imported data.
# mode        :: passed through to chose_sort_proc and import_tsv_files
#                (callers in this file use :new and :add).
# need_update :: enumerable of file ids (indexes into +db.fileids+).
# parser      :: yields [pos, len] for each entry and extracts the
#                primary and secondary keys.
# options     :: hash; the keys 'sort_program', 'env_program' and
#                'env_program_arguments' select the sort command.
#
# Returns false without touching anything when +need_update+ is empty,
# true otherwise. The temporary files are removed even when parsing or
# importing raises.
def self.addindex_flat(db, mode, need_update, parser, options)
  require 'tempfile'
  prog     = options['sort_program']
  env      = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.empty?

  # Every temporary file created below is registered here so that the
  # ensure clause can remove it no matter where an error occurs.
  tempfiles = []
  begin
    DEBUG.print "prepare temporary files...\n"
    tempbase = "bioflat#{rand(10000)}-"
    pfile = Tempfile.open(tempbase + 'primary-')
    tempfiles << pfile
    DEBUG.print "open temporary file #{pfile.path.inspect}\n"
    sfiles = {}
    parser.secondary.names.each do |x|
      sfiles[x] = Tempfile.open(tempbase + 'secondary-')
      tempfiles << sfiles[x]
      DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
    end

    DEBUG.print "reading files...\n"
    need_update.each do |fileid|
      filename = db.fileids[fileid].filename
      parser.open_flatfile(fileid, filename)
      parser.each do |pos, len|
        key = parser.parse_primary
        pfile << "#{key}\t#{fileid}\t#{pos}\t#{len}\n"
        parser.parse_secondary do |sn, sp|
          sfiles[sn] << "#{sp}\t#{key}\n"
        end
      end
      parser.close_flatfile
    end

    sort_proc = chose_sort_proc(prog, mode, env, env_args)

    # close(false) flushes the data but keeps the file on disk so that
    # it can be imported by path; close(true) then deletes it.
    pfile.close(false)
    DEBUG.print "sorting primary (#{parser.primary.name})...\n"
    db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
    pfile.close(true)

    parser.secondary.names.each do |x|
      DEBUG.print "sorting secondary (#{x})...\n"
      sfiles[x].close(false)
      db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                            sfiles[x].path)
      sfiles[x].close(true)
    end
    true
  ensure
    # Closing and unlinking an already removed Tempfile is harmless, so
    # this is safe on the success path as well.
    tempfiles.each { |tf| tf.close(true) }
  end
end
chose_sort_proc(prog, mode = :new, env = nil, env_args = nil)
click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 585
#
# Picks the sort procedure used when importing tab-separated index data.
#
# prog     :: sort program. 'builtin', 'hs' or 'lm' (case-insensitive)
#             select the internal sort routine; nil or '' select
#             DEFAULT_SORT when it is executable and the internal
#             routine otherwise; anything else is used as an external
#             command.
# mode     :: :new selects external_sort_proc, any other value selects
#             external_merge_sort_proc (external commands only).
# env      :: env program placed in front of the sort command. '' or
#             false disables it; nil uses DEFAULT_ENV when executable.
# env_args :: arguments for the env program (DEFAULT_ENV_ARGS when nil).
#
# Returns the selected sort procedure.
def self.chose_sort_proc(prog, mode = :new,
                         env = nil, env_args = nil)
  internal =
    case prog
    when /^builtin$/i, /^hs$/i, /^lm$/i
      true
    when nil, ''
      if FileTest.executable?(DEFAULT_SORT)
        return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
      end
      true
    else
      false
    end

  if internal
    DEBUG.print "sort: internal sort routine\n"
    return Flat_1::FlatMappingFile::internal_sort_proc
  end

  env_args ||= DEFAULT_ENV_ARGS

  # Decide which env program, if any, goes in front of the sort command.
  wrapper =
    if env == '' || env == false
      nil                       # inhibit to use env program
    elsif env
      env                       # uses given env program
    elsif FileTest.executable?(DEFAULT_ENV)
      DEFAULT_ENV               # env == nil; default env program
    end

  prefixes = wrapper ? [ wrapper ] + env_args + [ prog ] : [ prog ]
  DEBUG.print "sort: #{prefixes.join(' ')}\n"

  if mode == :new
    Flat_1::FlatMappingFile::external_sort_proc(prefixes)
  else
    Flat_1::FlatMappingFile::external_merge_sort_proc(prefixes)
  end
end
makeindexBDB(name, parser, options, *files)
click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 451
#
# Creates a new databank of type MAGIC_BDB named +name+ and indexes
# all of +files+ into it.
#
# name    :: passed to DataBank.new.
# parser  :: supplies the format, the primary namespace name and the
#            secondary namespace names, and parses the files.
# options :: not read here; only handed on to addindex_bdb.
# files   :: flat file names to index.
#
# Returns true. Raises RuntimeError when the BDB constant is not defined.
def self.makeindexBDB(name, parser, options, *files)
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  databank = DataBank.new(name, MAGIC_BDB)
  databank.format = parser.format
  databank.fileids.add(*files)
  databank.fileids.recalc

  databank.primary   = parser.primary.name
  databank.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  databank.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # Every file is new, so all file ids 0...files.size need indexing.
  all_fileids = 0...files.size
  addindex_bdb(databank, BDBdefault::flag_write, all_fileids,
               parser, options)
  databank.close
  true
end
makeindexFlat(name, parser, options, *files)
click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 508
#
# Creates a new databank named +name+ (DataBank.new is given nil as
# the index type) and indexes all of +files+ into it through
# addindex_flat in :new mode.
#
# name    :: passed to DataBank.new.
# parser  :: supplies the format, the primary namespace name and the
#            secondary namespace names, and parses the files.
# options :: handed on to addindex_flat.
# files   :: flat file names to index.
#
# Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  databank = DataBank.new(name, nil)
  databank.format = parser.format
  databank.fileids.add(*files)
  databank.primary   = parser.primary.name
  databank.secondary = parser.secondary.names
  databank.fileids.recalc

  DEBUG.print "writing DabaBank...\n"
  databank.write('wb')

  # Every file is new, so all file ids 0...files.size need indexing.
  all_fileids = 0...files.size
  addindex_flat(databank, :new, all_fileids, parser, options)
  databank.close
  true
end
update_index(name, parser, options, *files)
click to toggle source
# File lib/bio/io/flatfile/indexer.rb, line 621
#
# Updates the existing index +name+ with the given flat files.
#
# name    :: passed to DataBank.open.
# parser  :: parser to use. When nil/false, a parser is built from the
#            format recorded in the databank; only 'swiss', 'embl' and
#            'genbank' are handled in that case.
# options :: option hash. A true 'renew' entry rebuilds the whole index
#            from the still existing old files plus +files+; the hash
#            is also handed on to the index-building methods.
# files   :: flat file names to add.
#
# Returns true. Raises RuntimeError when the file format does not match
# the databank, cannot be determined, or is unsupported, and when the
# index type is neither MAGIC_BDB nor MAGIC_FLAT.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser then
    raise 'file format mismatch' if db.format != parser.format
  else
    # Autodetection is best effort: a failure simply leaves the
    # corresponding variable nil and is dealt with below.
    begin
      dbclass_orig =
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
    rescue TypeError, Errno::ENOENT
    end
    begin
      dbclass_new =
        Bio::FlatFile.autodetect_file(files[0])
    rescue TypeError, Errno::ENOENT
    end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # Prefer the class detected from the already indexed file and fall
      # back to the one detected from the new file. This must be `||`:
      # with `or` the assignment binds first and the fallback is lost.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank or dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass then
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew'] then
    # Rebuild from scratch: old files that still exist followed by the
    # given files, without duplicates and in first-seen order.
    newfiles = db.fileids.filenames.find_all do |x|
      FileTest.exist?(x)
    end
    newfiles.concat(files)
    newfiles = newfiles.uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *newfiles)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *newfiles)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Known files whose check fails must be re-read; given files that are
  # already registered are not added a second time.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # recalc raised ENOENT: fall back to a full rebuild as if 'renew'
    # had been requested.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # The newly added files receive the ids b, b + 1, ...
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end