module Bio::NCBIDB::Common
Description
This module defines a common framework among GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each genbank/*.rb file.
References
Constants
- DELIMITER
- TAGSIZE
Public Class Methods
# File lib/bio/db/genbank/common.rb, line 30
# Builds the entry by passing +entry+ together with the TAGSIZE constant
# to the parent class's initializer.
def initialize(entry)
  super(entry, TAGSIZE)
end
Public Instance Methods
Returns the first part of the VERSION record as “ACCESSION.VERSION” String.
# File lib/bio/db/genbank/common.rb, line 57
# Takes the first element of #versions and converts it with to_s, so a
# missing first element comes back as an empty String.
def acc_version
  leading = versions.first
  leading.to_s
end
Returns the ACCESSION part of the acc_version.
# File lib/bio/db/genbank/common.rb, line 62
# Splits #acc_version on "." and returns the piece before the first dot
# as a String ("" when there is nothing to split).
def accession
  pieces = acc_version.split(/\./)
  pieces[0].to_s
end
ACCESSION – Returns contents of the ACCESSION record as an Array.
# File lib/bio/db/genbank/common.rb, line 46
# Fetches the ACCESSION field, trims surrounding whitespace and splits the
# rest on runs of whitespace.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end
COMMENT – Returns contents of the COMMENT record as a String.
# File lib/bio/db/genbank/common.rb, line 199
# Returns the COMMENT record text with the leading "COMMENT " tag removed,
# the 12-space indent stripped from the start of every line, and one
# trailing line break chomped.
#
# NOTE(review): this listing may have collapsed a run of spaces inside the
# first regexp literal -- confirm the spacing against the original source.
def comment
  get('COMMENT').to_s
                .sub(/\ACOMMENT /, '')
                .gsub(/^ {12}/, '')
                .chomp
end
# File lib/bio/db/genbank/common.rb, line 120
# Looks up the 'common_name' entry of the Hash returned by #source.
def common_name
  info = source
  info['common_name']
end
DEFINITION – Returns contents of the DEFINITION record as a String.
# File lib/bio/db/genbank/common.rb, line 40
# Returns whatever field_fetch yields for the 'DEFINITION' tag.
def definition
  field_fetch('DEFINITION')
end
FEATURES – Returns contents of the FEATURES record as an array of Bio::Feature objects.
# File lib/bio/db/genbank/common.rb, line 209
# Parses the FEATURES record on first use and caches the result in
# @data['FEATURES'].
#
# Every line is cut into a key (first 20 characters, stripped) and a value
# (up to 60 characters starting at offset 20). Lines are grouped into
# sub-arrays of the form [ feature type, position, /q="data", ... ]; each
# sub-array is then handed to parse_qualifiers and the resulting Array is
# extended with Bio::Features::BackwardCompatibility.
#
# With a block, yields each cached element; without one, returns the
# cached Array.
def features
  unless @data['FEATURES']
    table = []
    quote_open = false

    get('FEATURES').each_line do |line|
      # lines starting with the FEATURES tag carry no feature data
      next if line =~ /^FEATURES/

      # feature type (source, CDS, ...)
      key = line[0, 20].to_s.strip
      # feature value (position or /qualifier=)
      value = line[20, 60].to_s.chomp

      if line =~ /^ {5}\S/
        # new feature: open a sub-array [ feature type, position, ... ]
        table.push([key, value])
      elsif value =~ /^ \// && !quote_open # gb:IRO125195
        # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
        table.last.push(value)
        # flag for open quote (/q="data...)
        quote_open = true if value =~ /="/ && value !~ /"$/
      else
        # feature qualifier continued (...data..., ...data...")
        table.last.last << value
        # flag for closing quote (/q="data... lines ...")
        quote_open = false if value =~ /"$/
      end
    end

    table.map! { |parts| parse_qualifiers(parts) }
    @data['FEATURES'] = table.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FEATURES'] unless block_given?

  @data['FEATURES'].each { |f| yield f }
end
Returns the second part of the VERSION record as a “GI:#######” String.
# File lib/bio/db/genbank/common.rb, line 72
# Returns the second whitespace-separated token of the VERSION record
# (the "GI:#######" String), or nil when the record has no second token.
#
# Indexing the second element instead of taking the last one keeps a
# VERSION record that holds only "ACCESSION.VERSION" from being reported
# as a GI.
def gi
  versions[1]
end
KEYWORDS – Returns contents of the KEYWORDS record as an Array of Strings.
# File lib/bio/db/genbank/common.rb, line 84
# Returns the cached @data['KEYWORDS'] when present; otherwise fetches the
# KEYWORDS record, drops one trailing ".", splits on "; " and caches the
# resulting Array.
def keywords
  return @data['KEYWORDS'] if @data['KEYWORDS']

  record = fetch('KEYWORDS')
  @data['KEYWORDS'] = record.chomp('.').split(/; /)
end
LOCUS – Locus class must be defined in child classes.
# File lib/bio/db/genbank/common.rb, line 35
# Placeholder with an empty body (so it returns nil here); each subclass
# is expected to supply its own implementation.
def locus
  # must be overridden in each subclass
end
NID – Returns contents of the NID record as a String.
# File lib/bio/db/genbank/common.rb, line 78
# Returns whatever field_fetch yields for the 'NID' tag.
def nid
  field_fetch('NID')
end
# File lib/bio/db/genbank/common.rb, line 125
# Looks up the 'organism' entry of the Hash returned by #source.
def organism
  info = source
  info['organism']
end
ORIGIN – Returns contents of the ORIGIN record as a String.
# File lib/bio/db/genbank/common.rb, line 263
# Returns the cached @data['ORIGIN'] when present. Otherwise splits the
# ORIGIN record at its first newline: the first line (after tag_cut and
# truncate) is stored as @data['ORIGIN'], and the remainder, with digits,
# whitespace and "/" characters removed, is stored as @data['SEQUENCE'].
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, residues = get('ORIGIN').split("\n", 2)
  @data['ORIGIN'] = truncate(tag_cut(header))
  # a record without a second line contributes an empty sequence
  @data['SEQUENCE'] = (residues || "").tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
# File lib/bio/db/genbank/common.rb, line 136
# Parses the REFERENCE records on first use and caches the result in
# @data['REFERENCE'].
#
# Each top-level chunk from toptag2array is broken into sub-fields with
# subtag2array; the sub-field's tag decides which keys of an attribute
# Hash get filled. One Reference is built per chunk, and the resulting
# Array is extended with Bio::References::BackwardCompatibility.
#
# With a block, yields each cached element; without one, returns the
# cached Array.
def references
  unless @data['REFERENCE']
    refs = []

    toptag2array(get('REFERENCE')).each do |chunk|
      attrs = {}

      subtag2array(chunk).each do |field|
        case tag_get(field)
        when /REFERENCE/
          if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field)
            attrs['embl_gb_record_number'] = $1.to_i
            if $3 && $3 != 'sites'
              # "bases 1 to 10; 20 to 30" becomes "1-10, 20-30"
              span = $3
              span.sub!(/\A\s*bases\s+/, '')
              span.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              span.gsub!(/\s*\;\s*/, ', ')
              attrs['sequence_position'] = span
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the final element may still hold "A and B"; split it apart
          names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
          attrs['authors'] = names.flatten.map { |a| a.sub(/,/, ', ') }
        when /TITLE/
          attrs['title'] = truncate(tag_cut(field))
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          #+ '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            attrs['journal'] = $1
            attrs['volume'] = $2
            attrs['issue'] = $3
            attrs['pages'] = $4
            attrs['year'] = $5
          else
            # unrecognized layout: keep the whole line as the journal
            attrs['journal'] = journal
          end
        when /MEDLINE/
          attrs['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          attrs['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          attrs['comments'] ||= []
          attrs['comments'].push truncate(tag_cut(field))
        end
      end

      refs.push(Reference.new(attrs))
    end

    @data['REFERENCE'] = refs.extend(Bio::References::BackwardCompatibility)
  end

  return @data['REFERENCE'] unless block_given?

  @data['REFERENCE'].each { |r| yield r }
end
SEGMENT – Returns contents of the SEGMENT record as a “m/n” form String.
# File lib/bio/db/genbank/common.rb, line 90
# Returns the cached @data['SEGMENT'] when present; otherwise collects
# every run of digits in the SEGMENT record, joins them with "/" and
# caches that String.
def segment
  return @data['SEGMENT'] if @data['SEGMENT']

  numbers = fetch('SEGMENT').scan(/\d+/)
  @data['SEGMENT'] = numbers.join("/")
end
SOURCE – Returns contents of the SOURCE record as a Hash.
# File lib/bio/db/genbank/common.rb, line 96
# Returns the cached @data['SOURCE'] when present. Otherwise splits the
# SOURCE record at the 'ORGANISM' tag and builds a Hash with the keys
# 'common_name', 'organism' and 'taxonomy' whose default value is ''.
#
# The text after ORGANISM is divided at the first non-whitespace run that
# ends in ";" or, failing that, in "." (rs:NC_001741): everything before
# the run is the organism, the run and everything after it is the
# taxonomy. With neither pattern present, the whole text is the organism.
def source
  return @data['SOURCE'] if @data['SOURCE']

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  # the second pattern is only tried (and only sets the match globals)
  # when the first one fails
  if org[/\S+;/] || org[/\S+\./]
    organism = $`
    taxonomy = $& + $'
  else
    organism = org
    taxonomy = ''
  end

  info = {
    'common_name' => truncate(tag_cut(name)),
    'organism' => truncate(organism),
    'taxonomy' => truncate(taxonomy),
  }
  info.default = ''
  @data['SOURCE'] = info
end
# File lib/bio/db/genbank/common.rb, line 129
# Looks up the 'taxonomy' entry of the Hash returned by #source.
def taxonomy
  info = source
  info['taxonomy']
end
Returns the VERSION part of the acc_version as an Integer.
# File lib/bio/db/genbank/common.rb, line 67
# Splits #acc_version on "." and converts the final piece with to_i.
def version
  pieces = acc_version.split(/\./)
  pieces[-1].to_i
end
VERSION – Returns contents of the VERSION record as an Array of Strings.
# File lib/bio/db/genbank/common.rb, line 52
# Returns the cached @data['VERSION'] when present; otherwise fetches the
# VERSION record, splits it on runs of whitespace and caches the Array.
def versions
  return @data['VERSION'] if @data['VERSION']

  @data['VERSION'] = fetch('VERSION').split(/\s+/)
end