module Bio::NCBIDB::Common

Description

This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.

References

Constants

DELIMITER
TAGSIZE

Public Class Methods

new(entry) click to toggle source
Calls superclass method
   # File lib/bio/db/genbank/common.rb
# Creates a new entry object by delegating to the superclass initializer,
# passing +entry+ through together with this module's TAGSIZE constant.
# NOTE(review): TAGSIZE is presumably the width of the tag column used by
# the superclass parser -- confirm against the superclass.
30 def initialize(entry)
31   super(entry, TAGSIZE)
32 end

Public Instance Methods

acc_version() click to toggle source

Returns the first part of the VERSION record as “ACCESSION.VERSION” String.

   # File lib/bio/db/genbank/common.rb
# Returns the first element of #versions converted with to_s, so an empty
# VERSION array yields "" instead of nil.
57 def acc_version
58   versions.first.to_s
59 end
accession() click to toggle source

Returns the ACCESSION part of the acc_version.

   # File lib/bio/db/genbank/common.rb
# Returns the text of #acc_version that precedes its first "." character.
# When acc_version is empty the split produces no elements; to_s then turns
# the resulting nil into "".
62 def accession
63   acc_version.split(/\./).first.to_s
64 end
accessions() click to toggle source

ACCESSION – Returns contents of the ACCESSION record as an Array.

   # File lib/bio/db/genbank/common.rb
# Strips the value returned by field_fetch('ACCESSION') and splits it on
# runs of whitespace, returning the pieces as an Array of Strings.
# This method itself does not store the Array in @data.
46 def accessions
47   field_fetch('ACCESSION').strip.split(/\s+/)
48 end
comment() click to toggle source

COMMENT – Returns contents of the COMMENT record as a String.

    # File lib/bio/db/genbank/common.rb
# COMMENT -- Returns the text of the COMMENT record as a String.
#
# The leading "COMMENT" tag (padded to 12 columns) is removed from the start
# of the record, the 12-space indentation is removed from the start of every
# line, and one trailing line terminator is dropped.  A missing record
# (nil from get) yields "".
def comment
  raw = get('COMMENT').to_s
  without_tag = raw.sub(/\ACOMMENT     /, '')
  without_tag.gsub(/^ {12}/, '').chomp
end
common_name() click to toggle source
    # File lib/bio/db/genbank/common.rb
# Returns the 'common_name' entry of the Hash built by #source.
120 def common_name
121   source['common_name']
122 end
Also aliased as: vernacular_name
definition() click to toggle source

DEFINITION – Returns contents of the DEFINITION record as a String.

   # File lib/bio/db/genbank/common.rb
# Returns whatever field_fetch('DEFINITION') yields, without further
# processing in this method.
40 def definition
41   field_fetch('DEFINITION')
42 end
features() { |f| ... } click to toggle source

FEATURES – Returns contents of the FEATURES record as an array of Bio::Feature objects.

    # File lib/bio/db/genbank/common.rb
# FEATURES -- Returns the contents of the FEATURES record as an Array with
# one element per feature; each element is whatever parse_qualifiers returns
# for that feature's collected raw lines.
#
# Every line of get('FEATURES') is cut at fixed columns: the first 20
# characters hold the feature key and the next 60 hold either the location
# or a qualifier.  Lines are grouped into sub-arrays of the form
# [ feature key, location, qualifier, ... ], with continuation lines
# appended to the last collected string.
#
# Lines that occur before the first feature key (for example a blank line
# or a stray continuation line) have no feature to attach to and are
# skipped; previously they raised NoMethodError on nil.
#
# The parsed Array is extended with Bio::Features::BackwardCompatibility and
# cached in @data['FEATURES'], so parsing happens only on the first call.
# With a block, each feature is yielded in order; without a block, the
# cached Array is returned.
def features
  unless @data['FEATURES']
    ary = []
    in_quote = false
    get('FEATURES').each_line do |line|
      # skip the "FEATURES ..." header line itself
      next if line =~ /^FEATURES/

      # feature type  (source, CDS, ...)
      head = line[0,20].to_s.strip

      # feature value (position or /qualifier=)
      body = line[20,60].to_s.chomp

      # sub-array [ feature type, position, /q="data", ... ]
      if line =~ /^ {5}\S/
        ary.push([ head, body ])

      # nothing collected yet: this line cannot belong to any feature
      elsif ary.empty?
        next

      # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
      elsif body =~ /^ \// && !in_quote               # gb:IRO125195
        ary.last.push(body)

        # flag for open quote (/q="data...)
        in_quote = true if body =~ /="/ && body !~ /"$/

      # feature qualifier continued (...data..., ...data...")
      else
        ary.last.last << body

        # flag for closing quote (/q="data... lines  ...")
        in_quote = false if body =~ /"$/
      end
    end

    ary.map! { |subary| parse_qualifiers(subary) }

    @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
  end

  if block_given?
    @data['FEATURES'].each { |f| yield f }
  else
    @data['FEATURES']
  end
end
gi() click to toggle source

Returns the second part of the VERSION record as a “GI:#######” String.

   # File lib/bio/db/genbank/common.rb
# Returns the "GI:#######" token of the VERSION record as a String, or nil
# when the record carries no GI.
#
# The token is looked up by its "GI:" prefix rather than by position, so a
# VERSION line that only holds ACCESSION.VERSION no longer reports the
# accession as if it were a GI.
def gi
  versions.find { |part| part =~ /\AGI:/ }
end
keywords() click to toggle source

KEYWORDS – Returns contents of the KEYWORDS record as an Array of Strings.

   # File lib/bio/db/genbank/common.rb
# Takes fetch('KEYWORDS'), removes one trailing "." if present, and splits
# the remainder on "; ".  The resulting Array is memoized in
# @data['KEYWORDS'] and reused on later calls.
84 def keywords
85   @data['KEYWORDS'] ||= fetch('KEYWORDS').chomp('.').split(/; /)
86 end
locus() click to toggle source

LOCUS – Locus class must be defined in child classes.

   # File lib/bio/db/genbank/common.rb
# Placeholder for the LOCUS accessor.  The body is empty, so this
# implementation returns nil.
35 def locus
36   # must be overridden in each subclass
37 end
nid() click to toggle source

NID – Returns contents of the NID record as a String.

   # File lib/bio/db/genbank/common.rb
# Returns whatever field_fetch('NID') yields, without further processing in
# this method.
78 def nid
79   field_fetch('NID')
80 end
organism() click to toggle source
    # File lib/bio/db/genbank/common.rb
# Returns the 'organism' entry of the Hash built by #source.
125 def organism
126   source['organism']
127 end
origin() click to toggle source

ORIGIN – Returns contents of the ORIGIN record as a String.

    # File lib/bio/db/genbank/common.rb
# ORIGIN -- Returns the text of the ORIGIN line as a String.
#
# On the first call the ORIGIN record is divided at its first newline: the
# first line (after tag_cut and truncate) is cached in @data['ORIGIN'], and
# the remainder, with digits, blanks, tabs, line terminators and "/"
# removed, is cached in @data['SEQUENCE'].  Later calls return the cached
# @data['ORIGIN'] without re-parsing.
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, sequence = get('ORIGIN').split("\n", 2)
  sequence = "" unless sequence
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = sequence.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
references() { |r| ... } click to toggle source

REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.

    # File lib/bio/db/genbank/common.rb
# REFERENCE -- Returns the REFERENCE records as an Array of Reference
# objects.
#
# On the first call, get('REFERENCE') is divided into individual references
# with toptag2array and each reference into fields with subtag2array.  Each
# field is dispatched on its tag (tag_get) and its value (tag_cut) is stored
# in a Hash that is finally handed to Reference.new.  The resulting Array is
# extended with Bio::References::BackwardCompatibility and cached in
# @data['REFERENCE'].
#
# With a block, each reference is yielded in order; without a block, the
# cached Array is returned.
136 def references
137   unless @data['REFERENCE']
138     ary = []
139     toptag2array(get('REFERENCE')).each do |ref|
140       hash = Hash.new
141       subtag2array(ref).each do |field|
142         case tag_get(field)
143         when /REFERENCE/
          # leading number -> record number; optional "(...)" part -> position
144           if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field) then
145             hash['embl_gb_record_number'] = $1.to_i
            # a parenthesised value of exactly 'sites' is not stored
146             if $3 and $3 != 'sites' then
147               seqpos = $3
              # drop a leading "bases", turn "N to M" into "N-M", ";" into ", "
148               seqpos.sub!(/\A\s*bases\s+/, '')
149               seqpos.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
150               seqpos.gsub!(/\s*\;\s*/, ', ')
151               hash['sequence_position'] = seqpos
152             end
153           end
154         when /AUTHORS/
          # split on ", "; the last item may hold two names joined by "and";
          # the first "," inside each name is rewritten as ", "
155           authors = truncate(tag_cut(field))
156           authors = authors.split(/, /)
157           authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
158           authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
159           hash['authors']     = authors
160         when /TITLE/
161           hash['title']       = truncate(tag_cut(field))
162           # CHECK Actually GenBank is not demanding for dot at the end of TITLE
163           #+ '.'
164         when /JOURNAL/
165           journal = truncate(tag_cut(field))
          # only the form "name volume (issue), first-last (year)" is split
          # into parts; any other text is stored whole under 'journal'
166           if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
167       hash['journal']  = $1
168       hash['volume']   = $2
169       hash['issue']    = $3
170       hash['pages']    = $4
171       hash['year']     = $5
172           else
173       hash['journal'] = journal
174           end
175         when /MEDLINE/
176           hash['medline']     = truncate(tag_cut(field))
177         when /PUBMED/
178           hash['pubmed']      = truncate(tag_cut(field))
179         when /REMARK/
          # several REMARK fields accumulate in one Array
180           hash['comments'] ||= []
181           hash['comments'].push truncate(tag_cut(field))
182         end
183       end
184       ary.push(Reference.new(hash))
185     end
186     @data['REFERENCE'] = ary.extend(Bio::References::BackwardCompatibility)
187   end
188   if block_given?
189     @data['REFERENCE'].each do |r|
190       yield r
191     end
192   else
193     @data['REFERENCE']
194   end
195 end
segment() click to toggle source

SEGMENT – Returns contents of the SEGMENT record as a “m/n” form String.

   # File lib/bio/db/genbank/common.rb
# Collects every run of digits found in fetch('SEGMENT') and joins them with
# "/".  The resulting String is memoized in @data['SEGMENT'].
90 def segment
91   @data['SEGMENT'] ||= fetch('SEGMENT').scan(/\d+/).join("/")
92 end
source() click to toggle source

SOURCE – Returns contents of the SOURCE record as a Hash.

    # File lib/bio/db/genbank/common.rb
 96 def source
 97   unless @data['SOURCE']
 98     name, org = get('SOURCE').split('ORGANISM')
 99     org ||= ""
100     if org[/\S+;/]
101       organism = $`
102       taxonomy = $& + $'
103     elsif org[/\S+\./]                                # rs:NC_001741
104       organism = $`
105       taxonomy = $& + $'
106     else
107       organism = org
108       taxonomy = ''
109     end
110     @data['SOURCE'] = {
111       'common_name'   => truncate(tag_cut(name)),
112       'organism'      => truncate(organism),
113       'taxonomy'      => truncate(taxonomy),
114     }
115     @data['SOURCE'].default = ''
116   end
117   @data['SOURCE']
118 end
taxonomy() click to toggle source
    # File lib/bio/db/genbank/common.rb
# Returns the 'taxonomy' entry of the Hash built by #source.
129 def taxonomy
130   source['taxonomy']
131 end
vernacular_name()
Alias for: common_name
version() click to toggle source

Returns the VERSION part of the acc_version as an Integer.

   # File lib/bio/db/genbank/common.rb
# Splits #acc_version on "." and converts the last piece with to_i.
# to_i yields 0 when that piece does not start with digits, and also when
# acc_version is empty (nil.to_i).
67 def version
68   acc_version.split(/\./).last.to_i
69 end
versions() click to toggle source

VERSION – Returns contents of the VERSION record as an Array of Strings.

   # File lib/bio/db/genbank/common.rb
# Splits fetch('VERSION') on runs of whitespace.  The resulting Array is
# memoized in @data['VERSION'] and reused on later calls.
52 def versions
53   @data['VERSION'] ||= fetch('VERSION').split(/\s+/)
54 end