require 'open3'
require 'readahl'

# Collects heading strings via #learn and produces one regular expression
# source string covering them (#to_s), plus a short "area" tag built from the
# third character of every learned string (#area).
#
# The per-character tables are keyed by integer byte values.  Bytes are read
# with String#unpack rather than str[n], because str[n] returns an Integer only
# on Ruby 1.8 and a one-character String on 1.9 and later.
class RegBuilder

  # Byte values of the two characters ia2re emits ahead of everything else.
  RBRACKET = ']'.unpack('C').first
  HYPHEN   = '-'.unpack('C').first

  def initialize
    @t2tab = {}    # byte 1 of a string => { characters 4..5 => true }
    @t1tab = {}    # byte 0 of a string => { byte 3 => true }
    @a1tab = {}    # byte 2 of a string => true
    @cctab = {}    # last four characters => true (recorded, not read here)
    @bucket = []   # Regexp-quoted copies of every learned string
    @s = nil       # cached result of #compile
    @area = nil    # cached area tag
  end

  # Records one heading string.
  # NOTE(review): the field names suggest a TTAAii + CCCC layout -- confirm.
  def learn str
    t1, t2, a1, a2 = str.unpack('C4')
    ii = str[4, 2]
    cc = str[-4..-1]
    (@t2tab[t2] ||= {})[ii] = true
    (@t1tab[t1] ||= {})[a2] = true
    @a1tab[a1] = true
    @cctab[cc] = true
    @bucket.push Regexp.quote(str)
  end

  # Turns an array of strings into an alternation "(a|b|c)", or into the single
  # quoted string when there is at most one element.  Groups of more than two
  # two-digit items sharing a 5 or 0 in second place, or a 0 or 9 in first
  # place, are folded into one item with a character class.
  def sa2re sa
    if sa.size > 1
      ra = sa.map {|s| Regexp.quote(s) }
      # The order of these folds matters: each one works on the result of the
      # previous one.
      ra = fold_digit_group(ra, /^\d5$/, 0, '5')
      ra = fold_digit_group(ra, /^\d0$/, 0, '0')
      ra = fold_digit_group(ra, /^0\d$/, 1, '0')
      ra = fold_digit_group(ra, /^9\d$/, 1, '9')
      '(' + ra.join('|') + ')'
    else
      Regexp.quote(sa.join)
    end
  end

  # Turns an array of integer byte values into a character class.  Runs of
  # consecutive values are written as "first-last" (or just "firstlast" when
  # the run has only two members).  A single value yields the quoted character
  # with no brackets.
  def ia2re ia
    if ia.size > 1
      a = []
      a.push ']' if ia.include? RBRACKET
      a.push '-' if ia.include? HYPHEN
      sa = (ia - [RBRACKET, HYPHEN]).sort
      while i = sa.shift
        a.push Regexp.quote(i.to_i.chr)
        if sa.first == i.succ
          iorg = i
          # Swallow the rest of the consecutive run; i ends on its last member.
          while sa.first == i.succ
            i = sa.shift
          end
          a.push '-' if (i - iorg) > 1
          a.push Regexp.quote(i.to_i.chr)
        end
      end
      if a.size > 1
        '[' + a.join + ']'
      else
        a.join
      end
    else
      Regexp.quote(ia.first.to_i.chr)
    end
  end

  # Computes @area and @s once; later calls return immediately.  The external
  # re-assemble.pl helper is tried first; when it yields nothing, the regexp is
  # built from the tables filled by #learn instead.
  def compile
    return if @s
    @area = @a1tab.keys.sort.map {|i| i.chr.sub(/[\W]/, '.') }.join
    @s = assemble_with_perl
    unless @s.empty?
      # Strip the wrapper Perl puts around a stringified qr//: "(?-xism:" on
      # older perls, "(?^:" (optionally with flags) since 5.14.  Then turn
      # non-capturing groups into plain groups and compact character classes.
      @s = @s.sub(/^\(\?(?:-xism|\^\w*):/, '').sub(/\)$/, '').
        gsub(/\(\?:/, '(').
        gsub(/\[[A-Z0-9]+\]/) {|s| ia2re(s[1..-2].unpack('C*')) }
      return
    end
    # failsafe
    @s = fallback_regexp
  end

  # Regular expression source for everything learned so far.
  def to_s
    compile
    @s
  end

  # Sorted third characters of the learned strings, joined into one string,
  # with non-word characters replaced by '.'.
  def area
    compile
    @area
  end

  private

  # Integer value of the byte at position idx of str.
  def byte_at str, idx
    str[idx, 1].unpack('C').first
  end

  # When more than two items of ra match pattern, replaces them by one item in
  # which the varying digit (at position vary) becomes a character class and
  # the shared digit fixed stays in its place.  Otherwise returns ra unchanged.
  def fold_digit_group ra, pattern, vary, fixed
    va = ra.grep(pattern)
    return ra unless va.size > 2
    cls = ia2re(va.map {|ii| byte_at(ii, vary) })
    ra - va + [vary == 0 ? cls + fixed : fixed + cls]
  end

  # Feeds the quoted strings to re-assemble.pl and returns its chomped output.
  # Returns '' when the helper cannot be run, so that the caller falls through
  # to the built-in fallback (Ruby 1.9+ raises from popen3 when the executable
  # is missing, where 1.8 just produced no output).
  def assemble_with_perl
    pin, pout, perr = Open3.popen3("/usr/bin/perl re-assemble.pl")
    begin
      @bucket.each {|res| pin.puts res }
      pin.close
      pout.read.chomp
    ensure
      [pin, pout, perr].each {|io| io.close unless io.closed? }
    end
  rescue SystemCallError => e
    warn "re-assemble.pl could not be run (#{e.message}); using built-in fallback"
    ''
  end

  # Builds the regexp from the tables alone: one alternative per first byte,
  # each followed by the alternatives for every distinct two-character group.
  def fallback_regexp
    iitab = {}
    @t2tab.keys.sort.each do |t2|
      iis = sa2re(@t2tab[t2].keys.sort)
      (iitab[iis] ||= []).push t2
    end
    a = @t1tab.keys.sort.map do |t1|
      b = iitab.keys.sort.map do |iis|
        [
          ia2re(iitab[iis].dup),
          ia2re(@a1tab.keys),
          ia2re(@t1tab[t1].keys),
          iis
        ].join
      end
      t1.chr + (b.size > 1 ? '(' + b.join('|') + ')' : b.join)
    end
    a.size > 1 ? '(' + a.join('|') + ')' : a.join
  end
end

# Normalises an area string of four whitespace-separated numbers and describes
# it in words.  Every value above 180 has 360 subtracted.  The first two values
# are labelled as latitudes (S/N), the last two as longitudes (W/E).
#
# Returns [vals, desc]: vals is "s n w e" joined by spaces, desc is the
# human-readable description.
def areahack str
  a = str.split(/\s+/, 4).map {|s|
    r = Float(s)
    r -= 360.0 if r > 180.0
    r
  }
  s = a[0,2].min
  n = a[0,2].max
  w = a[2,2].min
  e = a[2,2].max
  # Spans wider than 180 or narrower than 10 are flipped, making e - w negative.
  w, e = e, w if (e - w) > 180 or (e - w) < 10
  vals = [s, n, w, e].join(' ')
  desc = []
  if ((-10)..0) === (e - w)
    desc.push 'all-longitude'
  else
    desc.push([w, e].map {|lo|
      (lo < 0 ? '%gW' : '%gE') % lo.abs
    }.join('-'))
  end
  case [s, n]
  when [-90, 0]
    desc[desc.size-1] = 'whole' if desc.last == 'all-longitude'
    desc.push 'SH'
  when [0, 90]
    desc[desc.size-1] = 'whole' if desc.last == 'all-longitude'
    desc.push 'NH'
  when [-90, 90]
    if desc.last == 'all-longitude'
      desc.pop
      desc.push 'whole globe'
    else
      desc.push '90S-90N'
    end
  else
    desc.push([s, n].map {|la|
      (la < 0 ? '%gS' : la > 0 ? '%gN' : 'EQ') % la.abs
    }.join('-'))
  end
  [vals, desc.join(' ')]
end

# --- driver -----------------------------------------------------------------
# Reads tab-separated records from ARGF, groups the headings by
# (model, resolution, area), and prints one tab-separated line per group.

ds = {}   # props => { heading => true }
as = {}   # props => sets of level / update time / access / element / fcst time

ARGF.each_line do |line|
  ttaaii, cccc, model, grid, resolution, area, level, updtime, access \
    = line.chomp.split(/\t/)
  ahl = [ttaaii, cccc].join
  props = [ model, resolution, area ].join("\t")
  ds[props] = {} unless ds.include?(props)
  ds[props][ahl] = true
  unless as.include?(props)
    as[props] = { :lev => {}, :upd => {}, :acc => {}, :el => {}, :ft => {} }
  end
  ah = as[props]
  ah[:lev][level] = true
  ah[:upd][updtime.sub(/ \[h\]$/, '')] = true
  ah[:acc][access] = true
  ah[:el][AHL.element(ttaaii, cccc)] = true
  ah[:ft][AHL.fcsttime(ttaaii, cccc)] = true
end

ds.keys.sort.each do |props|
  rb = RegBuilder.new
  ds[props].keys.each {|ahl| rb.learn ahl }
  model, resol, area = props.split(/\t/)
  fres = resol.split(/\s+/).map {|s| Float(s) }.max
  xprops = [
    model, fres, areahack(area), resol, area,
    as[props][:lev].keys.sort.join('|'),
    as[props][:upd].keys.sort.join('|'),
    as[props][:acc].keys.sort.join('|'),
    as[props][:el].keys.sort.join('|'),
    as[props][:ft].keys.sort.join('|')
  ].flatten
  name = model.sub(/One-week/, 'Week').gsub(/[- a-z]/, '') \
    + resol.sub(/ .*/, '').sub(/\./, '') + rb.area
  puts [name, ds[props].size, rb.to_s, xprops].join("\t")
end