# frozen_string_literal: true

require 'net/http'
require 'time'
require 'uri'

# Reads a tab-separated list of wiki pages, fetches every page (and its
# "%2A"-prefixed draft counterpart) from the WIS wiki over HTTP, and prints
# one pipe-delimited table row per entry on standard output.
#
# Each input row is `level<TAB>page<TAB>description`:
# * rows whose level does not parse to a non-zero integer are ignored;
# * a page of "-" marks a row that is printed without any HTTP request,
#   using the description as its title.
#
# Links are emitted as `"label":url` (presumably Textile markup -- confirm
# against whatever consumes the output).
class App
  # Page-column marker for rows that have no wiki page behind them.
  NO_PAGE = '-'
  # URL-encoded "*", prepended to a page name to address its draft page.
  DRAFT_PREFIX = '%2A'
  # Titles longer than this many characters are cut and suffixed with "...".
  TITLE_LIMIT = 32
  # Title text the wiki serves for pages that do not exist yet.
  UNDEVELOPED_TITLE = 'This page has not yet been developed'

  # Anchored on the opening tag so the capture holds only the title text.
  TITLE_RE = %r{<title>([^<]*)</title}
  TITLEBAR_RE = %r{<div class="titlebar">([^<]*)</div}
  EDITDATE_RE = %r{<p class="editdate">\s*Page last modified on ([^<]*)</p}

  # @param argv [Array<String>] command-line arguments; the first element is
  #   the path of the TSV file (defaults to "pages.tsv")
  def initialize(argv)
    @tsv = argv.first || 'pages.tsv'
    @srv = 'www.wmo.int'
    @port = 80
    @path = '/pages/prog/www/WIS/wiswiki/tiki-index.php'
    @tasks = []
  end

  # Builds a `"label":url` link to a wiki page.
  #
  # @param title [String, nil] link label; the page name is used when nil
  # @param page [String] page name as it appears in the query string
  # @return [String] the link, or "" when page is the "-" marker
  def link(title, page)
    return '' if page == NO_PAGE

    # Double quotes delimit the label in the output, so they are replaced.
    label = (title || page).tr('"', 'Q')
    url = URI::HTTP.build([nil, @srv, @port, @path, "page=#{page}", nil])
    %("#{label}":#{url})
  end

  # Loads the TSV file into @tasks as hashes with :lev, :page and :desc.
  #
  # Rows with a zero or non-numeric level are skipped silently. Rows that
  # have a level but no page column are skipped with a warning on stderr,
  # because checkweb cannot build a request for them.
  #
  # @return [nil]
  # @raise [SystemCallError] when the file cannot be opened
  def loadtab
    File.foreach(@tsv) do |line|
      row = line.chomp
      lev, page, desc = row.split("\t", 3)
      lev = lev.to_i
      next if lev.zero?

      if page.nil?
        warn "#{@tsv}: skipping row without a page column: #{row.inspect}"
        next
      end

      @tasks.push(lev: lev, page: page, desc: desc)
    end
  end

  # Fetches one wiki page and extracts its title and last-modified time.
  #
  # @param http [Net::HTTP] an already started connection to the wiki host
  # @param page [String] page name, inserted into the query string as given
  # @return [Hash] `{code:}` only when the status is not 2xx; otherwise
  #   `{code:, title:, update:, page:, size:}` where :update is a Time or nil
  # @raise [ArgumentError] when the edit date text cannot be parsed
  def checkpage(http, page)
    resp = http.get("#{@path}?page=#{page}")
    return { code: resp.code } unless resp.code.start_with?('2')

    # A successful response may still carry no body at all.
    body = resp.body.to_s
    {
      code: resp.code,
      title: page_title(body, page),
      update: page_update(body),
      page: page,
      size: body.size
    }
  end

  # Checks every loaded task against the wiki and prints its report row.
  # All requests share a single HTTP connection.
  #
  # @return [Array<Hash>] the task list, updated in place
  def checkweb
    Net::HTTP.start(@srv, @port) do |http|
      @tasks.each do |task|
        if task[:page] == NO_PAGE
          # No page to fetch: the description doubles as the row title.
          task[:title] = task[:desc]
          task[:desc] = nil
        else
          inspect_task(http, task)
        end
        report(task)
      end
    end
  end

  # Prints one table row: indented title, page link, draft link, description.
  #
  # @param task [Hash] task with :lev and :page, plus optional :title,
  #   :update, :draft and :desc
  # @return [nil]
  def report(task)
    indent = '~ ' * task[:lev]
    draft_cell = task[:draft] ? link('draft', DRAFT_PREFIX + task[:page]) : ' '
    cells = [
      '',
      "#{indent}#{task[:title]}",
      link(task[:update], task[:page]),
      draft_cell,
      task[:desc],
      ''
    ]
    puts cells.join('|')
  end

  # Loads the task list, then checks and reports every entry.
  def run
    loadtab
    checkweb
  end

  private

  # Derives the display title of a page from its HTML body.
  def page_title(body, page)
    title = body[TITLE_RE, 1].to_s.strip.sub(/ : WIS WIKI/, '')
    if title == page
      # The <title> merely repeats the page name; prefer the title bar text.
      bar = body[TITLEBAR_RE, 1]
      title = bar.strip if bar
    end
    title = "(#{page})" if title == UNDEVELOPED_TITLE
    title.size > TITLE_LIMIT ? "#{title[0, TITLE_LIMIT]}..." : title
  end

  # Parses the "Page last modified on ..." stamp; nil when the page has none.
  def page_update(body)
    stamp = body[EDITDATE_RE, 1]
    return nil unless stamp

    # CEST is spelled as a numeric offset (UTC+2) before parsing.
    Time.parse(stamp.strip.sub(/ of/, '').sub(/CEST/, '+0200'))
  end

  # Fills in :title, :update and :draft of a task that has a real page.
  def inspect_task(http, task)
    page = task[:page]
    $stderr.puts "GET #{page}" if $stderr.tty?

    found = checkpage(http, page)
    task[:title] = found[:title]
    if found[:update]
      day = found[:update].strftime('%Y-%m-%d')
      # NOTE(review): July 2013 dates are reported as "stub" -- presumably
      # the month the placeholder pages were created; confirm.
      task[:update] = day.start_with?('2013-07') ? 'stub' : day
    end

    # Only 2xx responses carry a :title, so its presence marks a draft.
    draft = checkpage(http, DRAFT_PREFIX + page)
    task[:draft] = draft.key?(:title)
  end
end

App.new(ARGV).run