# Converts HTML read from ARGF (files named on the command line, or stdin)
# into plain text on stdout, one input line at a time:
#
#   * a line containing `<Hn>text</Hn>` is printed as `= text`;
#   * a line starting with `<PRE>` enters preformatted mode;
#   * a line starting with `</PRE>`, or an empty line, leaves preformatted
#     mode and prints a blank line;
#   * while in preformatted mode, every other line is printed with its
#     line-break / italic markup removed;
#   * outside preformatted mode, every other line is dropped.
#
# Unless an earlier line contained `charset=utf-8`, preformatted text is
# treated as ISO-8859-1 and re-encoded as UTF-8 before printing.
#
# NOTE(review): the copy of this script that was reviewed had been passed
# through an HTML stripper: tag literals inside the regexes were missing and
# the entity strings had been decoded. `<PRE>`, `<Hn>` and the entity names
# are reconstructed from what the surrounding code requires (`</PRE>`,
# `</H\1>` + `$2`, the gsub pattern). The break tags below are an ASSUMPTION
# -- confirm against a pristine copy of the script.

# Character entities decoded on every line, keyed by the exact matched text.
ENTITIES = {
  '&amp;'  => '&',
  '&lt;'   => '<',
  '&gt;'   => '>',
  '&quot;' => '"',
  '&apos;' => "'"
}.freeze

# NOTE(review): assumed set of tags that the lost patterns matched (each one
# had been replaced by a line break in the damaged copy). TODO confirm.
LEADING_BREAK  = /^(?:<BR>|<HR>|<P>)?/
LEADING_RULE   = /^ *(?:<BR>|<HR>|<P>)/
# The optional `<I>` pairs with the `</I>` stripped from the start of a line.
TRAILING_BREAK = /(?:<BR>|<HR>|<P>)(?:<I>)?$/

in_pre  = false
is_utf8 = false

# Read raw bytes: Latin-1 input is not valid UTF-8, and matching a regex
# against an invalidly-encoded string raises ArgumentError.
ARGF.binmode

ARGF.each_line do |line|
  line.sub!(/\r?\n$/, '')
  # fetch raises KeyError on an unknown key, mirroring the original `else raise`
  # (unreachable, since the pattern only matches the five keys).
  line.gsub!(/&(?:amp|lt|gt|quot|apos);/) { |entity| ENTITIES.fetch(entity) }

  case line
  when /charset=utf-8/i
    is_utf8 = true
  when /^<PRE>/i
    in_pre = true
  when /^<\/PRE>/i, /^$/
    in_pre = false
    puts ''
  when /<H(\d)>([^<]+)<\/H\1>/i
    # The heading level ($1) is ignored; every heading is emitted as `= text`.
    puts "= #{$2}"
  else
    next unless in_pre

    line.sub!(LEADING_BREAK, '')
    line.sub!(LEADING_RULE, '')
    line.sub!(TRAILING_BREAK, '')
    line.sub!(/^<\/I>/, '')
    line.gsub!(/ Reserved, /, " Reserved\n")

    # Latin-1 to UTF-8. Each ISO-8859-1 byte is the code point of the same
    # value, so transcoding is exact for all of 0x80..0xFF (the previous
    # hand-rolled version produced a wrong second byte for 0xC0..0xFE and
    # skipped 0xFF).
    unless is_utf8
      line = line.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8)
    end
    puts line
  end
end