#!/usr/bin/env ruby
# 
# rfc2822validator
#
# $Id: rfc2822validator 12 2006-09-23 02:22:22Z tommy $
# 
# Copyright (c) 2006 TOMITA Masahiro
# 

$LOAD_PATH << File.dirname(File.expand_path($0))

require "strscan"
require "rfc2822parser"

module RFC2822

  # Error raised when a header field body cannot be tokenized or parsed.
  class ParseError < StandardError
    # Compact representation: the class name followed by the inspected
    # message, e.g. #<RFC2822::ParseError: "offending text">.
    def inspect
      format("#<%s: %s>", self.class, message.inspect)
    end
  end

  # Maps a lower-cased header field name to the symbol naming the grammar
  # used for its body.  Names that are not listed here are handled by the
  # caller (RFC2822.parse falls back to :UNSTRUCTURED).
  HEADER_TYPE = {
    # Origination date (RFC 2822 section 3.6.1)
    "date" => :DATE_TIME,

    # Originator fields (section 3.6.2)
    "from" => :MAILBOX_LIST,
    "sender" => :MAILBOX,
    "reply-to" => :ADDRESS_LIST,

    # Destination address fields (section 3.6.3)
    "to" => :ADDRESS_LIST,
    "cc" => :ADDRESS_LIST,
    "bcc" => :ADDRESS_LIST_BCC,

    # Identification fields (section 3.6.4)
    "message-id" => :MSG_ID,
    "in-reply-to" => :MSG_ID_LIST,
    "references" => :MSG_ID_LIST,

    # Informational fields (section 3.6.5)
    "subject" => :UNSTRUCTURED,
    "comments" => :UNSTRUCTURED,
    "keywords" => :PHRASE_LIST,

    # Resent fields (section 3.6.6)
    "resent-date" => :DATE_TIME,
    "resent-from" => :MAILBOX_LIST,
    "resent-sender" => :MAILBOX,
    "resent-to" => :ADDRESS_LIST,
    "resent-cc" => :ADDRESS_LIST,
    "resent-bcc" => :ADDRESS_LIST_BCC,
    "resent-message-id" => :MSG_ID,

    # Trace fields (section 3.6.7)
    "return-path" => :RETURN_PATH,
    "received" => :RECEIVED,
  }
  
  module_function
  # Parse the body of one header field.
  #
  # name  :: header field name; looked up case-insensitively in HEADER_TYPE
  # value :: header field body
  #
  # Field names without an entry in HEADER_TYPE are parsed as :UNSTRUCTURED.
  # Returns whatever Parser#parse returns for the selected header type.
  def parse(name, value)
    header_kind = HEADER_TYPE[name.downcase]
    header_kind = :UNSTRUCTURED unless header_kind
    Parser.new.parse(header_kind, value)
  end

  # Glue between the parser engine and the Scanner defined in this file.
  # NOTE(review): yyparse is not defined here; presumably this reopens the
  # parser class loaded by `require "rfc2822parser"` -- confirm.
  class Parser
    # Parse +value+ as a header body of kind +header_type+.
    # Stores the comments collected by the scanner in @comments once the
    # parse has finished and returns the result of yyparse.
    def parse(header_type, value)
      @header_type, @value = header_type, value
      @scanner = RFC2822::Scanner.new(header_type, value)
      result = yyparse(self, :parse_sub)
      @comments = @scanner.comments
      result
    end

    # Token source handed to yyparse: first yields the header type together
    # with the raw value, then lets the scanner yield its own tokens to the
    # same block.
    def parse_sub(&block)
      yield(@header_type, @value)
      @scanner.scan(&block)
    end

    # Error hook: turns a parse failure into a ParseError carrying the
    # offending token value.
    def on_error(t, val, vstack)
      raise ParseError, val
    end
  end

  # Tokenizer for header field bodies.
  #
  # A Scanner is created for one header body and yields [type, value] pairs
  # to the block given to #scan, followed by a final bare nil that marks the
  # end of input.  Comments found in structured headers are collected in
  # #comments.  The #ws_mode and #datetime_mode switches may be changed
  # while scanning is in progress.
  class Scanner
    # Character-class bodies (meant to be interpolated inside [...]) for
    # the lexical elements of RFC 2822.
    TEXT_RE = '\x01-\x09\x0b\x0c\x0e-\x7f'
    QTEXT_RE = '\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f'
    ATEXT_RE = 'A-Za-z0-9\!\#\$\%\&\'\*\+\\-\/\=\?\^\_\`\{\|\}\~'
    CTEXT_RE = '\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x27\x2a-\x5b\x5d-\x7f'
    UTEXT_RE = '\x01-\x7f'
    # dtext = NO-WS-CTL / %d33-90 / %d94-126.  NO-WS-CTL contains DEL
    # (%d127), so the upper bound is \x7f like in the other classes.
    DTEXT_RE = '\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x5e-\x7f'

    # header_type :: grammar symbol; :UNSTRUCTURED selects the unstructured
    #                scanner, anything else the structured one
    # str         :: header field body to tokenize
    def initialize(header_type, str)
      @header_type = header_type
      @comments = []
      @ss = StringScanner.new(str)
      @ws_mode = false
      @datetime_mode = false
    end

    # Comments seen so far, each as a "(...)" string.
    attr_reader :comments
    # ws_mode: when true, whitespace and comments are yielded as :FWS/:CFWS
    # tokens instead of being dropped.
    # datetime_mode: when true, times are yielded as :TIME_OF_DAY and
    # all-digit atoms as :DIGIT.
    attr_accessor :ws_mode, :datetime_mode

    # Yield every token of the header body to the block, then nil.
    # Raises RFC2822::ParseError on input that cannot be tokenized.
    def scan(&block)
      if @header_type == :UNSTRUCTURED
        scan_unstructured(&block)
      else
        scan_structured(&block)
      end
    end

    # Tokenize an unstructured body: runs of utext (which includes all
    # ASCII whitespace) are yielded as :UNSTRUCTURED.  An empty body yields
    # a single empty :UNSTRUCTURED token.
    #
    # Raises RFC2822::ParseError with the unread remainder when a character
    # outside utext (NUL or a non-ASCII character) is encountered.
    def scan_unstructured
      if @ss.eos?
        yield :UNSTRUCTURED, ""
        yield nil
        return
      end
      until @ss.eos?
        # The pattern must consume at least one character.  A pattern that
        # can match the empty string would leave the scan position where it
        # is and keep this loop spinning forever on unmatchable input.
        s = @ss.scan(/[#{UTEXT_RE}]+/no)
        raise RFC2822::ParseError, @ss.rest unless s
        yield :UNSTRUCTURED, s
      end
      yield nil
    end

    # Tokenize a structured body.  Yields :TIME_OF_DAY and :DIGIT (only in
    # datetime_mode), :CFWS and :FWS (only in ws_mode), :NO_FOLD_QUOTE,
    # :QUOTED_STRING, :NO_FOLD_LITERAL, :DOMAIN_LITERAL and :ATOM.  Any
    # other character is yielded as a token whose type is the character
    # itself.  Comments are always recorded in #comments.
    def scan_structured
      until @ss.eos?
        if @datetime_mode && (s = @ss.scan(/\d\d:\d\d(:\d\d)?(?=\s)/))
          yield :TIME_OF_DAY, s
          # The lookahead guarantees whitespace follows; it is handled by
          # the next iteration.
          next
        end
        case
        when s = @ss.scan(/\s*\(/nmo)
          # Drop the "(" that was just scanned: cfws returns every comment
          # with its own parentheses, so keeping it would produce an
          # unbalanced "((...)".
          s = s.chop << cfws(@ss)
          yield :CFWS, s if @ws_mode
        when s = @ss.scan(/\s+/nmo)
          yield :FWS, s if @ws_mode
        when s = @ss.scan(/\"(\\[#{TEXT_RE}]|[#{QTEXT_RE}])*\"/no)
          yield :NO_FOLD_QUOTE, s
        when s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/nmo)
          yield :QUOTED_STRING, s
        when s = @ss.scan(/\[(\\[#{TEXT_RE}]|[#{DTEXT_RE}])*\]/no)
          yield :NO_FOLD_LITERAL, s
        when s = @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/nmo)
          yield :DOMAIN_LITERAL, s
        when s = @ss.scan(/[#{ATEXT_RE}]+/no)
          if @datetime_mode && s =~ /\A\d+\z/
            yield :DIGIT, s
          else
            yield :ATOM, s
          end
        when s = @ss.scan(/./no)
          yield s, s
        end
      end
      yield nil
    end

    # Read one or more consecutive comments.  Must be called with +ss+
    # positioned just after an opening "(".  Whitespace after each comment
    # is skipped and not included in the result.
    #
    # Appends every comment (as "(...)") to #comments and returns them
    # joined into one string.  Raises RFC2822::ParseError on a malformed
    # comment.
    def cfws(ss)
      found = []
      loop do
        found << "(#{cfws_sub(ss)})"
        ss.skip(/\s+/nmo)
        break unless ss.scan(/\(/no)
      end
      @comments.concat found
      found.join
    end

    # Read the body of a single comment, including nested comments, up to
    # and including the closing ")".  Must be called with +ss+ positioned
    # just after the opening "(".
    #
    # Returns the comment text without the outer parentheses.
    # Raises RFC2822::ParseError with the unread remainder when the comment
    # contains a character that is not allowed or is never closed.
    def cfws_sub(ss)
      ret = ""
      until ss.eos?
        if ss.scan(/(\s*(\\[#{TEXT_RE}]|[#{CTEXT_RE}]))*\s*/nmo)
          ret << ss.matched
        end
        if ss.scan(/\)/no)
          # ")" closes this comment
          return ret
        elsif ss.scan(/\(/no)
          # "(" opens a nested comment: recurse
          ret << "(" << cfws_sub(ss) << ")"
        else
          raise RFC2822::ParseError, ss.rest
        end
      end
      # input ended before the closing ")"
      raise RFC2822::ParseError, ss.rest
    end
  end

end

# Command line entry point: validate the header section of every file given
# as an argument.  For each header field that fails to parse, print the
# file name, the error and the raw field.
if __FILE__ == $0
  if ARGV.empty?
    STDERR.puts "Usage: rfc2822validator files..."
    exit 1
  end

  ARGV.each do |fname|
    # Read in binary mode: mail headers may contain arbitrary bytes, and
    # matching a regexp against a string that is not valid in the default
    # external encoding raises ArgumentError.
    File.open(fname, "rb") do |f|
      header = []
      f.each do |line|
        # The first empty line ends the header section.
        break if line.chomp.empty?
        if line =~ /\A\s/ && !header.empty?
          # Continuation line: unfold onto the previous field.
          header.last << line
        else
          # New field.  A continuation line with no field before it is kept
          # as a field of its own so that it is reported below instead of
          # crashing on a nil predecessor.
          header << line
        end
      end
      header.each do |h|
        begin
          name, value = h.split(/\s*:\s*/, 2)
          # A line without ":" is not a header field; report it through the
          # same path as any other parse failure.
          raise RFC2822::ParseError, h if value.nil?
          RFC2822.parse(name, value)
        rescue RFC2822::ParseError => e
          puts fname
          p e
          p h
        end
      end
    end
  end
end
