#!/usr/bin/env ruby
# DocDiff: word/character-oriented text comparison utility
# Copyright (C) 2002-2011 Hisashi MORITA
# Requirements: Ruby (>= 2.0)
require 'docdiff'
require 'optparse'

# Configuration: built-in defaults, command-line options, and config files.

# Built-in settings, used as the starting configuration.
default_config = {
  resolution: "word",
  encoding:   "auto",
  eol:        "auto",
  format:     "html",
  cache:      true,
  digest:     false,
  verbose:    false
}

# Settings collected from the program name and the command line.
# `clo` is a short alias for the same hash as `command_line_options`.
clo = command_line_options = {}

# When invoked as "worddiff" or "chardiff", the resolution is preset
# accordingly.  Options parsed below are applied afterwards, so an explicit
# resolution option still wins.
case File.basename($0, ".*")
when "worddiff" then clo[:resolution] = "word"
when "chardiff" then clo[:resolution] = "char"
end

# Declare the options and parse ARGV in place (parse! strips the recognized
# options, leaving the remaining arguments in ARGV).
# ARGV.options returns nil when parsing raises OptionParser::ParseError
# (after reporting the error), in which case the script exits with status 1.
ARGV.options {|o|
  # --- resolution (granularity of comparison) ---
  o.def_option('--resolution=RESOLUTION',
    possible_resolutions = ['line', 'word', 'char'],
    'specify resolution (granularity)',
    possible_resolutions.join('|') + ' (default is word)'
    ){|s| clo[:resolution] = (s || "word")}
  o.def_option('--line', 'set resolution to line'){clo[:resolution] = "line"}
  o.def_option('--word', 'set resolution to word'){clo[:resolution] = "word"}
  o.def_option('--char', 'set resolution to char'){clo[:resolution] = "char"}

  # --- character encoding ---
  o.def_option('--encoding=ENCODING',
    possible_encodings = ['ASCII','EUC-JP','Shift_JIS','CP932','UTF-8','auto'],
    'specify character encoding',
    possible_encodings.join('|'), "(default is auto. try ASCII for single byte encodings such as ISO-8859-X)"
    ){|s| clo[:encoding] = (s || "auto")}
  o.def_option('--ascii', 'same as --encoding=ASCII'){clo[:encoding] = "ASCII"}
  o.def_option('--iso8859x', 'same as --encoding=ASCII'){clo[:encoding] = "ASCII"}
  o.def_option('--eucjp', 'same as --encoding=EUC-JP'){clo[:encoding] = "EUC-JP"}
  o.def_option('--sjis', 'same as --encoding=Shift_JIS'){clo[:encoding] = "Shift_JIS"}
  o.def_option('--cp932', 'same as --encoding=CP932'){clo[:encoding] = "CP932"}
  o.def_option('--utf8', 'same as --encoding=UTF-8'){clo[:encoding] = "UTF-8"}

  # --- end-of-line convention ---
  o.def_option('--eol=EOL',
    possible_eols = ['CR','LF','CRLF','auto'],
    'specify end-of-line character',
    possible_eols.join('|') + ' (default is auto)'
    ){|s| clo[:eol] = (s || "auto")}
  o.def_option('--cr', 'same as --eol=CR'){clo[:eol] = "CR"}
  o.def_option('--lf', 'same as --eol=LF'){clo[:eol] = "LF"}
  o.def_option('--crlf', 'same as --eol=CRLF'){clo[:eol] = "CRLF"}

  # --- output format ---
  # The fallback value agrees with the default announced in the help text.
  o.def_option('--format=FORMAT',
    possible_formats = ['tty','manued','html','wdiff','stat','user'],
    'specify output format',
    possible_formats.join('|'),
    "(default is html)",
    '(user tags can be defined in config file)'
    ){|s| clo[:format] = (s || "html")}
  o.def_option('--tty', 'same as --format=tty'){clo[:format] = "tty"}
  o.def_option('--manued', 'same as --format=manued'){clo[:format] = "manued"}
  o.def_option('--html', 'same as --format=html'){clo[:format] = "html"}
  o.def_option('--wdiff', 'same as --format=wdiff'){clo[:format] = "wdiff"}
  o.def_option('--stat', 'same as --format=stat (not supported yet)'){clo[:format] = "stat"}

  # --- labels ---
  # The handler receives exactly one LABEL per occurrence of the option, so
  # the first occurrence is stored as :label1 and any later one as :label2.
  o.def_option('--label LABEL', '-L LABEL',
    'Use label instead of filename (not supported yet)'
    ){|s|
      if clo.key?(:label1)
        clo[:label2] = s
      else
        clo[:label1] = s
      end
    }

  # --- digest / presentation / miscellaneous switches ---
  o.def_option('--digest', 'digest output, do not show all'){clo[:digest] = true}
  o.def_option('--summary', 'same as --digest'){clo[:digest] = true}
  # NOTE: unlike the other options, the first --display given wins (||=).
  o.def_option('--display=DISPLAY',
    possible_types = ['inline', 'multi'],
    'specify presentation type (effective only with digest; experimental feature)',
    possible_types.join('|'),
    '(default is inline)'){|s| clo[:display] ||= s.downcase}
  o.def_option('--cache', 'use file cache (not supported yet)'){clo[:cache] = true}
  o.def_option('--no-config-file',
    'do not read config files'){clo[:no_config_file] = true}
  o.def_option('--verbose', 'run verbosely (not supported yet)'){clo[:verbose] = true}

  # --- informational options: print and exit successfully ---
  o.def_option('--help', 'show this message'){puts o; exit(0)}
  o.def_option('--version', 'show version'){puts DocDiff::AppVersion; exit(0)}
  o.def_option('--license', 'show license'){puts DocDiff::License; exit(0)}
  o.def_option('--author', 'show author(s)'){puts DocDiff::Author; exit(0)}

  o.on_tail("When invoked as worddiff or chardiff, resolution will be set accordingly.",
    "Config files: /etc/docdiff/docdiff.conf, ~/etc/docdiff/docdiff.conf")

  o.parse!
} or exit(1)

# Build the application object and layer its configuration:
# built-in defaults first, then config files, then command-line options.
docdiff = DocDiff.new()
docdiff.config.update(default_config)
unless clo[:no_config_file] == true # config files are skipped with --no-config-file
  # System-wide config file.
  message = docdiff.process_config_file(DocDiff::SystemConfigFileName)
  if clo[:verbose] == true || docdiff.config[:verbose] == true
    STDERR.print message
  end

  # Per-user config file: at most one of the two candidate locations may exist.
  user_config_exists     = File.exist?(DocDiff::UserConfigFileName)
  alt_user_config_exists = File.exist?(DocDiff::AltUserConfigFileName)
  if user_config_exists && alt_user_config_exists
    raise "#{DocDiff::UserConfigFileName} and #{DocDiff::AltUserConfigFileName} cannot be used at the same time.  Remove or rename either one."
  end
  user_config_file =
    if user_config_exists
      DocDiff::UserConfigFileName
    elsif alt_user_config_exists
      DocDiff::AltUserConfigFileName
    end

  # Report only when a user config file was actually processed; otherwise
  # the message from the system config file would be printed a second time.
  if user_config_file
    message = docdiff.process_config_file(user_config_file)
    if clo[:verbose] == true || docdiff.config[:verbose] == true
      STDERR.print message
    end
  end
end
# Command-line options take precedence over everything read so far.
docdiff.config.update(clo)

# config stuff done

# process the documents

# Validate the two target paths left in ARGV, then read both files.
raise "Try `#{File.basename($0)} --help' for more information." if ARGV[0].nil?
raise "Specify at least 2 target files." unless ARGV[0] && ARGV[1]

targets = [ARGV[0], ARGV[1]]
# Existence is checked for both paths before the regular-file check.
targets.each do |path|
  raise "No such file: #{path}." unless FileTest.exist?(path)
end
targets.each do |path|
  raise "#{path} is not a file." unless FileTest.file?(path)
end

file1_content, file2_content = targets.map do |path|
  File.open(path, "r") {|f| f.read}
end

doc1 = doc2 = nil

# Start from the configured values; "auto" is replaced below by a guess
# made separately for each document.
configured_encoding = docdiff.config[:encoding]
configured_eol      = docdiff.config[:eol]
encoding1 = configured_encoding
encoding2 = configured_encoding
eol1 = configured_eol
eol2 = configured_eol

# Automatic encoding detection: both documents must yield the same,
# known encoding.
if configured_encoding == "auto"
  encoding1 = DocDiff::CharString.guess_encoding(file1_content)
  encoding2 = DocDiff::CharString.guess_encoding(file2_content)
  if encoding1 == "UNKNOWN" || encoding2 == "UNKNOWN"
    raise "Document encoding unknown (#{encoding1}, #{encoding2})."
  elsif encoding1 != encoding2
    raise "Document encoding mismatch (#{encoding1}, #{encoding2})."
  end
end

# Automatic end-of-line detection: both documents must yield the same,
# known, non-nil EOL.
if configured_eol == "auto"
  eol1 = DocDiff::CharString.guess_eol(file1_content)
  eol2 = DocDiff::CharString.guess_eol(file2_content)
  if eol1.nil? || eol2.nil?
    raise "Document eol is nil (#{eol1.inspect}, #{eol2.inspect}).  The document might be empty."
  elsif eol1 == 'UNKNOWN' || eol2 == 'UNKNOWN'
    raise "Document eol unknown (#{eol1.inspect}, #{eol2.inspect})."
  elsif eol1 != eol2
    raise "Document eol mismatch (#{eol1}, #{eol2})."
  end
end

# Wrap both texts as documents, compare them with the configured
# settings, and write the result to standard output.
doc1 = DocDiff::Document.new(file1_content, encoding1, eol1)
doc2 = DocDiff::Document.new(file2_content, encoding2, eol2)

run_options = {
  :resolution => docdiff.config[:resolution],
  :format     => docdiff.config[:format],
  :digest     => docdiff.config[:digest],
  :display    => docdiff.config[:display]
}
output = docdiff.run(doc1, doc2, run_options)
print output
