#!/usr/bin/env ruby2.1
# -*- coding: utf-8 -*-
require 'kramdown-rfc2629'
require 'yaml'
require 'erb'
require 'date'

Encoding.default_external = "UTF-8" # wake up, smell the coffee

RE_NL = /(?:\n|\r|\r\n)/
RE_SECTION = /---(?:\s+(\w+)(-?))?\s*#{RE_NL}(.*?#{RE_NL})(?=---(?:\s+\w+-?)?\s*#{RE_NL}|\Z)/m

NMDTAGS = ["{:/nomarkdown}\n\n", "\n\n{::nomarkdown}\n"]

NORMINFORM = { "!" => :normative, "?" => :informative }

def xml_from_sections(input)
  sections = input.scan(RE_SECTION)
  # resulting in an array; each section is [section-label, nomarkdown-flag, section-text]

  # the first section is a YAML with front matter parameters (don't put a label here)
  # We put back the "---" plus gratuitous blank lines to hack the line number in errors
  yaml_in = input[/---\s*/] << sections.shift[2]
  ps = ParameterSet.new(YAML.load(yaml_in))
  coding_override = (ps.has(:coding) =~ /ascii/i) ? :symbolic : :as_char

  # all the other sections are put in a Hash, possibly concatenated from parts there
  sechash = Hash.new{ |h,k| h[k] = ""}
  snames = []                   # a stack of section names
  sections.each do |sname, nmdflag, text|
    nmdin, nmdout = {
      "-" => ["", ""],          # stay in nomarkdown
      "" => NMDTAGS, # pop out temporarily
    }[nmdflag || ""]
    if sname
      snames << sname           # "--- label" -> push label (now current)
    else
      snames.pop                # just "---" -> pop label (previous now current)
    end
    sechash[snames.last] << "#{nmdin}#{text}#{nmdout}"
  end

  ref_replacements = { }

  [:ref, :normative, :informative].each do |sn|
    if refs = ps.has(sn)
      warn "*** bad section #{sn}: #{refs.inspect}" unless refs.respond_to? :each
      refs.each do |k, v|
        if v.respond_to? :to_str
          ref_replacements[v.to_str] = k
        end
        if v.respond_to? :[]
          if aliasname = v.delete("-")
            ref_replacements[aliasname] = k
          end
        end
      end
    end
  end
  open_refs = ps[:ref] || { }       # consumed

  norm_ref = { }

  # convenience replacement of {{-coap}} with {{I-D.ietf-core-coap}}
  # collect normative/informative tagging {{!RFC2119}} {{?RFC4711}}
  sechash.each do |k, v|
    next if k == "fluff"
    v.gsub!(/{{(?:([?!])(-)?|(-))([\w.\-]+)}}/) do |match|
      norminform = $1
      replacing = $2 || $3
      word = $4
      if replacing
        if new = ref_replacements[word]
          word = new
        else
          warn "*** no alias replacement for {{-#{word}}}" 
          word = "-#{word}"
        end
      end
      # things can be normative in one place and informative in another -> normative
      # collect norm/inform above and assign it by priority here
      if norminform
        norm_ref[word] ||= norminform == '!' # one normative ref is enough
      end
      "{{#{word}}}"
    end
  end

  [:normative, :informative].each do |k|
    ps.rest[k.to_s] ||= { }
  end

  norm_ref.each do |k, v|
    # could check bibtagsys here: needed if open_refs is nil or string
    target = ps.has(v ? :normative : :informative)
    warn "*** overwriting #{k}" if target.has_key?(k)
    target[k] = open_refs[k] # add reference to normative/informative
  end
  # note that unused items from ref are considered OK, therefore no check for that here

  # also should allow norm/inform check of other references
  # {{?coap}} vs. {{!coap}} vs. {{-coap}} (undecided)
  # or {{?-coap}} vs. {{!-coap}} vs. {{-coap}} (undecided)
  # could require all references to be decided by a global flag
  overlap = [:normative, :informative].map { |s| (ps.has(s) || { }).keys }.reduce(:&)
  unless overlap.empty?
    warn "*** #{overlap.join(', ')}: both normative and informative"
  end

  stand_alone = ps[:stand_alone]
  link_defs = {}

  [:normative, :informative].each do |sn|
    if refs = ps[sn]
      refs.each do |k, v|
        href = k.gsub(/\A[0-9]/) { "_#{$&}" } # can't start an IDREF with a number
        link_defs[k] = ["##{href}", nil]            # allow [RFC2119] in addition to {{RFC2119}}
        if bts = bibtagsys(k)
          if v
            warn "*** redundant in #{k}: #{v.inspect}" unless v.respond_to? :to_str
          end
          if stand_alone
            sechash[sn.to_s] << %{\n#{NMDTAGS[0]}\n![:include:](#{bts[0]})\n#{NMDTAGS[1]}\n}
          else
            (ps.rest["bibxml"] ||= []) << k
            sechash[sn.to_s] << %{&#{bts[0]};\n}
          end
        else
          unless v && Hash === v
            warn "*** don't know how to expand ref #{k}" 
            next
          end
          vps = ParameterSet.new(v)
          erb = ERB.new <<-REFERB, nil, '-'
<reference anchor="<%= k %>" <%= vps.attr("target") %>>
  <front>
    <%= vps.ele("title") -%>

<% vps.arr("author", true, true) do |au|
   aups = authorps_from_hash(au)
 -%>
    <author <%=aups.attrs("initials", "surname", "fullname=name", "role")%>>
      <%= aups.ele("organization=org", aups.attr("abbrev=orgabbrev"), "") %>
    </author>
<%   aups.warn_if_leftovers  -%>
<% end -%>
    <date <%= dateattrs(vps[:date]) %>/>
  </front>
<% vps.arr("seriesinfo", false) do |k, v| -%>
  <seriesInfo name="<%=k%>" value="<%=v%>"/>
<% end -%>
<% vps.arr("format", false) do |k, v| -%>
  <format type="<%=k%>" target="<%=v%>"/>
<% end -%>
<%= vps.ele("annotation=ann") -%>
</reference>
          REFERB
          sechash[sn.to_s] << erb.result(binding)
          vps.warn_if_leftovers
        end
      end
    end
  end

  erbfilename = File.expand_path '../../data/kramdown-rfc2629.erb', __FILE__
  erbfile = File.read(erbfilename, coding: "UTF-8")
  erb = ERB.new(erbfile, nil, '-')
  # remove redundant nomarkdown pop outs/pop ins as they confuse kramdown
  input = erb.result(binding).gsub(%r"{::nomarkdown}\s*{:/nomarkdown}"m, "")
  ps.warn_if_leftovers
  sechash.delete("fluff")       # fluff is a "commented out" section
  if !sechash.empty?            # any sections unused by the ERb file?
    warn "*** sections left #{sechash.keys.inspect}!"
  end

  [input, coding_override, link_defs]
end

class ParameterSet
  include Kramdown::Utils::Html

  attr_reader :f
  def initialize(y)
    raise "*** invalid parameter set #{y.inspect}" unless Hash === y
    @f = y
  end
  def [](pn)
    @f.delete(pn.to_s)
  end
  def has(pn)
    @f[pn.to_s]
  end
  def van(pn)                   # pn is a parameter name, possibly with an =alias
    an, pn = pn.to_s.split("=")
    pn ||= an
    [self[pn] || self[an], an]
  end
  def attr(pn)
    val, an = van(pn)
    %{#{an}="#{val}"}    if val
  end
  def attrs(*pns)
    pns.map{ |pn| attr(pn) }.compact.join(" ")
  end
  def ele(pn, attr=nil, defcontent=nil)
    val, an = van(pn)
    val ||= defcontent
    Array(val).map do |val1|
      %{<#{[an, *Array(attr).map(&:to_s)].join(" ").strip}>#{escape_html(val1.to_s.strip)}</#{an}>}
    end.join(" ")
  end
  def arr(an, converthash=true, must_have_one=false, &block)
    arr = self[an] || []
    arr = [arr] if Hash === arr && converthash
    arr << { } if must_have_one && arr.empty?
    Array(arr).each(&block)
  end
  def rest
    @f
  end
  def warn_if_leftovers
    if !@f.empty?
      warn "*** attributes left #{@f.inspect}!"
    end
  end
end

XML_RESOURCE_ORG_PREFIX = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_PREFIX

def bibtagsys(bib)
  if bib =~ /\Arfc(\d+)/i
    rfc4d = "%04d" % $1.to_i
    [bib.upcase,
     "#{XML_RESOURCE_ORG_PREFIX}/bibxml/reference.RFC.#{rfc4d}.xml"]
  elsif bib =~ /\A([-A-Z0-9]+)\./ &&
        dir = Kramdown::Converter::Rfc2629::XML_RESOURCE_ORG_MAP[$1]
    bib1 = bib.gsub(/\A[0-9]/) { "_#{$&}" } # can't start an ID with a number
    [bib1,
     "#{XML_RESOURCE_ORG_PREFIX}/#{dir}/reference.#{bib}.xml"]
  end
end

def authorps_from_hash(au)
  aups = ParameterSet.new(au)
  if ins = aups[:ins]
    parts = ins.split('.').map(&:strip)
    aups.rest["initials"] = parts[0..-2].join('.') << '.'
    aups.rest["surname"] = parts[-1]
  end
  # hack ("heuristic for") initials and surname from name
  # -- only works for people with exactly one last name and uncomplicated first names
  if n = aups.rest["name"]
    n = n.split
    aups.rest["initials"] ||= n[0..-2].map(&:chr).join('.') << '.'
    aups.rest["surname"] ||= n[-1]
  end
  aups
end

def dateattrs(date)
  begin
    case date
    when Integer
      %{year="#{"%04d" % date}"}
    when String
      Date.parse("#{date}-01").strftime(%{year="%Y" month="%B"})
    when Date
      date.strftime(%{year="%Y" month="%B" day="%d"})
    when Array                  # this allows to explicitly give a string
      %{year="#{date.join(" ")}"}
    when nil
      %{year="n.d."}
    end

  rescue ArgumentError
    warn "*** Invalid date: #{date} -- use 2012, 2012-07, or 2012-07-28"
  end
end

coding_override = :as_char
input = ARGF.read
if input[0] == "\uFEFF"
   warn "*** There is a leading byte order mark. Ignored."
   input[0..0] = ''
end
input.gsub!(/^\{::include\s+(.*?)\}/) {
  File.read($1).chomp
} unless ENV["KRAMDOWN_SAFE"]
link_defs = {}
if input =~ /\A---/        # this is a sectionized file
  input, coding_override, link_defs = xml_from_sections(input)
end
if input =~ /\A<\?xml/          # if this is a whole XML file, protect it
  input = "{::nomarkdown}\n#{input}\n{:/nomarkdown}\n"
end
options = {input: 'RFC2629Kramdown', entity_output: coding_override, link_defs: link_defs}
case coding_override
when :symbolic
  options[:smart_quotes] = ["'".ord, "'".ord, '"'.ord, '"'.ord]
end
# warn "options: #{options.inspect}"
doc = Kramdown::Document.new(input, options)
$stderr.puts doc.warnings.to_yaml unless doc.warnings.empty?
puts doc.to_rfc2629
