#! /bin/sh
#
# make-afmtodit-tables -- script for creating the 'unicode_decomposed'
#                         and 'AGL_to_unicode' tables
#
# Copyright (C) 2005-2020 Free Software Foundation, Inc.
#      Written by Werner Lemberg <wl@gnu.org>
#
# This file is part of groff.
#
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# groff is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

#
# usage:
#
#   make-afmtodit-tables \
#     UnicodeData.txt version-string glyphlist.txt > afmtodit.tables
#
# 'UnicodeData.txt' is the central database file from the Unicode
# standard.  Unfortunately, it doesn't contain a version number, which
# must thus be provided manually as an additional parameter.
#
# 'glyphlist.txt' holds the Adobe Glyph List (AGL).
#
# This program needs a C preprocessor.  The command named by the
# environment variable 'CPP' is used if set, 'cpp' otherwise.
#

# Exactly three arguments are required: the Unicode database file, the
# Unicode version string, and the Adobe Glyph List file.
if [ $# -ne 3 ]
then
  # The usage message must go to the standard error stream: standard
  # output is normally redirected into the generated tables file, where
  # the message would otherwise end up unnoticed.
  echo "usage: $0 UnicodeData.txt UNICODE-VERSION-STRING" \
    "glyphlist.txt > afmtodit.tables" >&2
  exit 2
fi

# Give the positional parameters descriptive names.
unicode_data=$1
unicode_version=$2
glyph_list=$3

# Both input files have to be readable; bail out with status 1 on the
# first one that is not.  The version string is not validated.
for input in "$unicode_data" "$glyph_list"
do
  [ -r "$input" ] && continue
  echo "$0: '$input' does not exist or is not readable" >&2
  exit 1
done

# Handle UnicodeData.txt.

# This script keeps its intermediate results in scratch files named
# '<PID>1' ... '<PID>6' in the current directory.  Make sure they are
# removed however the script terminates: on any exit (trap 0), and on
# SIGHUP, SIGINT, and SIGTERM, which are turned into a regular exit so
# that the exit trap fires.
trap 'rm -f $$1 $$2 $$3 $$4 $$5 $$6' 0
trap 'exit 1' 1 2 15

# Filter: read UnicodeData.txt records on standard input and write one
# 'CODE;DECOMPOSITION' line per character with a canonical decomposition
# to standard output.
#
#   1. Remove records whose second field starts with '<' (ranges and
#      control characters).
#   2. Keep only the first field and the decomposition field (the sixth
#      one).
#   3. Remove lines without decomposition.
#   4. Remove all compatibility decompositions (they start with a
#      '<tag>').
extract_canonical_decompositions()
{
  sed -e '/^[^;]*;</d' \
      -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
      -e '/^[^;]*;$/d' \
      -e '/^[^;]*;</d'
}

extract_canonical_decompositions < "$unicode_data" > $$1

# Prepare input for running cpp.
#
# The file '<PID>2' gets two sections, both derived from the
# 'CODE;DECOMPOSITION' lines in '<PID>1':
#
#   * a '#define uCODE uXXXX uYYYY ...' line per character, i.e., a
#     macro named after the code point that expands to its
#     decomposition (every code gets a 'u' prefix);
#   * a 'CODE uCODE' line per character, which is the text the
#     preprocessor expands.
{
  sed -e 's/^\([^;]*\);/#define \1 /' \
      -e 's/ / u/g' $$1
  sed -e 's/^\([^;]*\);.*$/\1 u\1/' $$1
} > $$2

# Run C preprocessor to recursively decompose.
#
# The command is taken from the environment variable 'CPP', defaulting
# to 'cpp'; it is called with the input file '<PID>2' and the output
# file '<PID>3' as arguments.  If it cannot be run or reports failure
# there is nothing sensible to build the table from, so remove the
# scratch files created so far and give up instead of emitting an empty
# table.
cpp_command=${CPP:-cpp}
if ! "$cpp_command" $$2 $$3
then
  echo "$0: C preprocessor '$cpp_command' failed" >&2
  rm -f $$1 $$2 $$3
  exit 1
fi

# Convert it back to original format.

# Filter: read preprocessor output on standard input and write
# 'CODE;XXXX YYYY ...' lines to standard output.
#
#   1. Remove lines containing '#' (such as the line markers a
#      preprocessor emits) and empty lines.
#   2. Squeeze runs of blanks into a single blank and strip trailing
#      blanks.
#   3. Remove the 'u' prefixes again.
#   4. Replace the first blank with the ';' field separator.
#
# Only POSIX basic regular expressions are used; in particular, runs of
# blanks are matched with '  *' since '\+' is a GNU extension.
restore_decomposition_format()
{
  sed -e '/#/d' \
      -e '/^$/d' \
      -e 's/  */ /g' \
      -e 's/ *$//' \
      -e 's/u//g' \
      -e 's/^\([^ ]*\) /\1;/'
}

restore_decomposition_format < $$3 > $$4

# Write the header comment of the first table; it records the Unicode
# version given on the command line and today's date.
printf '%s\n' \
  "# This table was algorithmically derived from the file 'UnicodeData.txt'" \
  "# for Unicode $unicode_version, available from unicode.org," \
  "# on $(date '+%Y-%m-%d')."

# Emit first table: every 'CODE;XXXX YYYY' line in '<PID>4' becomes a
# '  "CODE", "XXXX_YYYY",' entry of a Perl hash; an empty line follows
# the closing parenthesis.
printf '%s\n' 'my %unicode_decomposed = ('
sed -e 's/ /_/g' \
    -e 's/\(.*\);\(.*\)/  "\1", "\2",/' $$4
printf '%s\n' ');'
printf '\n'

# Write the header comment of the second table; it records today's
# date.
printf '%s\n' \
  "# This table was algorithmically derived from the Adobe Glyph List (AGL)" \
  "# file 'glyphlist.txt' from the GitHub Adobe Type Tools agl-aglfn" \
  "# project, on $(date '+%Y-%m-%d')." \
  "#" \
  '# See "groff:" comments for altered mappings.'

# Convert AGL syntax to a chunk of Perl.

# Filter: read 'glyphname;XXXX YYYY ...' lines on standard input and
# write '  "glyphname", "XXXX_YYYY",' lines to standard output.
#
#   1. Remove lines containing '#' (comments).
#   2. Join multiple code points with '_'.
#   3. Remove mappings whose first code point starts with 'E' or with
#      'F0' to 'F8'.
#   4. Wrap name and value into a Perl list entry.
#
# Only POSIX basic regular expressions are used; the two prefixes are
# handled by separate delete commands since the alternation operator
# '\|' is a GNU extension.
agl_to_perl_pairs()
{
  sed -e '/#/d' \
      -e 's/ /_/g' \
      -e '/;E/d' \
      -e '/;F[0-8]/d' \
      -e 's/\(.*\);\(.*\)/  "\1", "\2",/'
}

agl_to_perl_pairs < "$glyph_list" > $$5

# Perform groff replacements: 'Delta', 'Omega', and 'mu' are mapped to
# U+0394, U+03A9, and U+03BC instead of the code points given in the
# AGL; each altered entry is marked with a trailing 'groff:' comment.
# The result is stored in '<PID>6'.
sed -e 's/\("Delta"\), "2206",$/\1, "0394", # groff: not U+2206/' \
    -e 's/\("Omega"\), "2126",$/\1, "03A9", # groff: not U+2126/' \
    -e 's/\("mu"\), "00B5",$/\1, "03BC", # groff: not U+00B5/' \
    $$5 > $$6

# Emit second table: the prepared entries wrapped into a Perl hash
# definition.
printf '%s\n' 'my %AGL_to_unicode = ('
cat $$6
printf '%s\n' ');'

# Remove temporary files, i.e., the six scratch files '<PID>1' to
# '<PID>6' created in the current directory.  Being the last command,
# 'rm' also determines the exit status of the script.
rm "$$1" "$$2" "$$3" \
   "$$4" "$$5" "$$6"

# Local Variables:
# fill-column: 72
# End:
# vim: set textwidth=72:
