#!/bin/sh

# --------------------------------------------------------------------------
# This is the script to create the Unicode chars property table.
# It expects a UnicodeData file (obtainable from http://www.unicode.org/ucd/)
# as argument, and produces C code on its standard output.  This C code
# should be included in char.c.  See unitable.h for relevant definitions.
#
# Written by Dimitry Golubovsky (dimitry@golubovsky.org) as part of basic
# Unicode support.
#
# The Hugs 98 system is Copyright (c) Mark P Jones, Alastair Reid, the
# Yale Haskell Group, and the OGI School of Science & Engineering at OHSU,
# 1994-2003, All rights reserved.  It is distributed as free software under
# the license in the file "License", which is included in the distribution.
# -------------------------------------------------------------------------

# Exactly one argument (the UnicodeData file) is required; anything else
# prints a usage line on standard error and exits with status 1.
if [ "$#" -ne 1 ]; then
	echo "usage: $0 unidata-file" >&2
	exit 1
fi

# Output the file header: a C comment naming this script and the base name
# of the data file, followed by one blank line.
#
# A here-document is used instead of echo so that backslashes in $0 or in
# the file name are never interpreted as escapes, and "$1" is quoted so a
# path containing spaces or glob characters reaches basename as one operand.

cat <<EOF
/*----------------------------------------------------
This is an automatically generated file: DO NOT EDIT.
Generated by $0 from $(basename -- "$1"),
which was obtained from http://www.unicode.org/ucd/
-----------------------------------------------------*/

EOF

# Convert the file to the C table.
#
# Each input line is a semicolon-separated UnicodeData record.  The fields
# read are: $1 code point (hex), $2 name, $3 general category, and
# $13/$14/$15, the hex case mappings used for the upper/lower/title
# distances (each may be empty).
#
# Output, in order:
#   - #define NUM_BLOCKS <number of blocks>
#   - one "struct CharProperties propN" per distinct
#     (category, upper, lower, title distance) combination, numbered in
#     order of first appearance
#   - the "char_block[]" array: one entry per run of consecutive code
#     points sharing the same properties, as { start, length, &propN }
#
# The data file is supplied on standard input rather than as an awk operand,
# so a path containing spaces, a leading "-", or a "name=value" shape is
# never split by the shell or misread by awk as an option or an assignment.

awk '
BEGIN {
	FS = ";"
	num_props = 0
	num_blocks = 0
	first_code = -1		# -1 means no block has been opened yet
	digits = "0123456789ABCDEF"
	for (i = 0; i < 16; i++)
		hex[substr(digits, i+1, 1)] = i
}

# Return the numeric value of the hexadecimal string a (either letter case).
# n, k and acc are locals, declared as extra parameters in the awk manner,
# so calling this function never disturbs loop counters of the caller.
function readhex(a,    n, k, acc) {
	a = toupper(a)
	n = length(a)
	acc = 0
	for (k = 1; k <= n; k++)
		acc = acc*16 + hex[substr(a, k, 1)]
	return acc
}

# Close the block currently being accumulated (if any) by recording its
# start, length and property index, then open a new block at the current
# record.  Reads the globals hex_code, this_code, prop and next_code.
function endblock() {
	if (first_code >= 0) {
		blocks[num_blocks] = block_start ", " (next_code-first_code) ", &prop" props[block_prop]
		num_blocks++
	}
	block_start = hex_code
	first_code = this_code
	block_prop = prop
}

# A record without a code point (for example a stray blank line) cannot
# describe a character; skip it instead of emitting a bogus "0x" block.
$1 == "" { next }

{
	hex_code = "0x" $1
	this_code = readhex($1)

	# Case mappings are stored as signed distances from this code point;
	# an empty mapping field means distance 0.
	updist = ($13 == "") ? 0 : readhex($13) - this_code
	lowdist = ($14 == "") ? 0 : readhex($14) - this_code
	ttldist = ($15 == "") ? 0 : readhex($15) - this_code

	prop = "GENCAT_" $3 ", " updist ", " lowdist ", " ttldist
	if (!(prop in props))
		props[prop] = num_props++

	# A record whose name contains "Last>" never starts a new block: it
	# only moves next_code past the end of the range, so the block opened
	# by the preceding record covers the whole range.  Otherwise a new
	# block starts at a gap in code points or a change of properties.
	if (index($2, "Last>") == 0 && (this_code != next_code || prop != block_prop))
		endblock()
	next_code = this_code + 1
}

END {
	endblock()		# flush the final open block
	print "#define NUM_BLOCKS " num_blocks
	print ""

	# Invert props (text -> index) so the structs come out in index order.
	for (p in props)
		props_inv[props[p]] = p
	for (i = 0; i < num_props; i++)
		printf "static const struct CharProperties prop%d = { %s };\n", i, props_inv[i]
	print ""

	print "static const struct CharBlock char_block[] = {"
	for (i = 0; i < num_blocks; i++)
		printf "    { %s }%s\n", blocks[i], (i < num_blocks-1 ? "," : "")
	print "};"
}
' < "$1"
