#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-08-09 15:23:23 +0300 (Mon, 09 Aug 2021) $
#$Revision: 8860 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v3.4.0/scripts/cif_validate $
#------------------------------------------------------------------------------
#*
#* Validate CIF files against CIF dictionaries (DDL1 or DDLm).
#*
#* USAGE:
#*    $0 --dictionaries 'cif_core.dic,cif_cod.dic' --options input1.cif input*.cif
#*
#* ENVIRONMENT:
#*   COD_TOOLS_DDLM_IMPORT_PATH
#*                     A list of directories in which to look for the
#*                     DDLm-compliant CIF dictionaries that are imported
#*                     by other DDLm-compliant CIF dictionaries. List
#*                     elements are separated by the colon symbol (':').
#*                     Directories listed in COD_TOOLS_DDLM_IMPORT_PATH
#*                     have a lower priority than those provided using
#*                     the command line option (--add-dictionary-import-path),
#*                     but higher than the default import path directory
#*                     (directory of the importing dictionary).
#**

use strict;
use warnings;
use Clone qw( clone );
use File::Basename qw( fileparse );
use List::MoreUtils qw( any none uniq );

use COD::CIF::ChangeLog qw( summarise_messages );
use COD::CIF::Parser qw( parse_cif ) ;
use COD::CIF::DDL qw( is_local_data_name
                      is_general_local_data_name );
use COD::CIF::DDL::DDL1 qw( canonicalise_value
                            classify_dic_blocks
                            convert_pseudo_data_name_to_category_name
                            get_category_name
                            get_data_name
                            get_data_names
                            get_data_type
                            get_enumeration_defaults
                            get_list_constraint_type
                            get_list_mandatory_flag );
use COD::CIF::DDL::DDLm qw( build_ddlm_dic );
use COD::CIF::DDL::DDLm::Import qw( get_ddlm_import_path_from_env
                                    resolve_dic_imports );
use COD::CIF::DDL::DDLm::Validate qw( ddlm_validate_data_block );
use COD::CIF::DDL::Ranges qw( parse_range
                              range_to_string
                              is_in_range );
use COD::CIF::DDL::Validate qw( check_enumeration_set );
use COD::CIF::Tags::Manage qw( exclude_tag
                               has_special_value
                               has_numeric_value
                               get_item_loop_index
                               new_datablock
                               set_loop_tag
                               set_tag );
use COD::CIF::Tags::CanonicalNames qw( canonical_tag_name
                                       canonicalize_names );
use COD::CIF::DDL::Validate qw( canonicalise_tag
                                check_enumeration_set );
use COD::CIF::Unicode2CIF qw( unicode2cif );
use COD::SOptions qw( getOptions
                      get_value
                      get_int
                      get_float );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_parser_messages
                          report_message );
use COD::UserMessage qw( sprint_message );
use COD::ToolsVersion qw( get_version_string );

##
# Parses the provided DDL dictionary files and determines the DDL generation
# that the dictionary belongs to (DDL1, DDL2 or DDLm).
#
# @param $dics_grouped_by_dll
#       Reference to a data structure of the following form:
#       #
#       # Hash keys denote the DDL generation that the associated
#       # dictionaries have been explicitly assigned to outside
#       # of the subroutine. Accepted key values:
#       #  '1' for DDL1,
#       #  '2' for DDL2,
#       #  'm' for DDLm,
#       #  'unspecified' for any mixture of DDL1, DDL2 and DDLm
#       #
#       # Each hash key refers to an array containing the filenames
#       # of CIF dictionary files that should be parsed
#       {
#         # Dictionaries that have been explicitly declared as DDL1 dictionaries
#           '1' => [
#               'cif_core.dic',
#               'cod_cif.dic',
#               ...
#           ],
#         # Dictionaries that have been explicitly declared as DDL2 dictionaries
#           '2' => [
#               'cod_cif_ddl2.dic',
#               ...
#           ],
#         # Dictionaries that have been explicitly declared as DDLm dictionaries
#           'm' => [
#               ...
#           ],
#         # Dictionaries that have not yet been assigned a DDL generation
#         # and may potentially belong to any generation
#           'unspecified' => [
#               'cif_core_ddl2.dic',
#               'cod_cif_ddlm.dic',
#               ...
#           ]
#       }
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#         # Reference to a hash of parser options as accepted by
#         # the COD::CIF::Parser
#           'parser_options' => {
#               'parser' => c,
#               'no_print' => 1,
#               '...'
#            },
#         # Reference to a hash that specifies which error levels are fatal
#           'die_on_error_level' => {
#               'ERROR'   => 1,
#               'WARNING' => 0,
#               'NOTE'    => 1,
#           }
#       }
# @return
#       Reference to a data structure of the following form:
#       [
#           {
#             # The determined DDL type:
#             # '1' for DDL1,
#             # '2' for DDL2,
#             # 'm' for DDLm
#               'ddl_type' => '1',
#             # Name of the parsed dictionary file
#               'filename' => 'cod_cif.dic'
#             # Reference to a parsed CIF file as returned by
#             # the COD::CIF::Parser
#               'parsed_file' => ...,
#           },
#           {
#             # The determined DDL type:
#             # '1' for DDL1,
#             # '2' for DDL2,
#             # 'm' for DDLm
#               'ddl_type' => '2',
#             # Name of the parsed dictionary file
#               'filename' => 'cod_cif_ddl2.dic'
#             # Reference to a parsed CIF file as returned by
#             # the COD::CIF::Parser
#               'parsed_file' => ...,
#           },
#       ]
##
sub parse_and_classify_dics
{
    my ($dics_grouped_by_ddl, $options) = @_;

    my $parser_options = $options->{'parser_options'};
    my $die_on_error_level = $options->{'die_on_error_level'};

    my @parsed_dics;
    for my $provided_ddl_type (sort keys %{$dics_grouped_by_ddl}) {
        my $dic_filenames = $dics_grouped_by_ddl->{$provided_ddl_type};

        for my $dic ( @{$dic_filenames} ) {
            my ( $data, $err_count, $messages ) = parse_cif( $dic, $parser_options );
            process_parser_messages( $messages, $die_on_error_level );

            local $SIG{__WARN__} = sub { process_warnings( {
                                           'message'       => @_,
                                           'program'       => $0,
                                           'filename'      => $dic,
                                         }, $die_on_error_level ) };

            my $recognised_ddl_type = determine_ddl_generation( $data );

            if ( !defined $recognised_ddl_type ) {
                if ( $provided_ddl_type eq 'unspecified' ) {
                    warn 'file was not recognised as a proper DDL ' .
                         'dictionary -- file will be skipped' . "\n";
                    next;
                } else {
                    $recognised_ddl_type = $provided_ddl_type;
                    warn 'file was explicitly provided as a ' .
                         "DDL$provided_ddl_type dictionary, however, " .
                         'it could not be recognised as a proper DDL ' .
                         'dictionary -- file will be treated as a ' .
                         "DDL$provided_ddl_type dictionary" . "\n";
                }
            }

            if ( $recognised_ddl_type eq '2' ) {
                warn 'file was recognised as a DDL2 dictionary, however, ' .
                     'only DDL1 and DDLm dictionaries are supported by ' .
                     'the current version of the program -- file ' .
                     'will be ignored' . "\n";
                next;
            }

            if ( $provided_ddl_type ne $recognised_ddl_type &&
                 $provided_ddl_type ne 'unspecified' ) {
                warn 'file was explicitly provided as a ' .
                     "DDL$provided_ddl_type dictionary, however, " .
                     "it was recognised as a DDL$recognised_ddl_type " .
                     'dictionary -- file will be treated as a ' .
                     "DDL$provided_ddl_type dictionary" . "\n";
                $recognised_ddl_type = $provided_ddl_type;
            }

            push @parsed_dics,
                 {
                     'ddl_type'    => $recognised_ddl_type,
                     'filename'    => $dic,
                     'parsed_file' => $data,
                 };
        }
    }

    return \@parsed_dics;
}

# BEGIN: subroutines related to validation against DDL2 dictionaries
##
# Builds a DDL2 dictionary structure from a parsed DDL2 dictionary.
#
# @param $dic_data_blocks
#       Reference to a DDL2 dictionary structure as returned by the
#       COD::CIF::Parser. Normally, a DDL2 dictionary consists of
#       a single data block with multiple save frames each defining
#       a data category or a data item.
# @return
#       Reference to a hash containing data item definitions.
##
sub build_ddl2_dic
{
    my ( $dic_block ) = @_;

    my %definitions;
    for my $save_frame ( @{$dic_block->{'save_blocks'}} ) {
        next if !exists $save_frame->{'values'}{'_item.name'};
        for ( @{$save_frame->{'values'}{'_item.name'}} ) {
            $definitions{lc $_} = $save_frame;
            $definitions{lc $_}{'values'}{'_dataname'} = $_;
        }
    }

    return \%definitions;
}

##
# Validates a CIF data frame against a DDL2 dictionary.
#
# @param $data_block
#       Reference to a data block or a save frame as returned by
#       the COD::CIF::Parser.
# @param $dic
#       Reference to a dictionary data structure as returned by
#       the build_ddl2_dic() subroutine.
# @param $options
#       Reference to a hash of options. Currently no options
#       are recognised.
# @return
#       Array reference to a list of validation messages.
##
sub ddl2_validate_data_block
{
    my ( $data_block, $dic, $options ) = @_;

    my @notes;
    for my $tag ( @{$data_block->{'tags'}} ) {
         my $lc_tag = lc $tag;
         my $dic_item = $dic->{$lc_tag};
         push @notes,
              @{ ddl2_validate_data_type( $data_block, $tag, $dic_item ) };
     }

    return \@notes;
}

##
# Checks if values satisfy the DDL2 data type constraints.
#
# @param $data_frame
#       Data frame that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl2_dic() subroutine.
# @return
#       Array reference to a list of validation messages.
##
sub ddl2_validate_data_type
{
    my ( $data_frame, $tag, $dic_item ) = @_;

    # FIXME: the DDL2 data type validation is much more complex than
    # assumed in the current implementation. For example, the basic
    # data type are described in the DDL2 dictionary using regular
    # expressions, but these data types can be extended or even overridden
    # in any other DDL2 dic
    return [] if !$dic_item->{'values'}{'_item_type.code'};
    return [] if  $dic_item->{'values'}{'_item_type.code'}[0] ne 'float' &&
                  $dic_item->{'values'}{'_item_type.code'}[0] ne 'int';

    my @validation_messages;
    for ( my $i = 0; $i < @{$data_frame->{'values'}{$tag}}; $i++ ) {
        next if has_special_value($data_frame, $tag, $i);
        next if has_numeric_value($data_frame, $tag, $i);
        push @validation_messages,
            'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
            " value '$data_frame->{'values'}{$tag}[$i]' is of type " .
            q{'} . $data_frame->{'types'}{$tag}[$i] . q{'} .
            ' while it should be numeric, i.e. \'FLOAT\' or \'INT\'';
    }

    return \@validation_messages;
}
# END: subroutines related to validation against DDL2 dictionaries

my @generic_dic_files;
my @ddl1_dic_files;
my @ddlm_dic_files;
my $merge_ddl1_dics = 1;
my @ddlm_dic_import_path;
my $use_parser = 'c';
my $enum_as_set_tags = ['_atom_site_refinement_flags'];
my $ignore_case = 0;

my $report_local_tags = 0;
my $report_deprecated = 0;
my $allow_double_precision_notation = 0;
my $range_su_multiplier = 3;
my $report_missing_su = 0;
my $max_message_count = -1;

my $die_on_errors   = 1;
my $die_on_warnings = 0;
my $die_on_notes    = 0;

sub check_list_link_parent($$$);

#* OPTIONS:
#*   -d, --dictionaries 'cif_core.dic,cif_cod.dic'
#*                     A list of CIF dictionaries that the CIF files should
#*                     be validated against. May include DDL1 and DDLm
#*                     dictionaries. The order of DDL1 dictionaries
#*                     in the list is important (see --ddl1-dictionaries).
#*                     List elements are separated either by ',' or by ' '.
#*                     To include dictionaries with filenames containing
#*                     these symbols, the --add-dictionary option should
#*                     be used.
#*   -D, --add-dictionary 'cif new dictionary.dic'
#*                     Add an additional CIF dictionary to the end of the list.
#*   --clear-dictionaries
#*                     Remove all CIF dictionaries from the list.
#*
#*   --ddl1-dictionaries 'cif_core.dic,cif_cod.dic'
#*                     A list of DDL1 dictionaries that the CIF files should
#*                     be validated against. The order of dictionaries in
#*                     the list is important as it determines the dictionary
#*                     merge order. The first DDL1 dictionary serves as the
#*                     base while the remaining ones are merged sequentially
#*                     using the OVERLAY mode. DDL1 dictionaries provided in
#*                     the generic dictionary list (see --dictionaries) are
#*                     appended to the end of this list.
#*                     List elements are separated either by ',' or by ' '.
#*                     To include dictionaries with filenames containing
#*                     these symbols, the --ddl1-add-dictionary option
#*                     should be used.
#*   --ddl1-add-dictionary
#*                     Add an additional dictionary to the end of DDL1
#*                     dictionary list.
#*   --ddl1-clear-dictionaries
#*                     Remove all dictionaries from the DDL1 dictionary list.
#*
#*   --merge-ddl1-dictionaries
#*                     Merge all DDL1 dictionaries into a single virtual
#*                     dictionary prior to validation (default).
#*   --no-merge-ddl1-dictionaries
#*                     Do not merge DDL1 dictionaries into a single virtual
#*                     dictionary prior to validation. Using this option
#*                     may result in incorrect validation results.
#*
#*   --ddlm-dictionaries
#*                     A list of DDLm dictionaries that the CIF files should
#*                     be validated against. DDLm dictionaries provided in
#*                     the generic dictionary list (see --dictionaries) are
#*                     appended to the end of this list.
#*                     List elements are separated either by ',' or by ' '.
#*                     To include dictionaries with filenames containing
#*                     these symbols, the --ddlm-add-dictionary option
#*                     should be used.
#*   --ddlm-add-dictionary
#*                     Add an additional dictionary to the end of DDLm
#*                     dictionary list.
#*   --ddlm-clear-dictionaries
#*                     Remove all dictionaries from the DDLm dictionary list.
#*
#*   -I, --add-ddlm-import-path './ddlm/cod/'
#*                     Prepend an additional directory to the dictionary
#*                     import path. The dictionary import path specifies
#*                     a list of directories in which to look for files
#*                     that are imported by DDLm-compliant CIF dictionaries.
#*                     Directories provided using this option are assigned
#*                     the highest priority and are searched prior to
#*                     the directories listed in the COD_TOOLS_DDLM_IMPORT_PATH
#*                     environment variable or the default import path
#*                     (directory of the importing dictionary).
#*
#*   --clear-ddlm-import-path
#*                     Remove all directories from the dictionary import path
#*                     that were added using the --add-ddlm-import-path option.
#*                     Neither COD_TOOLS_DDLM_IMPORT_PATH environment variable
#*                     nor the default import path is affected by this option.
#*   --max-message-count 5
#*                     Maximum number of validation messages that are reported
#*                     for each unique combination of validation criteria and
#*                     validated data items. Provide a negative value (i.e. -1)
#*                     to output all of the generated validation messages
#*                     (default: -1).
#*
#*   --range-su-multiplier 3.5
#*                      Multiplier that should be applied to the standard
#*                      uncertainty (s.u.) when determining if a numeric
#*                      value resides in the specified range. For example,
#*                      a multiplier of 3.5 means that the value is treated
#*                      as valid if it falls in the interval of
#*                      [lower bound - 3.5 * s.u.; upper bound + 3.5 * s.u.]
#*                      (default: 3).
#*
#*   --treat-as-set _atom_site_refinement_flags
#*                     Treat values of given data items as a set. For example,
#*                     more than one enumeration value could be defined
#*                     for a single element. Any number of data item tags can
#*                     be provided in the following way:
#*                     $0 --treat-as-set _tag_1 --treat-as-set _tag_2
#*                     Default: '_atom_site_refinement_flags'.
#*   --no-treat-as-set
#*                     Do not treat values of any data items as sets.
#*                     (see --treat-as-set).
#*
#*   --report-deprecated
#*                     Report the presence of data items that are marked as
#*                     deprecated in the dictionaries. Data item deprecation
#*                     usually means that it has been replaced with other
#*                     data items.
#*   --no-report-deprecated, --ignore-deprecated
#*                     Do not report presence of data items that are marked
#*                     as deprecated in the dictionaries (default).
#*
#*   --report-local-tags
#*                     Report the presence of local data items.
#*   --no-report-local-tags, --ignore-local-tags
#*                     Do not report the presence of local data items (default).
#*
#*   --ignore-case
#*                     Ignore letter case while validating enumeration values.
#*                     For example, even though '_atom_site_adp_type' is
#*                     restricted to values ('Uani', 'Uiso', 'Uovl', ...),
#*                     value 'UANI' would still be treated as valid.
#*                     Applies only to DDL1 dictionaries.
#*   --respect-case, --case-sensitive, --dont-ignore-case
#*                     Respect letter case while validating enumeration
#*                     values (default).
#*                     Applies only to DDL1 dictionaries.
#*
#*   --allow-double-precision-notation
#*                     Treat numbers expressed using the double precision
#*                     notation (i.e. 0.42D+7) as proper numbers in a way
#*                     that is compatible with DDL1, but not the CIF_1.1
#*                     syntax.
#*                     Applies only to DDL1 dictionaries.
#*   --no-allow-double-precision-notation
#*                     Treat numbers expressed using the double precision
#*                     notation (i.e. 0.42D+7) as character strings in a
#*                     way compatible with the CIF_1.1 syntax, but does not
#*                     cover the full extent of the DDL1 numbers variations
#*                     (default).
#*                     Applies only to DDL1 dictionaries.
#*
#*   --report-missing-su
#*                     Report measurand data items that are not accompanied by
#*                     the mandatory standard uncertainty values.
#*                     Applies only to DDLm dictionaries.
#*   --no-report-missing-su, --ignore-missing-su
#*                     Do not report measurand data items that are not
#*                     accompanied by the mandatory standard uncertainty
#*                     values (default).
#*                     Applies only to DDLm dictionaries.
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default)
#*
#*   -c, --always-continue
#*                     Continue processing and return successful return status
#*                     even if errors are diagnosed.
#*   -c-, --always-die
#*                     Stop and return error status if errors are diagnosed.
#*   --continue-on-errors
#*                     Do not terminate script if errors are raised.
#*   --die-on-errors
#*                     Terminate script immediately if errors are raised
#*                     (default).
#*   --continue-on-warnings
#*                     Do not terminate script if warnings are raised (default).
#*   --die-on-warnings
#*                     Terminate script immediately if warnings are raised.
#*   --continue-on-notes
#*                     Do not terminate script if notes are raised (default).
#*   --die-on-notes
#*                     Terminate script immediately if notes are raised.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   -v, --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '-d,--dictionaries'
                    => sub{ @generic_dic_files = split m/,|\s+/, get_value() },
    '-D,--add-dictionary'
                    => sub{ push @generic_dic_files, get_value() },
    '--clear-dictionaries'
                    => sub{ @generic_dic_files = () },

    '--ddl1-dictionaries'
                    => sub { @ddl1_dic_files = split m/,|\s+/, get_value() },
    '--ddl1-add-dictionary'
                    => sub { push @ddl1_dic_files, get_value() },
    '--ddl1-clear-dictionaries'
                    => sub { @ddl1_dic_files = () },

    '--merge-ddl1-dictionaries'    => sub { $merge_ddl1_dics = 1 },
    '--no-merge-ddl1-dictionaries' => sub { $merge_ddl1_dics = 0 },

    '--ddlm-dictionaries'
                    => sub { @ddlm_dic_files = split m/,|\s+/, get_value() },
    '--ddlm-add-dictionary'
                    => sub { push @ddlm_dic_files, get_value() },
    '--ddlm-clear-dictionaries'
                    => sub { @ddlm_dic_files = () },

    '-I,--add-ddlm-import-path'
                    => sub { push @ddlm_dic_import_path, get_value() },
    '--clear-ddlm-import-path'
                    => sub { @ddlm_dic_import_path = () },

    '--max-message-count' => sub { $max_message_count = get_int() },

    '--treat-as-set'                    => $enum_as_set_tags,
    '--no-treat-as-set'                 => sub{ $enum_as_set_tags = [] },

    '--range-su-multiplier' => sub { $range_su_multiplier = get_float() },

    '--ignore-case'                     => sub{ $ignore_case = 1 },
    '--dont-ignore-case,--respect-case' => sub{ $ignore_case = 0 },
    '--case-sensitive'                  => sub{ $ignore_case = 0 },

    '--allow-double-precision-notation'
                           => sub { $allow_double_precision_notation = 1 },
    '--no-allow-double-precision-notation'
                           => sub { $allow_double_precision_notation = 0 },

    '--report-local-tags'               => sub { $report_local_tags = 1 },
    '--no-report-local-tags'            => sub { $report_local_tags = 0 },
    '--ignore-local-tags'               => sub { $report_local_tags = 0 },

    '--report-deprecated'               => sub { $report_deprecated = 1 },
    '--no-report-deprecated'            => sub { $report_deprecated = 0 },
    '--ignore-deprecated'               => sub { $report_deprecated = 0 },

    '--report-missing-su'    => sub{ $report_missing_su = 1 },
    '--no-report-missing-su' => sub{ $report_missing_su = 0 },
    '--ignore-missing-su'    => sub{ $report_missing_su = 0 },

    '--use-perl-parser'                 => sub { $use_parser = 'perl' },
    '--use-c-parser'                    => sub { $use_parser = 'c' },

    '-c,--always-continue'              => sub { $die_on_errors   = 0;
                                                 $die_on_warnings = 0;
                                                 $die_on_notes    = 0 },
    '-c-,--always-die'                  => sub { $die_on_errors   = 1;
                                                 $die_on_warnings = 1;
                                                 $die_on_notes    = 1 },

    '--continue-on-errors'   => sub { $die_on_errors = 0 },
    '--die-on-errors'        => sub { $die_on_errors = 1 },

    '--continue-on-warnings' => sub { $die_on_warnings = 0 },
    '--die-on-warnings'      => sub { $die_on_warnings = 1 },

    '--continue-on-notes'    => sub { $die_on_notes = 0 },
    '--die-on-notes'         => sub { $die_on_notes = 1 },

    '--options'      => sub{ options; exit },
    '--help,--usage' => sub{ usage; exit; },
    '-v,--version'   => sub { print get_version_string(), "\n"; exit }
);

push @ddlm_dic_import_path, @{get_ddlm_import_path_from_env()};

my $die_on_error_level = {
    'ERROR'   => $die_on_errors,
    'WARNING' => $die_on_warnings,
    'NOTE'    => $die_on_notes
};

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

# Reading dictionary files

my %validation_dics;
if( @ddl1_dic_files +
    @ddlm_dic_files +
    @generic_dic_files ) {
    my $parser_options = {
                            'parser' => $use_parser,
                            'no_print' => 1,
                         };
    my $parsed_dics = parse_and_classify_dics(
                            {
                                '1' => \@ddl1_dic_files,
                                'm' => \@ddlm_dic_files,
                                'unspecified' => \@generic_dic_files,
                            },
                            {
                                'die_on_error_level' => $die_on_error_level,
                                'parser_options' => $parser_options,
                            } );

    my @ddl1_dictionaries;
    my $ddl1_enum_defaults = get_enumeration_defaults();
    my $tag_count = 0;
    for my $parsed_dic ( @{$parsed_dics} ) {
        my $ddl_type = $parsed_dic->{'ddl_type'};
        my $dic_data = $parsed_dic->{'parsed_file'};
        my $filename = $parsed_dic->{'filename'};

        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'       => @_,
                                       'program'       => $0,
                                       'filename'      => $filename,
                                     }, $die_on_error_level ) };

        my $item_count = 0;
        if ( $ddl_type eq '1' ) {
            my $ddl1_dic = build_ddl1_dic(
                                $dic_data,
                                {
                                  'default_item_values' => $ddl1_enum_defaults,
                                }
                             );
            set_ddl1_dic_filename($ddl1_dic, $filename);
            $item_count = scalar( keys %{ get_ddl1_dic_items($ddl1_dic) } );
            push @ddl1_dictionaries, $ddl1_dic;
        }

        #----------------------------------------------------------------
        # NB: although support of DDL2 dictionaries has been dropped,
        # the following commented-out code is left here as an example
        # of how DDL2 validation could be eventually reintegrated back
        # into the workflow
        #----------------------------------------------------------------
        #
        # if ( $ddl_type eq '2' ) {
        #    my $ddl2_dic = build_ddl2_dic( $dic_data->[0] );
        #    $item_count = scalar( keys %{$ddl2_dic} );
        #    $validation_dics{$ddl_type}{'dictionaries'}{$filename} = $ddl2_dic;
        #    $item_count = scalar(
        #            keys %{
        #                $validation_dics{$ddl_type}{'dictionaries'}{$filename}
        #            } );
        # }

        if ( $ddl_type eq 'm' ) {
            my ($dirs) = (fileparse( $filename ))[1];
            my $dic_block = resolve_dic_imports(
                $dic_data->[0],
                {
                   'import_path'        => [ @ddlm_dic_import_path, $dirs ],
                   'parser_options'     => $parser_options,
                   'die_on_error_level' => $die_on_error_level,
                   'importing_file'     => $filename,
                 }
            );

            $validation_dics{$ddl_type}{'dictionaries'}{$filename} =
                                                build_ddlm_dic( $dic_block );
            $item_count = scalar(
                    keys %{
                        $validation_dics{$ddl_type}
                                {'dictionaries'}{$filename}->{'Item'}
                    } );
        }

        if ( $item_count == 0 ) {
            warn "no data item definitions found\n";
        }

        $tag_count += $item_count;
    }

    if( $tag_count == 0 ) {
        report_message( {
            'program'   => $0,
            'err_level' => 'ERROR',
            'message'   =>
                'no data item definitions were found in the ' .
                'provided dictionary files (' .
                 ( join ',', map { "'$_'"} ( @ddl1_dic_files,
                                             @ddlm_dic_files,
                                             @generic_dic_files ) ) . ')'
        }, $die_on_errors );
    }

    if ($merge_ddl1_dics) {
        my $dic_merge_results = merge_ddl1_dics(\@ddl1_dictionaries);

        for my $merge_issue (@{$dic_merge_results->{'merge_issues'}}) {
            report_message( {
                'program'   => $0,
                'filename'  => $merge_issue->{'filename'},
                'add_pos'   => "data_$merge_issue->{'block_code'}",
                'err_level' => 'WARNING',
                'message'   => $merge_issue->{'message'},
            }, $die_on_errors );
        }

        my $merged_dic = $dic_merge_results->{'dictionary'};
        if (defined $merged_dic) {
            @ddl1_dictionaries = ( $merged_dic );
        }
    }

    $validation_dics{'1'}{'dictionaries'} = \@ddl1_dictionaries;

    $validation_dics{'1'}{'auxiliary_search_structure'} =
                                    build_auxiliary_dic_search_structure(
                                        $validation_dics{'1'}{'dictionaries'}
                                    );

} else {
    report_message( {
        'program'   => $0,
        'err_level' => 'ERROR',
        'message'   =>
            'at least one dictionary file should be provided using ' .
            'the \'--dictionaries\', \'--ddl1-dictionaries\', ' .
            '\'--ddlm-dictionaries\' options. ' .
            'Automatic dictionary download is currently not implemented'
    }, $die_on_errors );
    my $dic_iucr_uri = 'ftp://ftp.iucr.org/pub/cif_core.dic';
}

# Iterating through the CIF files

@ARGV = ('-') unless @ARGV;

my $validation_options = {
    'report_deprecated'   => $report_deprecated,
    'ignore_case'         => $ignore_case,
    'enum_as_set_tags'    => $enum_as_set_tags,
    'range_su_multiplier' => $range_su_multiplier,
    'max_issue_count'     => $max_message_count,
    # DDL1 specific options
    'allow_double_precision_notation' => $allow_double_precision_notation,
    # DDLm specific options
    'report_missing_su' => $report_missing_su,
};

my $known_data_names = get_all_data_names( \%validation_dics );

my $err_level = 'NOTE';
for my $filename ( @ARGV ) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );
    my $dir = (fileparse($filename))[1];

    next if !defined $data;

    for my $block ( @{$data} ) {
        my $dataname = 'data_' . $block->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => @_,
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname
            }, $die_on_error_level )
        };

        $block = resolve_dic_imports(
                    $block, {
                        'import_path'        => [ @ddlm_dic_import_path, $dir ],
                        'parser_options'     => $options,
                        'die_on_error_level' => $die_on_error_level,
                        'importing_file'     => $filename,
                    } );

        my @validation_notes;
        my $ddlm_dics = $validation_dics{'m'}{'dictionaries'};
        for my $dic_f ( sort keys %{$ddlm_dics} ) {
            for my $issue ( @{ ddlm_validate_data_block(
                                    $block,
                                    $ddlm_dics->{$dic_f},
                                    $validation_options ) } ) {
                if ( $issue->{'test_type'} eq 'ISSUE_COUNT_LIMIT_EXCEEDED' ) {
                    report_message( {
                        'err_level' => 'NOTE',
                        'program'   => $0,
                        'filename'  => $filename,
                        'message'   => $issue->{'message'},
                        'add_pos'   =>
                            ( 'data_' . $issue->{'data_block_code'} .
                                ( defined $issue->{'save_frame_code'} ?
                                  " save_$issue->{'save_frame_code'}" : '' ) )
                    }, $die_on_error_level->{'NOTE'} );
                } else {
                    push @validation_notes,
                            sprint_message( {
                                'program'  => $0,
                                'filename' => $filename,
                                'add_pos'  =>
                                    'data_' . $issue->{'data_block_code'} .
                                    (
                                        defined $issue->{'save_frame_code'} ?
                                        " save_$issue->{'save_frame_code'}" :
                                        ''
                                    ),
                                'err_level' => $err_level,
                                'message'   => $issue->{'message'},
                            } );
                }
            }
        }

        # NOTE: DDLm dictionary validation logic works with lowercased
        # data name as returned by the COD::CIF::Parser while the DDL1/DDL2
        # dictionary validation logic works with canonicalised data names
        canonicalize_names($block);
        push @validation_notes,
            map {
                sprint_message( {
                    'program'   => $0,
                    'filename'  => $filename,
                    'add_pos'   => $dataname,
                    'err_level' => $err_level,
                    'message'   => $_,
                 } )
            } @{ summarise_messages(
                    ddl1_validate_data_block(
                        $block,
                        $validation_dics{'1'},
                        $validation_options
                    )
                )
            };

        #----------------------------------------------------------------
        # NB: although support of DDL2 dictionaries has been dropped,
        # the following commented-out code is left here as an example
        # of how DDL2 validation could be eventually reintegrated back
        # into the workflow
        #----------------------------------------------------------------
        #
        # my $ddl2_dics = $validation_dics{'2'}{'dictionaries'};
        # for my $dic_f ( sort keys %{$ddl2_dics} ) {
        #    push @validation_notes,
        #        map {
        #            sprint_message( {
        #                'program'   => $0,
        #                'filename'  => $filename,
        #                'add_pos'   => $dataname,
        #                'err_level' => $err_level,
        #                'message'   => $_,
        #            } )
        #        } @{ summarise_messages(
        #                ddl2_validate_data_block(
        #                    $block, $ddl2_dics->{$dic_f},
        #                    $validation_options
        #                )
        #            )
        #        };
        # };
        #
        # my $allow_category_local = ( %{$ddl2_dics} || %{$ddlm_dics} );

        my $allow_category_local = ( %{$ddlm_dics} );
        push @validation_notes,
            map {
                sprint_message( {
                    'program'   => $0,
                    'filename'  => $filename,
                    'add_pos'   => $dataname,
                    'err_level' => $err_level,
                    'message'   => $_,
                } )
            } @{ report_unrecognised_data_names(
                    $block,
                    $known_data_names,
                    {
                        'report_local_tags'         => $report_local_tags,
                        'allow_category_local_tags' => $allow_category_local,
                    }
                )
            };

        for my $note (sort @validation_notes) {
            print STDOUT $note;
        }
    }
}

##
# Builds a DDL1 dictionary structure from a parsed DDL1 dictionary.
#
# @param $dic_data_blocks
#       Reference to a DDL1 dictionary structure as returned by the
#       COD::CIF::Parser. Normally, a DDL1 dictionary consists of
#       multiple data blocks each defining a data category or a
#       data item.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Reference to a hash containing the default values of data items
#       # from the DDL1 dictionary that should be used to complete data
#       # item definitions
#           'default_item_values' => {
#               '_list'            => 'no',
#               '_list_mandatory'  => 'no',
#               '_type_conditions' => 'none',
#               '_type_construct'  => '.*',
#               '_list_level'      => '1',
#           }
#       }
# $ddl1_defaults
#       Reference to a hash containing default values of data items
#       that appear in a DDL1 data item definitions.
# @return
#       Reference to dictionary data structure of the following form:
#       {
#         # Reference to a dictionary metadata data block
#           'dictionary' => { ... },
#         # Reference to a hash of category definition data blocks
#         # where lowercased category names serve as the hash keys
#           'category' => { ... },
#         # Reference to a hash of data item definition data blocks
#         # where lowercased item names serve as the hash keys
#           'item' => { ... }
#       }
##
sub build_ddl1_dic
{
    my ($dic_data_blocks, $options) = @_;

    my $ddl1_defaults = exists $options->{'default_item_values'} ?
                               $options->{'default_item_values'} : {};

    my $classified_blocks = classify_dic_blocks($dic_data_blocks);

    my $dictionary_block;
    if (@{$classified_blocks->{'dictionary'}}) {
        $dictionary_block = $classified_blocks->{'dictionary'}[0];
    }

    my %categories;
    for my $category_block (@{$classified_blocks->{'category'}}) {
        my $name = get_data_name($category_block);
        next if !defined $name;
        $name = convert_pseudo_data_name_to_category_name($name);
        $categories{lc $name} = $category_block;
    }

    my %items;
    for my $item_block (@{$classified_blocks->{'item'}}) {
        my $names = get_data_names($item_block);
        next if !defined $names;
        for (map { lc } @{$names}) {
            $items{$_} = $item_block;
            $items{$_}{values}{'_dataname'} = $item_block->{'name'};
        }
    }

    my %definitions = (
        'dictionary' => $dictionary_block,
        'category' => \%categories,
        'item' => \%items,
    );

    return \%definitions;
}

##
# Returns data item definitions as recorded in a DDL1 dictionary data structure.
#
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @return
#       Reference to a hash of DDL1 data item definition blocks with
#       lowercased item names serving as hash keys.
##
sub get_ddl1_dic_items
{
    my ($dic) = @_;

    return $dic->{'item'};
}

##
# Returns category definitions as recorded in a DDL1 dictionary data structure.
#
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @return
#       Reference to a hash of DDL1 category definition blocks
#       where lowercased category names serve as the hash keys.
##
sub get_ddl1_dic_categories
{
    my ($dic) = @_;

    return $dic->{'category'};
}

##
# Returns the dictionary metadata data block as recorded in a DDL1
# dictionary data structure.
#
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @return
#       Reference to a dictionary metadata data block or
#       'undef' if the metadata block was not available.
##
sub get_ddl1_dic_metadata_block
{
    my ($dic) = @_;

    return $dic->{'dictionary'};
}

##
# Records a data block as a dictionary metadata data block in a given
# DDL1 dictionary data structure.
#
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @param $block
#       Reference to a DDL1 dictionary metadata data block.
# @return $dic
#       Reference to the dictionary metadata data block that
#       was replaced by the new one.
##
sub set_ddl1_dic_metadata_block
{
    my ($dic, $block) = @_;

    my $old_block = $dic->{'dictionary'};
    $dic->{'dictionary'} = $block;

    return $old_block;
}

##
# Returns the filename of the file from which the DDL1 dictionary
# data structure was constructed.
#
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @return $filename
#       String that contains the dictionary filename or
#       'undef' if the filename could not be retrieved.
##
sub get_ddl1_dic_filename
{
    my ($dic) = @_;

    return if !exists $dic->{'optional_fields'}{'filename'};
    return $dic->{'optional_fields'}{'filename'}
}

##
# Sets the filename of the file from which the DDL1 dictionary
# data structure was constructed.
#
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @param $filename
#       String that contains the new dictionary filename.
# @return $dic
#       Dictionary filename that was replaced by the new one.
##
sub set_ddl1_dic_filename
{
    my ($dic, $filename) = @_;

    my $old_filename = $dic->{'optional_fields'}{'filename'};
    $dic->{'optional_fields'}{'filename'} = $filename;

    return $old_filename;
}

##
# Adds data items with the default values to the given data block
# if they are not already present in the data block.
#
# @param $data_block
#       Reference to a data block or a save frame as returned by
#       the COD::CIF::Parser that should be modified.
# @param $ddl1_defaults
#       Reference to a hash containing default values of data items
#       that appears in DDL1 data item definitions.
# @return
#       Reference to the data block with the default data items added.
##
sub add_default_data_items
{
    my ($data_block, $default_values) = @_;

    for my $tag ( keys %{$default_values} ) {
        if ( !exists $data_block->{'values'}{$tag} ) {
            $data_block->{'values'}{$tag} = [ $default_values->{$tag} ];
        }
    }

    return $data_block;
}

##
# Validates a CIF data block against a set of DDL1 dictionaries.
#
# @param $data_block
#       Reference to a data block or a save frame as returned by
#       the COD::CIF::Parser.
# @param $validation_resources
#       Reference to a validation resource data structure of
#       the following form:
#       {
#       # Reference to a hash of dictionary data structures as
#       # returned by the build_ddl1_dic() subroutine:
#           'dictionaries' => {
#               'path_to_dictionary_file_A' => {
#                   ...
#                },
#               'path_to_dictionary_file_B' => {
#                   ...
#                },
#                ...,
#               'path_to_dictionary_file_Z' => {
#                   ...
#                },
#            },
#       # Reference to an auxiliary search data structure of a DDL1 dictionary
#       # as returned by the build_auxiliary_dic_search_structure() subroutine:
#           'auxiliary_search_structure' => {
#               'item_to_category' => { ... },
#               'category_to_mandatory_items' => { ... },
#               'sub_key_to_main_key' => { ... },
#           }
#       }
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#         # Report data items that have been replaced by other data items
#           'report_deprecated' => 0,
#         # Ignore the case while matching enumerators
#           'ignore_case' => 0,
#         # Array reference to a list of data items that should be
#         # treated as potentially having values consisting of a
#         # combination of several enumeration values. Data items
#         # are identified by data names.
#           'enum_as_set_tags' => ['_atom_site_refinement_flags'],
#         # Multiplier that should be applied to the standard
#         # uncertainty (s.u.) when determining if a numeric
#         # value resides in the specified range. For example,
#         # a multiplier of 3.5 means that the value is treated
#         # as valid if it falls in the interval of
#         # [lower bound - 3.5 * s.u.; upper bound + 3.5 * s.u.]
#         # Default: 3
#           'range_su_multiplier' => 3,
#         # Treat numbers expressed using the double precision notation
#         # (i.e. 0.42D+7) as proper numbers
#           'allow_double_precision_notation'  => 0,
#         # Maximum number of validation issues that are reported for
#         # each unique combination of validation criteria and validated
#         # data items. Negative values remove the limit altogether.
#           'max_issue_count' => 5,
#       }
# @return
#       Array reference to a list of validation messages.
##
sub ddl1_validate_data_block
{
    my ( $data_block, $validation_resources, $options ) = @_;

    $options = {} if !defined $options;
    my $report_deprecated   = exists $options->{'report_deprecated'} ?
                                     $options->{'report_deprecated'} : 0;
    my $ignore_case         = exists $options->{'ignore_case'} ?
                                     $options->{'ignore_case'} : 0;
    my $enum_as_set_tags    = exists $options->{'enum_as_set_tags'} ?
                                     $options->{'enum_as_set_tags'} : [];
    my $range_su_multiplier = exists $options->{'range_su_multiplier'} ?
                                     $options->{'range_su_multiplier'} : 3;
    my $allow_d_notation    = exists $options->{'allow_double_precision_notation'} ?
                                     $options->{'allow_double_precision_notation'} : 0;
    my $max_issue_count     = exists $options->{'max_issue_count'} ?
                                     $options->{'max_issue_count'} : -1;

    my $auxiliary_search_structure =
                    $validation_resources->{'auxiliary_search_structure'};

    my @issues;
    for my $loop_tags ( @{$data_block->{'loops'}} ) {
        push @issues,
             @{check_loop_category_homogeneity(
                 $loop_tags,
                 $auxiliary_search_structure->{'item_to_category'}
             )};
        push @issues,
             @{check_loop_mandatory_item_presence(
                 $loop_tags,
                 $auxiliary_search_structure
             )};
    }

    my $dics = $validation_resources->{'dictionaries'};
    for my $dic ( @{$dics} ) {
        push @issues,
             @{ ddl1_validate_data_block_against_single_dic(
                    $data_block, $dic,
                    {
                        'report_deprecated'
                                    => $report_deprecated,
                        'ignore_case'
                                    => $ignore_case,
                        'enum_as_set_tags'
                                    => $enum_as_set_tags,
                        'range_su_multiplier'
                                    => $range_su_multiplier,
                        'allow_double_precision_notation'
                                    => $allow_d_notation,
                        'sub_key_to_main_key'
                                    => $auxiliary_search_structure->
                                                {'sub_key_to_main_key'},
                    }
                )
             };
    };

    my @validation_messages;
    if ( $max_issue_count < 0 ) {
        push @validation_messages, map { $_->{'message'} } @issues;
    } else {
        my %grouped_issues;
        for my $issue ( @issues ) {
            my $constraint = $issue->{'test_type'};
            my $data_name_key = join "\x{001E}", @{$issue->{'data_items'}};
            push @{$grouped_issues{$constraint}{$data_name_key}}, $issue;
        }

        # TODO: move hash out of the subroutine
        my %test_types = (
            'SIMPLE_KEY_UNIQUENESS'    =>
                'simple loop key uniqueness',
            'COMPOSITE_KEY_UNIQUENESS' =>
                'composite loop key uniqueness',
            'LOOP.INDIVIDUAL_UNIQUE_VALUES' =>
                'value uniqueness',
            'LOOP.COLLECTIVELY_UNIQUE_VALUES' =>
                'collective value uniqueness',
            'LOOP.INTEGRITY'      =>
                'loop integrity',
            'LOOP.CATEGORY_HOMOGENEITY' =>
                'items in a looped list all belonging to the same category',
            'LOOP.MANDATORY_ITEM_PRESENCE' =>
                'mandatory item presence in a category loop',
            'KEY_ITEM_PRESENCE'       =>
                'mandatory key item presence',
            'ITEM_REPLACEMENT.PRESENCE_OF_REPLACED' =>
                'replaced data item presence',
            'ITEM_REPLACEMENT.SIMULTANEOUS_PRESENCE' =>
                'simultaneous presence of replaced and replacing items',
            'LOOP_CONTEXT.MUST_APPEAR_IN_LOOP' =>
                'data items that incorrectly appear outside of a looped list',
            'LOOP_CONTEXT.MUST_NOT_APPEAR_IN_LOOP' =>
                'data items that incorrectly appear inside of a looped list',
            'PRESENCE_OF_PARENT_DATA_ITEM' =>
                'parent data item presence',
            'PRESENCE_OF_PARENT_DATA_ITEM_VALUE' =>
                'parent data item value presence',
            'ENUMERATION_SET' =>
                'data value belonging to the specified enumeration set',
            'SU_ELIGIBILITY' =>
                'data value standard uncertainty eligibility',
            'ENUM_RANGE.CHAR_STRING_LENGTH' =>
                'data value belonging to a character range and ' .
                'consisting of more than one symbol',
            'ENUM_RANGE.IN_RANGE' =>
                'data value belonging to the specified value range',
            'TYPE_CONSTRAINT.QUOTED_NUMERIC_VALUES' =>
                'proper quote usage with numeric values',
            'TYPE_CONSTRAINT.PROPER_NUMERIC_VALUES' =>
                'data value conformance to the numeric data type',
        );

        for my $constraint (sort keys %grouped_issues) {
            for my $data_name_key (sort keys %{$grouped_issues{$constraint}}) {
                my @group_issues = @{$grouped_issues{$constraint}{$data_name_key}};
                my $group_size = scalar(@group_issues);

                my $description;
                if ( defined $test_types{$constraint} ) {
                    $description = $test_types{$constraint};
                }

                if ( $group_size > $max_issue_count ) {
                    warn 'NOTE, a test ' .
                         (defined $description ? "of $description " : '') .
                         'involving the [' .
                         ( join ', ',
                            map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                                @{$group_issues[0]->{'data_items'}} ) .
                        "] data items resulted in $group_size validation messages " .
                        '-- the number of reported messages is limited to ' .
                        "$max_issue_count" . "\n";
                    $group_size = $max_issue_count;
                }

                push @validation_messages,
                        map { $_->{'message'} } @group_issues[0..($group_size - 1)];
            }
        }
    }

    return \@validation_messages;
}

##
# Validates a CIF data block against a single DDL1 dictionary.
#
# @param $data_block
#       Reference to a data block or a save frame as returned by
#       the COD::CIF::Parser.
# @param $dic
#       Reference to a DDL1 dictionary data structure as returned by
#       the build_ddl1_dic() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#         # Report data items that have been replaced by other data items
#           'report_deprecated' => 0,
#         # Ignore the case while matching enumerators
#           'ignore_case' => 0,
#         # Array reference to a list of data items that should be
#         # treated as potentially having values consisting of a
#         # combination of several enumeration values. Data items
#         # are identified by data names.
#           'enum_as_set_tags' => ['_atom_site_refinement_flags'],
#         # Multiplier that should be applied to the standard
#         # uncertainty (s.u.) when determining if a numeric
#         # value resides in the specified range. For example,
#         # a multiplier of 3.5 means that the value is treated
#         # as valid if it falls in the interval of
#         # [lower bound - 3.5 * s.u.; upper bound + 3.5 * s.u.]
#         # Default: 3
#           'range_su_multiplier' => 3,
#         # Treat numbers expressed using the double precision notation
#         # (i.e. 0.42D+7) as proper numbers
#           'allow_double_precision_notation'  => 0,
#         # Reference to a hash that maps subcategory reference data items
#         # to reference items of the parent category as returned by
#         # the get_subcategory_key_to_category_key_mapping() subroutine
#           'sub_key_to_main_key' => {
#               'sub_1_key_item_name' => 'main_1_key_item_name',
#               'sub_2_key_item_name' => 'main_1_key_item_name',
#               ...,
#               'sub_n_key_item_name' => 'main_m_key_item_name',
#           },
#       }
# @return
#       Array reference to a list of validation message data structures
#       of the following form:
#       {
#         # Code of the data block that contains the offending entry
#           'data_block_code' => 'offending_block_code',
#         # Code of the save frame that contains the offending entry.
#         # Might be undefined
#           'save_frame_code' => 'offending_frame_code',
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Human-readable description of the issue
#           'message' => 'issue description'
#       }
##
sub ddl1_validate_data_block_against_single_dic
{
    my ( $data_block, $dic, $options ) = @_;

    $options = {} if !defined $options;
    my $report_deprecated =
                    exists $options->{'report_deprecated'} ?
                           $options->{'report_deprecated'} : 0;
    my $ignore_case =
                    exists $options->{'ignore_case'} ?
                           $options->{'ignore_case'} : 0;
    my $enum_as_set_tags =
                    exists $options->{'enum_as_set_tags'} ?
                           $options->{'enum_as_set_tags'} : [];
    my $range_su_multiplier =
                    exists $options->{'range_su_multiplier'} ?
                           $options->{'range_su_multiplier'} : 3;
    my $allow_d_notation =
                    exists $options->{'allow_double_precision_notation'} ?
                           $options->{'allow_double_precision_notation'} : 0;
    my $sub_key_to_main_key =
                    exists $options->{'sub_key_to_main_key'} ?
                           $options->{'sub_key_to_main_key'} : {};

    my $dic_items = get_ddl1_dic_items( $dic );

    my @issues = @{
            validate_block_loops(
                $data_block,
                $dic_items,
                {
                    'sub_key_to_main_key' => $sub_key_to_main_key
                }
            )
        };

    for my $tag ( @{$data_block->{'tags'}} ) {
        my $lc_tag = lc $tag;

        next if !exists $dic_items->{$lc_tag};

        if( $report_deprecated ) {
            push @issues,
                 @{ report_deprecated( $data_block, $tag, $dic_items ) }
        };

        push @issues,
             @{ validate_list_unique_key( $data_block, $tag, $dic_items ) };

        my $dic_item = $dic_items->{$lc_tag};

        push @issues,
             @{ validate_list_mandatory( $data_block, $tag, $dic_item ) };

        push @issues,
             @{ check_list_link_parent( $data_block, $tag, $dic_item ) };

        push @issues,
             @{ validate_enumeration_set(
                    $data_block, $tag, $dic_item,
                    {
                        'ignore_case'  => $ignore_case,
                        'treat_as_set' => any { lc($_) eq $lc_tag }
                                                        @{$enum_as_set_tags}
                    }
             ) };

       push @issues,
            @{ validate_su( $data_block, $tag, $dic_item ) };

       push @issues,
            @{ validate_range(
                    $data_block, $tag, $dic_item,
                    {
                        'range_su_multiplier' => $range_su_multiplier,
                    }
            ) };

       push @issues,
            @{ validate_data_type(
                    $data_block, $tag, $dic_item,
                    {
                        'allow_double_precision_notation' => $allow_d_notation,
                    }
            ) };
    }

    @issues = @{ summarise_validation_issues( \@issues ) };

    return \@issues;
}

##
# Constructs an auxiliary search data structure of shared DDL1 dictionary
# properties extracted from all provided DDL1 dictionaries. The data structure
# allows to properly resolve entities that are defined in one dictionary, but
# referenced in a different dictionary (i.e. categories).
#
# @param $dics
#       Reference to an array of DDL1 dictionary data structures
#       as returned by the build_ddl1_dic() subroutine.
# @param \%auxiliary_search_structure
#       Reference to a data structure of the following form:
#       {
#       # Reference to a hash that maps each data item to the category
#       # that the it belongs to:
#           'item_to_category' => {
#               'data_name_a' => 'category_name_1',
#               'data_name_b' => 'category_name_1',
#               'data_name_c' => 'category_name_3',
#               ...
#               'data_name_f' => 'category_name_n',
#           },
#       # Reference to a hash that maps categories to data items that must
#       # appear in looped list of that category:
#           'category_to_mandatory_items' => {
#               'category_name_1' => [
#                   'data_name_a',
#                   'data_name_b',
#                   ...
#                   'data_name_c',
#               ],
#
#               ...,
#
#               'category_name_n' => [
#                   ...
#               ]
#           },
#       # Reference to a hash that maps subcategory reference data items
#       # to reference items of the parent category as returned by
#       # the get_subcategory_key_to_category_key_mapping() subroutine
#           'sub_key_to_main_key' => {
#               'sub_1_key_item_name' => 'main_1_key_item_name',
#               'sub_2_key_item_name' => 'main_1_key_item_name',
#               ...,
#               'sub_n_key_item_name' => 'main_m_key_item_name',
#           }
#       # Reference to a hash that maps data item names to the names of their
#       # alternate data items:
#           'item_to_alternate_items' => {
#               'item_1' => [ 'alternate_item_1', ],
#               'item_2' => [ 'alternate_item_2_a', ..., 'alternate_item_2_z', ],
#               ...,
#               'item_n' => [ 'alternate_item_n', ],
#           },
#       }
##
sub build_auxiliary_dic_search_structure
{
    my ($dics) = @_;

    my %auxiliary_search_structure;
    $auxiliary_search_structure{'item_to_category'} =
                            get_item_to_category_mapping($dics);
    $auxiliary_search_structure{'category_to_mandatory_items'} =
                            get_category_to_mandatory_items_mapping($dics);
    $auxiliary_search_structure{'sub_key_to_main_key'} =
                            get_subcategory_key_to_category_key_mapping($dics);
    $auxiliary_search_structure{'item_to_alternate_items'} =
                            get_item_to_alternate_items_mapping($dics);

    return \%auxiliary_search_structure;
}

##
# Constructs a hash that maps data item names to the names of the categories
# that they belong to as defined in the defining DDL1 dictionaries.
#
# @param $dics
#       Reference to an array of DDL1 dictionary data structures
#       as returned by the build_ddl1_dic() subroutine.
# @return
#       Reference to a hash of the following form:
#       {
#           'data_name_a' => 'category_name_1',
#           'data_name_b' => 'category_name_1',
#           'data_name_c' => 'category_name_3',
#           ...
#           'data_name_f' => 'category_name_n'
#       }
##
sub get_item_to_category_mapping
{
    my ($dics) = @_;

    my %item_to_category;
    for my $dic (@{$dics}) {
        my $items = get_ddl1_dic_items($dic);
        for my $tag (sort keys %{$items}) {
            my $category = get_category_name( $items->{$tag} );
            next if !defined $category;
            $item_to_category{$tag} = lc $category;
        }
    }

    return \%item_to_category;
}

##
# Constructs a hash that maps category names to the names of data items
# that must appear in looped list of that category.
#
# @param $dics
#       Reference to an array of DDL1 dictionary data structures
#       as returned by the build_ddl1_dic() subroutine.
# @return
#       Reference to a hash of the following form:
#       {
#         'category_name_1' => [
#               'data_name_a',
#               'data_name_b',
#               ...
#               'data_name_c',
#         ],
#
#         ...,
#
#         'category_name_n' => [
#           ...
#         ]
#       }
##
sub get_category_to_mandatory_items_mapping
{
    my ($dics) = @_;

    my %category_to_mandatory_items;
    for my $dic (@{$dics}) {
        my $items = get_ddl1_dic_items($dic);
        for my $tag (sort keys %{$items}) {
            my $category = get_category_name( $items->{$tag} );
            next if !defined $category;
            if ( get_list_mandatory_flag($items->{$tag}) eq 'yes' ) {
                push @{$category_to_mandatory_items{$category}}, $tag;
            }
        }
    }

    return \%category_to_mandatory_items;
}

##
# Constructs a hash that maps subcategory reference data items to
# reference items of the parent category.
#
# Examples of a parent and child categories (subcategories) provided in
# official IUCr sources [1,2] describe the way subcategory keys should
# be defined and used. A posts in the official IUCr mailing list [3]
# clarifies it even further:
#
#   "When the _list_link_parent data name is of the same category as the
#   defined data name, the _list_link_parent data name and the defined name
#   may be considered as a single combined definition which can be referred
#   to by either of the original data names for the purposes of resolving
#   _list_reference and _list_mandatory requirements."
#
# The official IUCr dictionaries currently contain a single instance of
# such relationship between the '_atom_site_label' and '_atom_site_aniso_label'
# data items.
#
# @source [1]
#       2.5.5.7. Definition example 7: joinable lists,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 56, doi: 10.1107/97809553602060000107
# @source [2]
#       3.1.5.4.2. Looped data,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 78, doi: 10.1107/97809553602060000107
# @source [3]
#       https://www.iucr.org/__data/iucr/lists/cif-developers/msg00197.html
#
# @param $dics
#       Reference to an array of DDL1 dictionary data structures
#       as returned by the build_ddl1_dic() subroutine.
# @return
#       Reference to a hash of the following form:
#       {
#         'sub_1_key_item_name' => 'main_1_key_item_name',
#         'sub_2_key_item_name' => 'main_1_key_item_name',
#         ...,
#         'sub_n_key_item_name' => 'main_m_key_item_name',
#       }
##
sub get_subcategory_key_to_category_key_mapping
{
    my ($dics) = @_;

    my %category;
    for my $dic (@{$dics}) {
        my $items = get_ddl1_dic_items($dic);
        for my $tag (sort keys %{$items}) {
            my $data_item = $items->{$tag};
            my $name = get_category_name( $data_item );
            next if !defined $name;
            next if exists $category{$name}{'items'}{$tag};
            $category{$name}{'items'}{$tag} = $data_item
        }
    }

    my %sub_key_to_main_key;
    for my $name (keys %category) {
        my $items = $category{$name}{'items'};
        my $list_reference_groups = get_all_list_references($items);
        next if scalar (@{$list_reference_groups} < 2);
        my @potential_sub_keys;
        for my $group (@{$list_reference_groups}) {
            next if scalar @{$group->{'key_data_items'}} != 1;
            my $key_name = $group->{'key_data_items'}[0];
            if (exists $items->{$key_name}{'values'}{'_list_link_parent'}) {
                push @potential_sub_keys, $key_name;
            }
        }

        for my $key_name (@potential_sub_keys) {
            my $parent_name = $items->{$key_name}{'values'}{'_list_link_parent'}[0];
            next if none { $_ eq $parent_name } keys %{$items};
            $sub_key_to_main_key{$key_name} = $parent_name;
        }
    }

    return \%sub_key_to_main_key;
}

##
# BEGIN: DDL1 dictionary merging subroutines
##

##
# Merges several DDL1 dictionaries into a single virtual DDL1 dictionary.
#
# The merging algorithm is implemented according to the official IUCr
# dictionary merging protocol [1].
#
# The protocol describes three merging modes: STRICT, REPLACE and OVERLAY.
# This subroutine implements only the OVERLAY mode. A short description of
# the mode as provided in the protocol:
# "New attributes are added to those already stored for the data name;
#  conflicting attributes replace those already stored."
#
# For a more detailed description of the merging protocol and modes,
# consult the original source.
#
# @source [1]
#       3.1.9.1. A dictionary merging protocol,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 87-89, doi: 10.1107/97809553602060000107
#
# @param $dics
#       Reference to an array of DDL1 dictionary data structures that should
#       be merged as returned by the build_ddl1_dic() subroutine. The first
#       dictionary in the array serves as the base dictionary with the rest
#       of the dictionaries being merged in sequential order.
# @return $base_dic
#       Reference to a data structure of the following form or
#       'undef' if the dictionaries could not be merged:
#       {
#       # Reference to a merged DDL1 dictionary data structure
#         'dictionary' => { ... },
#       # Reference to an array of merging issues
#         'merge_issues' => [
#           {
#           # Names of data items in the definition that could not be merged
#             'item_names'         => [ 'data_name_1', ..., 'data_name_n' ],
#           # Name of the data item that the definition defines
#             'defined_item_name'  => 'definition_item_name',
#           # Definition block code
#             'block_code'         => 'definition_block_code',
#           # Filename of the dictionary which contains the definition block
#             'filename'           => '/path/to/the/original/file.dic',
#           # Human-readable description of the merge issue
#             'message'            => 'item could not be merged',
#           # '1' if issue was found in the base definition,
#           # '0' if issues was found in the new definition
#             'is_base_definition' => 0,
#           },
#           ...
#         ]
#       }
##
sub merge_ddl1_dics
{
    my ($dics) = @_;

    return if !@{$dics};
    return if @{$dics} == 1;

    my @merge_issues;
    my $base_dic = $dics->[0];
    for (my $i = 1; $i < @{$dics}; $i++) {
        my $merge_results =
                    merge_dic_pair_in_overlay_mode($base_dic, $dics->[$i]);
        $base_dic = $merge_results->{'dictionary'};
        push @merge_issues, @{$merge_results->{'merge_issues'}};
    }

    my @metadata_blocks = map {
                            defined get_ddl1_dic_metadata_block($_) ?
                                    get_ddl1_dic_metadata_block($_) :
                                    new_datablock('on_this_dictionary')
                            } @{$dics};
    set_ddl1_dic_metadata_block( merge_metadata_blocks( \@metadata_blocks ) );
    set_ddl1_dic_filename($base_dic, undef);

    my $merge_results = {
        'dictionary'   => $base_dic,
        'merge_issues' => \@merge_issues,
    };

    return $merge_results;
}

##
# Constructs a metadata data block of a virtual DDL1 dictionary
# from the metadata data blocks of other DDL1 dictionaries.
#
# @param $blocks
#       Reference to an array of DDL1 dictionary metadata data blocks.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Dictionary name that should be recorded in the metadata block
#           'name'    => 'merged.dic',
#       # Version number that should be recorded in the metadata block
#       # (default: '1.0')
#           'version' => '1.0',
#       }
# @return
#       Reference to a metadata data block of a virtual DDL1 dictionary.
##
sub merge_metadata_blocks
{
    my ($blocks, $options) = @_;

    $options = {} if !defined $options;

    my $name    = exists $options->{'name'} ?
                         $options->{'name'} :
                         undef;
    my $version = exists $options->{'version'} ?
                         $options->{'version'} :
                         '1.0';

    my $merged_block = new_datablock('on_this_dictionary', '1.1');
    set_tag($merged_block, '_dictionary_version', $version);

    my ($day, $month, $year) = (localtime)[3,4,5];
    my $date = sprintf('%04d-%02d-%02d', $year+1900, $month+1, $day);
    set_tag($merged_block, '_dictionary_update', $date);

    if (!defined $name) {
        $name = `hostname`;
        chomp($name);
        $name = unicode2cif($name);
        $name = "$name-$$-$date";
    };
    set_tag($merged_block, '_dictionary_name', $name);

    my $history_header = <<"HISTORY_HEADER";
 $date Created the composite dictionary.

 The following dictionaries identified by [name, version, update date]
 were merged in the given order:

HISTORY_HEADER

    my $dic_count = 0;
    my $history = '';
    my @tags = ('_dictionary_name', '_dictionary_version', '_dictionary_update');
    for my $block (@{$blocks}) {
        $dic_count++;
        my @metadata;
        for my $tag (@tags) {
            if (defined $block->{'values'}{$tag}) {
                push @metadata, $block->{'values'}{$tag}[0];
            } else {
                push @metadata, undef;
            }
        }
        $history_header .= " [$dic_count]. [" .
            ( join ', ', map { defined $_ ? "'$_'" : 'undef' } @metadata ) .
            ']' . ( @{$blocks} == $dic_count ? '.' : ';') . "\n";

        next if !exists $block->{'values'}{'_dictionary_history'};
        $history .= " History of dictionary [$dic_count]:\n";
        $history .= $block->{'values'}{'_dictionary_history'}[0];
        $history .= "\n\n";
    }
    $history = $history_header . "\n" . $history;
    set_tag($merged_block, '_dictionary_name', $history);

    return $merged_block;
}

##
# Merges two DDL1 dictionaries into a single virtual DDL1 dictionary
# using the OVERLAY mode.
#
# The merging algorithm is implemented according to the official IUCr
# dictionary merging protocol [1].
#
# For a more detailed description of the merging protocol and modes,
# consult the original source.
#
# @source [1]
#       3.1.9.1. A dictionary merging protocol,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 87-89, doi: 10.1107/97809553602060000107
#
# @param $base_dic
#       Reference to a DDL1 dictionary data structure that should serve
#       as the base dictionary.
# @param $new_dic
#       Reference to a DDL1 dictionary data structure that should be merged
#       into the base dictionary.
# @param $dics
#       Reference to an array of DDL1 dictionary data structures that should
#       be merged as returned by the build_ddl1_dic() subroutine. The first
#       dictionary in the array serves as the base dictionary with the rest
#       of the dictionaries being merged in sequential order.
#
# @return
#       Reference to a data structure of the following form:
#       {
#       # Reference to a merged DDL1 dictionary data structure
#         'dictionary' => { ... },
#       # Reference to an array of merging issues
#         'merge_issues' => [
#           {
#           # Names of data items in the definition that could not be merged
#             'item_names'         => [ 'data_name_1', ..., 'data_name_n' ],
#           # Name of the data item that the definition defines
#             'defined_item_name'  => 'definition_item_name',
#           # Definition block code
#             'block_code'         => 'definition_block_code',
#           # Filename of the dictionary which contains the definition block
#             'filename'           => '/path/to/the/original/file.dic',
#           # Human-readable description of the merge issue
#             'message'            => 'item could not be merged',
#           # '1' if issue was found in the base definition,
#           # '0' if issues was found in the new definition
#             'is_base_definition' => 0,
#           },
#           ...
#         ]
#       }
##
sub merge_dic_pair_in_overlay_mode
{
    my ($base_dic, $new_dic) = @_;

    my @merge_issues;
    my $base_dic_items = get_ddl1_dic_items($base_dic);
    my $new_dic_items = get_ddl1_dic_items($new_dic);
    for my $tag (sort keys %{$new_dic_items}) {
        if (exists $base_dic_items->{$tag}) {
            my $merge_results = merge_dic_item_definitions(
                $base_dic_items->{$tag},
                $new_dic_items->{$tag}
            );
            $base_dic_items->{$tag} = $merge_results->{'definition'};
            push @merge_issues, @{$merge_results->{'merge_issues'}};
        } else {
            $base_dic_items->{$tag} = $new_dic_items->{$tag}
        }
    }

    my $base_dic_categories = get_ddl1_dic_categories($base_dic);
    my $new_dic_categories = get_ddl1_dic_items($new_dic);
    for my $name (sort keys %{$new_dic_categories}) {
        if (exists $base_dic_categories->{$name}) {
            my $merge_results = merge_dic_item_definitions(
                                  $base_dic_categories->{$name},
                                  $new_dic_categories->{$name}
                                );
            $base_dic_categories->{$name} = $merge_results->{'definition'};
            push @merge_issues, @{$merge_results->{'merge_issues'}};
        } else {
            $base_dic_categories->{$name} = $new_dic_categories->{$name}
        }
    }

    for my $issue (@merge_issues) {
        if ($issue->{'is_base_definition'}) {
            $issue->{'filename'} = get_ddl1_dic_filename($base_dic);
        } else {
            $issue->{'filename'} = get_ddl1_dic_filename($new_dic);
        }
    }

    my $merge_results = {
        'dictionary'   => $base_dic,
        'merge_issues' => \@merge_issues,
    };

    return $merge_results;
}

##
# Merges two DDL1 data item definitions into a single definition.
#
# The merging algorithm is implemented according to the official IUCr
# dictionary merging protocol [1].
#
# The protocol describes three merging modes: STRICT, REPLACE and OVERLAY.
# This subroutine implements only the OVERLAY mode. A short description of
# the mode as provided in the protocol:
# "New attributes are added to those already stored for the data name;
#  conflicting attributes replace those already stored."
#
# For a more detailed description of the merging protocol and modes,
# consult the original source.
#
# @source [1]
#       3.1.9.1. A dictionary merging protocol,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 87-89, doi: 10.1107/97809553602060000107
#
# @param $base_definition
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser. Data item values provided in this definition
#       have a lower priority than those provided in the new definition.
# @param $new_definition
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser. Data item values provided in this definition
#       have a higher priority than those provided in the base definition.
# @return
#       Reference to a data structure of the following form:
#       {
#       # Reference to a merged definition
#         'definition' => { ... },
#       # Reference to an array of merging issues
#         'merge_issues' => [
#           {
#           # Names of data items in the definition that could not be merged
#             'item_names'         => [ 'data_name_1', ..., 'data_name_n' ],
#           # Name of the data item that the definition defines
#             'defined_item_name'  => 'definition_item_name',
#           # Definition block code
#             'block_code'         => 'definition_block_code',
#           # Human-readable description of the merge issue
#             'message'            => 'item could not be merged',
#           # '1' if issue was found in the base definition,
#           # '0' if issues was found in the new definition
#             'is_base_definition' => 0,
#           },
#           ...
#         ]
#       }
##
sub merge_dic_item_definitions
{
    my ($base_definition, $new_definition) = @_;

    my $merged_definition = clone( $base_definition );

    $merged_definition = merge_scalar_items_in_overlay_mode(
                            $merged_definition,
                            $new_definition
                         );

    my $merge_results = merge_looped_items_in_overlay_mode(
                            $merged_definition,
                            $new_definition
                        );

    for my $issue (@{$merge_results->{'merge_issues'}}) {
        if ($issue->{'is_base_definition'}) {
            $issue->{'message'} = 'data items [' .
                ( join ', ', map { "'$_'" } @{$issue->{'item_names'}} ) .
                '] were left unchanged in the base definition of ' .
                "the '$issue->{'defined_item_name'}' data item due to issues " .
                'in the definition data block -- ' . $issue->{'message'};
        } else {
            $issue->{'message'} = 'data items [' .
                ( join ', ', map { "'$_'" } @{$issue->{'item_names'}} ) .
                '] were not merged into the base definition of ' .
                "the '$issue->{'defined_item_name'}' data item due to ".
                'issues in the definition data block -- ' . $issue->{'message'};
        }
    }

    return $merge_results;
}

##
# Merges scalar data items from the new definition into the base definition
# according to the rules of the OVERLAY mode.
#
# @param $base_definition
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser. Data item values provided in this definition
#       have a lower priority than those provided in the new definition.
# @param $new_definition
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser. Data item values provided in this definition
#       have a higher priority than those provided in the base definition.
# @return
#       Reference to the base definition block with the merged scalar items.
##
sub merge_scalar_items_in_overlay_mode
{
    my ($base_definition, $new_definition) = @_;

    ##
    # The list of scalar data item were manually compiled
    # from the 'ddl_core.dic' dictionary. Metadata of the
    # source dictionary:
    #
    # Dictionary name: ddl_core.dic
    # Dictionary version: 1.4.1
    # Last updated on: 2005-06-29
    # Retrieved on: 2020-02-19
    # Retrieved from: ftp://ftp.iucr.org/pub/ddl_core.dic
    ##
    my @scalar_item_names = qw(
        _category
        _definition
        _enumeration_default
        _enumeration_range
        _list
        _list_level
        _list_mandatory
        _type
        _type_construct
        _units
        _units_detail
    );

    for my $tag ( @scalar_item_names ) {
        next if !exists $new_definition->{'values'}{$tag};
        set_tag($base_definition, $tag, $new_definition->{'values'}{$tag}[0]);
    }

    return $base_definition;
}

##
# Merges potentially looped data items from the new definition into the
# base definition according to the rules of the OVERLAY mode.
#
# @param $base_definition
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser. Data item values provided in this definition
#       have a lower priority than those provided in the new definition.
# @param $new_definition
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser. Data item values provided in this definition
#       have a higher priority than those provided in the base definition.
# @return
#       Reference to a data structure of the following form:
#       {
#       # Reference to a definition with merged looped items
#         'definition' => { ... },
#       # Reference to an array of merging issues
#         'merge_issues' => [
#           {
#           # Names of data items in the definition that could not be merged
#             'item_names'         => [ 'data_name_1', ..., 'data_name_n' ],
#           # Name of the data item that the definition defines
#             'defined_item_name'  => 'definition_item_name',
#           # Definition block code
#             'block_code'         => 'definition_block_code',
#           # Human-readable description of the merge issue
#             'message'            => 'item could not be merged',
#           # '1' if issue was found in the base definition,
#           # '0' if issues was found in the new definition
#             'is_base_definition' => 0,
#           },
#           ...
#         ]
#       }
##
sub merge_looped_items_in_overlay_mode
{
    my ($base_definition, $new_definition) = @_;

    ##
    # The list of potentially looped data item were manually compiled
    # from the 'ddl_core.dic' dictionary. Metadata of the source dictionary:
    #
    # Dictionary name: ddl_core.dic
    # Dictionary version: 1.4.1
    # Last updated on: 2005-06-29
    # Retrieved on: 2020-02-19
    # Retrieved from: ftp://ftp.iucr.org/pub/ddl_core.dic
    ##
    my %category_key_to_items = (
        # NOTE: the '_name' property should have been used to identify
        # the definitions that are being merged so there is no need to
        # process it any further
        # '_name'               => '_name',
        '_enumeration'        => [ '_enumeration_detail' ],
        '_example'            => [ '_example_detail' ],
        '_list_link_child'    => [],
        '_list_link_parent'   => [],
        '_list_reference'     => [],
        '_list_uniqueness'    => [],
        '_related_item'       => [ '_related_function' ],
        '_type_conditions'    => [],
    );

    my @merge_issues;
    for my $key_tag (sort keys %category_key_to_items) {
        my $new_looped_items = classify_looped_items_by_mergeability(
                                  $new_definition,
                                  $key_tag,
                                  $category_key_to_items{$key_tag}
                               );

        for my $tag (keys %{$new_looped_items->{'unmergeable'}}) {
            push @merge_issues,
                 {
                     'item_names'         => [ $key_tag, @{$category_key_to_items{$key_tag}} ],
                     'defined_item_name'  => $new_definition->{'values'}{'_name'}[0],
                     'block_code'         => $new_definition->{'name'},
                     'message'            => $new_looped_items->{'unmergeable'}{$tag},
                     'is_base_definition' => 0,
                 }
        }
        next if %{$new_looped_items->{'unmergeable'}};
        next if !%{$new_looped_items->{'mergeable'}};

        my $base_looped_items = classify_looped_items_by_mergeability(
                                  $base_definition,
                                  $key_tag,
                                  $category_key_to_items{$key_tag}
                                );

        for my $tag (keys %{$base_looped_items->{'unmergeable'}}) {
            push @merge_issues,
                 {
                     'item_names'         => [ $key_tag, @{$category_key_to_items{$key_tag}} ],
                     'defined_item_name'  => $base_definition->{'values'}{'_name'}[0],
                     'block_code'         => $base_definition->{'name'},
                     'message'            => $base_looped_items->{'unmergeable'}{$tag},
                     'is_base_definition' => 1,
                 }
        }
        next if %{$base_looped_items->{'unmergeable'}};

        eval {
            $new_looped_items->{'mergeable'} =
                                            collapse_identical_loop_rows(
                                               $base_looped_items->{'mergeable'},
                                               $new_looped_items->{'mergeable'},
                                               $key_tag
                                           );
        };
        if ($@) {
            push @merge_issues,
                 {
                     'item_names'         => [ $key_tag, @{$category_key_to_items{$key_tag}} ],
                     'defined_item_name'  => $new_definition->{'values'}{'_name'}[0],
                     'block_code'         => $new_definition->{'name'},
                     'message'            => $@,
                     'is_base_definition' => 1,
                 };
            next;
        }

        my $merged_loop = merge_looped_values(
                             $base_looped_items->{'mergeable'},
                             $new_looped_items->{'mergeable'}
                          );

        # Set the loop key item to create a new loop if needed
        exclude_tag($base_definition, $key_tag);
        my $is_looped = (@{$merged_loop->{$key_tag}} > 1);
        if ($is_looped) {
            set_loop_tag(
                $base_definition,
                $key_tag,
                $key_tag,
                $merged_loop->{$key_tag}
            );
        } else {
            set_tag(
                $base_definition,
                $key_tag,
                $merged_loop->{$key_tag}[0]
            );
        }
        delete $merged_loop->{$key_tag};
        # Set the rest of the item in the loop
        for my $tag (sort keys %{$merged_loop}) {
            exclude_tag($base_definition, $tag);
            if ($is_looped) {
                set_loop_tag(
                    $base_definition,
                    $tag,
                    $key_tag,
                    $merged_loop->{$tag}
                );
            } else {
                set_tag(
                    $base_definition,
                    $tag,
                    $merged_loop->{$tag}[0]
                );
            }
        }
    }

    my $merge_results = {
        'definition'   => $base_definition,
        'merge_issues' => \@merge_issues,
    };

    return $merge_results;
}

##
# Identifies which of the data items from a looped category are mergeable
# and which are not.
#
# @param $definition_block
#       Reference to a DDL1 data item definition block as returned by
#       the COD::CIF::Parser.
# @param $key_tag
#       Name of the data item that acts as the looped list reference
#       of the checked category.
# @param $category_tags
#       Reference to an array of data items that belong to the
#       same category as the looped list reference item. Data
#       items are identified by their names.
# @return
#       Reference to a data structure of the following form:
#       {
#       # Mergeable data items are identified by their data names
#       # and returned together with the associated data values
#           'mergeable' => {
#               'mergeable_item_a' => {
#                   'values' => [
#                       'value_a_1',
#                       'value_a_2',
#                       ...,
#                       'value_a_n'
#                   ]
#                },
#               ...,
#               'mergeable_item_m' => {
#                   'values' => [
#                       'value_m_1',
#                       'value_m_2',
#                       ...,
#                       'value_m_n'
#                   ]
#                }
#           },
#       # Unmergeable data items are identified by their data names
#       # and returned together with a human-readable description
#       # of the reason they were deemed unmergeable
#           'unmergeable' => {
#               'unmergeable_item_a' => '...',
#               'unmergeable_item_b' => '...',
#               ...
#           }
#       }
##
sub classify_looped_items_by_mergeability
{
    my ($definition_block, $key_tag, $category_tags) = @_;

    my $analysed_items = {
        'mergeable'   => {},
        'unmergeable' => {},
    };

    if (!exists $definition_block->{'values'}{$key_tag}) {
        for my $loop_tag (@{$category_tags}) {
            next if !exists $definition_block->{'values'}{$loop_tag};
            $analysed_items->{'unmergeable'}{$loop_tag} =
                "data item '$loop_tag' appears in a data block that " .
                'does not contain the associated looped list reference ' .
                "data item '$key_tag'";
        }
        return $analysed_items;
    }

    my $key_loop_index = get_item_loop_index($definition_block, $key_tag);
    $key_loop_index = -1 if !defined $key_loop_index;

    my %unmergeable_items;
    my @mergeable_items = ( $key_tag );
    for my $loop_tag (@{$category_tags}) {
        next if !exists $definition_block->{'values'}{$loop_tag};
        my $item_loop_index = get_item_loop_index($definition_block, $loop_tag);
        $item_loop_index = -1 if !defined $item_loop_index;
        if ($key_loop_index != $item_loop_index) {
            $unmergeable_items{$loop_tag} =
                "data item '$loop_tag' and the associated looped list " .
                "reference data item '$key_tag' do not appear in the " .
                'same loop';
            next;
        }
        push @mergeable_items, $loop_tag;
    }

    $analysed_items->{'unmergeable'} = \%unmergeable_items;
    for my $mergeable_item (@mergeable_items) {
        push @{$analysed_items->{'mergeable'}{$mergeable_item}{'values'}},
             @{$definition_block->{'values'}{$mergeable_item}};
    }

    return $analysed_items;
}

##
# Removes data values sets that are already present in the base loop
# from the new loop. Dies in case an identical loop key with different
# associated values is located.
#
# @param $base_loop_items
#       Reference to a looped list data structure as returned by
#       the classify_looped_items_by_mergeability() subroutine:
#       {
#           'item_a_name' => {
#                       'values' => [
#                           'base_value_a_1',
#                           'base_value_a_2',
#                           ...
#                       ],
#           },
#           'item_c_name' => {
#                       'values' => [
#                           'base_value_c_1',
#                           'base_value_c_2',
#                           ...
#                       ],
#           },
#           ...
#       }
# @param $new_loop_items
#       Reference to a looped list data structure as returned by
#       the classify_looped_items_by_mergeability() subroutine:
#       {
#           'item_a_name' => {
#                       'values' => [
#                           'new_value_a_1',
#                           'new_value_a_2',
#                           'new_value_a_3',
#                           ...
#                       ],
#           },
#           'item_b_name' => {
#                       'values' => [
#                           'new_value_b_1',
#                           'new_value_b_2',
#                           'new_value_b_3',
#                           ...
#                       ],
#           },
#           ...
#       }
# @param $loop_key
#       Name of the data item that acts as the looped list reference.
# @return
#       Reference to the modified $new_loop_items data structure.
##
sub collapse_identical_loop_rows
{
    my ($base_loop_items, $new_loop_items, $loop_key) = @_;

    return $new_loop_items if !%{$base_loop_items};

    my $non_key_loop_items = [ keys %{$base_loop_items}, keys %{$new_loop_items} ];
    @{$non_key_loop_items} = sort { $a cmp $b }
                                uniq grep {$_ ne $loop_key}
                                    @{$non_key_loop_items};

    my @duplicate_value_indexes;
    my @conflict_value_indexes;
    for (my $i = 0; $i < @{$new_loop_items->{$loop_key}{'values'}}; $i++) {
        my $new_key_value = $new_loop_items->{$loop_key}{'values'}[$i];
        for (my $j = 0; $j < @{$base_loop_items->{$loop_key}{'values'}}; $j++) {
            my $base_key_value = $base_loop_items->{$loop_key}{'values'}[$j];
            next if ($new_key_value ne $base_key_value);
            my $loop_values_match = 1;
            for my $loop_item (@{$non_key_loop_items}) {
                my $new_loop_value =
                            exists $new_loop_items->{$loop_item} ?
                                   $new_loop_items->{$loop_item}{'values'}[$i] :
                                   undef;
                my $base_loop_value =
                            exists $base_loop_items->{$loop_item} ?
                                   $base_loop_items->{$loop_item}{'values'}[$j] :
                                   undef;
                $loop_values_match = are_identical_values(
                                        $new_loop_value,
                                        $base_loop_value
                                     );
                last if !$loop_values_match;
            }
            if ($loop_values_match) {
                push @duplicate_value_indexes, $i;
            } else {
                push @conflict_value_indexes, [$j, $i];
            }
        }
    }

    for my $conflict_index_pair (@conflict_value_indexes) {
        my $base_loop_index = $conflict_index_pair->[0];
        my $new_loop_index = $conflict_index_pair->[1];
        die "data item '$loop_key' value '" .
             $new_loop_items->{$loop_key}{'values'}[$new_loop_index] .
             '\' is associated with different sets of [' .
              ( join ', ', map {"'$_'"} ( @{$non_key_loop_items} ) ) .
             '] data item values in the base definition and in ' .
             'the new definition ([' .
             ( join ', ', map { exists $base_loop_items->{$_} ?
                        "'$base_loop_items->{$_}{'values'}[$base_loop_index]'" :
                        'undef' } @{$non_key_loop_items} ) .
             '] vs. [' .
              ( join ', ', map { exists $new_loop_items->{$_} ?
                        "'$new_loop_items->{$_}{'values'}[$new_loop_index]'" :
                        'undef' } @{$non_key_loop_items} ) .
             '])' . "\n";
    }

    if (@duplicate_value_indexes) {
        for my $item_name (keys %{$new_loop_items}) {
            my @loop_values = @{$new_loop_items->{$item_name}{'values'}};
            my %ref_index;
            @ref_index{ @duplicate_value_indexes } = ();
            @loop_values = @loop_values[
                                        grep { !exists $ref_index{$_} }
                                                        (0..$#loop_values)
                                        ];
            $new_loop_items->{$item_name}{'values'} = \@loop_values;
        }
    }

    return $new_loop_items;
}

##
# Evaluates if two data values can be considered duplicates.
#
# @param $value_1
#       First of the values to be compared. May be undefined.
# @param $value_2
#       Second of the values to be compared. May be undefined.
# @return
#       '1' if values can be considered duplicates,
#       '0' otherwise.
##
sub are_identical_values
{
    my ($value_1, $value_2) = @_;

    return 1 if (!defined $value_1 && !defined $value_2);
    return 0 if !defined $value_1 || !defined $value_2;

    return ($value_1 eq $value_2) ? 1 : 0;
}

##
# Merges two looped lists of different sizes into a single looped list.
# Missing data values are replaced by CIF unknown ('?') values.
#
# @param $base_loop_items
#       Reference to a data structure of mergeable data items as
#       returned by the classify_looped_items_by_mergeability()
#       subroutine:
#       {
#           'item_a_name' => {
#                       'values' => [
#                           'base_value_a_1',
#                           'base_value_a_2',
#                           ...
#                       ],
#           },
#           'item_c_name' => {
#                       'values' => [
#                           'base_value_c_1',
#                           'base_value_c_2',
#                           ...
#                       ],
#           },
#           ...
#       }
# @param $new_loop_items
#       Reference to a data structure that contains data names
#       and data values of items that should be merged:
#       {
#           'item_a_name' => {
#                       'values' => [
#                           'new_value_a_1',
#                           'new_value_a_2',
#                           'new_value_a_3',
#                           ...
#                       ],
#           },
#           'item_b_name' => {
#                       'values' => [
#                           'new_value_b_1',
#                           'new_value_b_2',
#                           'new_value_b_3',
#                           ...
#                       ],
#           },
#           ...
#       }
# @return \%merged_loops
#       Reference to a data structure that contains a merged looped list
#       of the following form:
#       {
#           'item_a_name' => [
#                         'old_value_a_1',
#                         'old_value_a_2',
#                         ...,
#                         'new_value_a_1',
#                         'new_value_a_2',
#                         'new_value_a_3',
#                         ...
#                       ],
#           'item_b_name' => [
#                         '?',
#                         '?',
#                         ...,
#                         'new_value_b_1',
#                         'new_value_b_2',
#                         'new_value_b_3',
#                         ...
#                       ],
#           'item_c_name' => [
#                         'old_value_c_1',
#                         'old_value_c_2',
#                         ...,
#                         '?',
#                         '?',
#                         '?',
#                         ...
#                       ],
#           ...,
#       }
##
sub merge_looped_values
{
    my ($base_loop_items, $new_loop_items) = @_;

    my %merged_loop_items;
    # In case only new data items were added
    if (!%{$base_loop_items}) {
        for my $tag (keys %{$new_loop_items}) {
            push @{$merged_loop_items{$tag}},
                 @{$new_loop_items->{$tag}{'values'}};
        }
        return \%merged_loop_items;
    }

    # In case data item values need merging
    my $base_loop_length = scalar @{$base_loop_items->{
                            (sort keys %{$base_loop_items})[0]
                           }{'values'}};
    my $new_loop_length = scalar @{$new_loop_items->{
                            (sort keys %{$new_loop_items})[0]
                          }{'values'}};
    for my $tag (keys %{$base_loop_items}) {
        push @{$merged_loop_items{$tag}},
             @{$base_loop_items->{$tag}{'values'}};
        if (exists $new_loop_items->{$tag}) {
            push @{$merged_loop_items{$tag}},
                 @{$new_loop_items->{$tag}{'values'}};
        } else {
            push @{$merged_loop_items{$tag}},
                 ( '?' x $new_loop_length );
        }
    }
    for my $tag (keys %{$new_loop_items}) {
        next if exists $merged_loop_items{$tag};
        if (exists $base_loop_items->{$tag} ) {
            push @{$merged_loop_items{$tag}},
                 @{$base_loop_items->{$tag}{'values'}};
        } else {
            push @{$merged_loop_items{$tag}},
                 ( '?' x $base_loop_length );
        }
        push @{$merged_loop_items{$tag}},
             @{$new_loop_items->{$tag}{'values'}};
    }

    return \%merged_loop_items;
}

##
# END: DDL1 dictionary merging subroutines
##

##
# Constructs a hash that maps data item names to the names of their
# alternate data items.
#
# @param $dics
#       Reference to an array of DDL1 dictionary data structures
#       as returned by the build_ddl1_dic() subroutine.
# @return
#       Reference to a hash of the following form:
#       {
#         'item_1' => [ 'alternate_item_1', ],
#         'item_2' => [ 'alternate_item_2_a', ..., 'alternate_item_2_z', ],
#         ...,
#         'item_n' => [ 'alternate_item_n', ],
#       }
##
sub get_item_to_alternate_items_mapping
{
    my ($dics) = @_;

    my %item_to_alternate_items;
    for my $dic (@{$dics}) {
        my $items = get_ddl1_dic_items($dic);
        for my $tag (sort keys %{$items}) {
            my $alternate_item_names = get_alternate_item_names($items->{$tag});
            next if !@{$alternate_item_names};
            $item_to_alternate_items{$tag} = $alternate_item_names;
        }
    }

    return \%item_to_alternate_items
}

##
# Extracts the data names of items that are marked as alternates of
# the given item.
#
# @param $data_item
#       Data item definition block as returned by the COD::CIF::Parser.
# @return
#       Reference to an array of data names.
##
sub get_alternate_item_names
{
    my ($dic_item) = @_;

    return [] if !exists $dic_item->{'values'}{'_related_item'};
    return [] if !exists $dic_item->{'values'}{'_related_function'};
    # check if items reside in the same loop (or are both unlooped)
    my $related_item_loop = get_item_loop_index($dic_item, '_related_item');
    $related_item_loop = -1 if !defined $related_item_loop;

    my $related_function_loop = get_item_loop_index($dic_item, '_related_function');
    $related_function_loop = -1 if !defined $related_function_loop;
    return [] if $related_item_loop != $related_function_loop;

    my @alternate_item_names;
    for (my $i = 0; $i < @{$dic_item->{'values'}{'_related_item'}}; $i++) {
        next if $dic_item->{'values'}{'_related_function'}[$i] ne 'alternate';
        push @alternate_item_names, $dic_item->{'values'}{'_related_item'}[$i];
    };

    return \@alternate_item_names;
}

# NOTE: the subroutine was copied from the COD::CIF::DDL::DDLm module.
##
# Groups validation issues with identical messages together and replaces
# each group with a single validation issue that contains a summarized
# version of the message.
#
# @param $issues
#       Array reference to a list of validation message data structures
#       of the following form:
#       {
#         # Code of the data block that contains the offending entry
#           'data_block_code' => 'offending_block_code',
#         # Code of the save frame that contains the offending entry.
#         # Might be undefined
#           'save_frame_code' => 'offending_frame_code',
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Human-readable description of the issue
#           'message' => 'issue description'
#       }
#
# @return $summarised_issues
#       Reference to an array of unique summarised issues.
##
sub summarise_validation_issues
{
    my ($issues) = @_;

    my %message_count;
    for my $issue (@{$issues}) {
        $message_count{$issue->{'message'}}{'count'}++;
        $message_count{$issue->{'message'}}{'representative_issue'} = $issue;
    }

    my @summarised_issues;
    for my $message ( sort keys %message_count ) {
        my $count = $message_count{$message}->{'count'};
        my $issue = $message_count{$message}->{'representative_issue'};
        if( $count > 1 ) {
            $issue->{'message'} = $message . " ($count times)";
        }
        push @summarised_issues, $issue;
    }

    return \@summarised_issues;
}

##
# Returns an array of tags of data items that have superseded the data item.
#
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @param $tag
#       Lowercased name of the data item.
# @return
#       Array of tags of data items that have superseded the data item.
##
sub get_replacement_tags
{
    my ( $dic_items, $tag ) = @_;

    return [] if !exists $dic_items->{$tag};
    my $dic_item = $dic_items->{$tag}{'values'};
    return [] if !exists $dic_item->{'_related_item'};

    my @replace_with;
    # check if data items are deprecated (replaced with other data items)
    for( my $i = 0; $i < @{$dic_item->{'_related_item'}}; $i++ ) {
        if( $dic_item->{'_related_function'}[$i] eq 'replace' ) {
            push @replace_with, $dic_item->{'_related_item'}[$i];
        }
    }

    return \@replace_with;
}

##
# Returns an array of tags of the data items that are required to be present
# in the loop containing the analysed data item.
#
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @param $tag
#       Lowercased name of the data item to analyse.
# @return $list_reference_tags
#       A reference to an array of tags of data items that are required to
#       be present in the loop containing the analysed data items.
##
sub get_list_reference_tags
{
    my ( $dic_items, $tag ) = @_;

    return [] if !exists $dic_items->{$tag};
    my $dic_item = $dic_items->{$tag}{values};
    return [] if !exists $dic_item->{'_list_reference'};

    my @list_reference_tags;
    # _list_reference identifies data items that must collectively be
    # in a loop. They are referenced by the names of their data blocks
    for my $ref_dataname (@{$dic_item->{'_list_reference'}}) {
      for my $dic_tag ( sort keys %{$dic_items} ) {
          if ( '_' . $dic_items->{$dic_tag}{values}{'_dataname'} eq $ref_dataname ) {
              push @list_reference_tags, $dic_tag;
          }
      }
    }

    return \@list_reference_tags;
}

##
# Checks the existence of parent (foreign) keys as specified by a DDL1 dictionary.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl1_dic() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub check_list_link_parent($$$)
{
    my ( $block, $tag, $dic_item ) = @_;

    return [] if !exists $dic_item->{'values'}{'_list_link_parent'};
    my $parents = $dic_item->{'values'}{'_list_link_parent'};

    # TODO: not handled yet, unsure how to do that
    return [] if @{$parents} > 1;
    my $parent = $parents->[0];

    my @validation_issues;
    if ( !exists $block->{values}{$parent} ) {
        push @validation_issues,
             {
                'test_type'  => 'PRESENCE_OF_PARENT_DATA_ITEM',
                'data_items' => [ $tag, $parent ],
                'message'    =>
                    'missing parent data item -- the ' .
                    q{'} . ( canonicalise_tag($parent) ) . q{'} .
                    ' data item is required by the ' .
                    q{'} . ( canonicalise_tag($tag) ) . q{'} . ' data item',
             };
        return \@validation_issues;
    }

    my %parent_values = map { $_ => 1 } @{$block->{values}{$parent}};

    my @unmatched = uniq sort grep { !exists $parent_values{$_} }
                         @{$block->{values}{$tag}};

    for my $value (@unmatched) {
        # FIXME: these special CIF values should be handled properly
        # by taking their quotation into account
        next if ( $value eq '.' || $value eq '?' );
        push @validation_issues,
             {
                'test_type'  => 'PRESENCE_OF_PARENT_DATA_ITEM_VALUE',
                'data_items' => [ $tag, $parent ],
                'message'    =>
                    'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                    ' contains value ' . q{'} . $value . q{'} . ' that was ' .
                    'not found among the values of the parent data item ' .
                    q{'} . ( canonicalise_tag($parent) ) . q{'},
             };
    }

    return \@validation_issues;
}

##
# Checks enumeration values against a DDL1 dictionary.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl1_dic() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Ignore the case while matching enumerators
#           'ignore_case'  => 0
#       # Treat data values as potentially consisting of a
#       # combination of several enumeration values
#           'treat_as_set' => 0
#       }
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_enumeration_set
{
    my ($data_block, $tag, $dic_item, $options) = @_;

    return [] if !exists $dic_item->{'values'}{'_enumeration'};
    my $enum_set = $dic_item->{'values'}{'_enumeration'};

    my @values;
    for ( my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++ ) {
        next if has_special_value($data_block, $tag, $i);
        push @values, $data_block->{'values'}{$tag}[$i];
    }

    my @issues;
    my $is_proper_enum = check_enumeration_set( \@values, $enum_set, $options );
    for ( my $i = 0; $i < @{ $is_proper_enum }; $i++ ) {
        next if !$is_proper_enum->[$i];
        push @issues,
             {
               'test_type'  => 'ENUMERATION_SET',
               'data_items' => [ $tag ],
               'message'    =>
                    'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                    ' value ' . q{'} . $values[$i] . q{'} . ' must be one ' .
                    'of the enumeration values [' .
                        ( join ', ', map {"'$_'"} @{$enum_set} ) .
                    ']'
             };
    };

    return \@issues;
}

##
# Checks values with standard uncertainties against a DDL1 dictionary.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl1_dic() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_su
{
    my ( $data_block, $tag, $dic_item ) = @_;

    return [] if is_su_permitted($dic_item);

    my @validation_issues;
    for (my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++) {
        next if  has_special_value($data_block, $tag, $i);
        next if !has_numeric_value($data_block, $tag, $i);

        my $value = $data_block->{'values'}{$tag}[$i];
        if ( $value =~ /([(][0-9]+[)])$/ ) {
            push @validation_issues,
                 {
                    'test_type'  => 'SU_ELIGIBILITY',
                    'data_items' => [ $tag ],
                    'message'    =>
                        'data item ' .
                        q{'} . ( canonicalise_tag($tag) ) . q{'} . ' value ' .
                        q{'} . $value . q{'} . ' is not permitted ' .
                        'to contain the appended standard uncertainty value ' .
                        "'$1'",
                }
        }
    }

    return \@validation_issues;
}

##
# Evaluates if the DDL1 dictionary definition permits data item values
# to contain standard uncertainty values.
#
# @param $dic_item
#       Dictionary definition of the data item as returned by build_ddl1_dic()
#       subroutine.
# @return
#       '1' if the s.u. value is permitted,
#       '0' otherwise.
##
sub is_su_permitted
{
    my ( $dic_item ) = @_;

    return 1 if !exists $dic_item->{'values'}{'_type'};
    return 1 if $dic_item->{'values'}{'_type'}[0] ne 'numb';

    my $is_su_permitted = any { $_ eq 'esd' || $_ eq 'su' }
                            @{$dic_item->{'values'}{'_type_conditions'}};

    return $is_su_permitted;
}

##
# Checks if values are within the range specified by a DDL1 dictionary.
#
# In case the value has an associated standard uncertainty (s.u.) value
# the range is extended from [x; y] to [x-3s; y+3s] where 's' is the s.u.
# value. Standard uncertainty values are considered in range comparison
# even if the data item is not formally eligible to have an associated
# s.u. value at all.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl1_dic() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#         # Multiplier that should be applied to the standard
#         # uncertainty (s.u.) when determining if a numeric
#         # value resides in the specified range. For example,
#         # a multiplier of 3.5 means that the value is treated
#         # as valid if it falls in the interval of
#         # [lower bound - 3.5 * s.u.; upper bound + 3.5 * s.u.]
#           'range_su_multiplier' => 3,
#       }
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_range
{
    my ( $data_block, $tag, $dic_item, $options ) = @_;

    return [] if !exists $dic_item->{'values'}{'_enumeration_range'};

    my $range = parse_range($dic_item->{'values'}{'_enumeration_range'}[0]);
    my $range_type = $dic_item->{'values'}{'_type'}[0];
    my $range_su_multiplier = $options->{'range_su_multiplier'};

    my @validation_issues;
    for (my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++) {
        next if has_special_value($data_block, $tag, $i);
        next if !has_numeric_value($data_block, $tag, $i) &&
                $range_type eq 'numb';

        my $value = $data_block->{'values'}{$tag}[$i];
        if ( $range_type eq 'char' && length $value > 1 ) {
            push @validation_issues,
                 {
                   'test_type'  => 'ENUM_RANGE.CHAR_STRING_LENGTH',
                   'data_items' => [ $tag ],
                   'message'    =>
                        'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                        ' value ' . q{'} . $value . q{'} . ' violates range ' .
                        'constraints -- the values should consist of a ' .
                        'single character from the range ' .
                        range_to_string( $range, { 'type' => $range_type } )
                 };
            next;
        }

        my $su = $data_block->{'precisions'}{$tag}[$i];
        if ( $range_type eq 'numb' ) {
            $value =~ s/[(][0-9]+[)]$//;
        }

        if( is_in_range( $value,
                { 'type'  => $range_type,
                  'range' => $range,
                  'sigma' => $su,
                  'multiplier' => $range_su_multiplier, } ) <= 0 ) {
            push @validation_issues,
                 {
                   'test_type' => 'ENUM_RANGE.IN_RANGE',
                   'data_items' => [ $tag ],
                   'message'    =>
                        'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                        ' value ' . q{'} . $data_block->{'values'}{$tag}[$i] . q{'} .
                        ' should be in range ' .
                        range_to_string( $range, { 'type' => $range_type } )
                 };
        }
    }

    return \@validation_issues;
}

##
# Checks if values satisfy the DDL1 data type constraints.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl1_dic() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Treat numbers expressed using the double precision notation
#       # (i.e. 0.42D+7) as proper numbers
#           'allow_double_precision_notation'  => 0
#       }
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_data_type
{
    my ( $data_block, $tag, $dic_item, $options ) = @_;

    my $data_type = get_data_type( $dic_item );
    return [] if !defined $data_type;
    return [] if $data_type ne 'numb';

    my $allow_d_notation = $options->{'allow_double_precision_notation'};

    my @validation_issues;
    for ( my $i = 0; $i < @{$data_block->{'values'}{$tag}}; $i++ ) {
        next if has_special_value($data_block, $tag, $i);
        next if has_numeric_value($data_block, $tag, $i);
        my $value = $data_block->{'values'}{$tag}[$i];

        my $message =
            'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
            ' value ' . q{'} . $value . q{'} . ' violates type constraints -- ';

        my $is_quoted_number = 0;
        if ( $allow_d_notation ) {
            $is_quoted_number = is_ddl1_number( $value );
            if ( $is_quoted_number ) {
                next if has_uqstring_value( $data_block, $tag, $i );
            }
        } else {
            $is_quoted_number = is_cif_1_number( $value );
        }

        my $test_type;
        if ( $is_quoted_number ) {
            $test_type = 'TYPE_CONSTRAINT.QUOTED_NUMERIC_VALUES';
            $message .=
                 'numeric values should be written without the use ' .
                 'of quotes or multiline value designators'
        } else {
             $test_type = 'TYPE_CONSTRAINT.PROPER_NUMERIC_VALUES';
             $message .=
                 'the value should be a numerically interpretable string, ' .
                 "e.g. '42', '42.00', '4200E-2'"
        };

        push @validation_issues,
             {
               'test_type' =>  $test_type,
               'data_items' => [ $tag ],
               'message'    => $message
             };
    }

    return \@validation_issues;
}

##
# Evaluates if the given value is a numeric one according to the CIF_1.1 syntax.
#
# @param $value
#       Value to be evaluated.
# @return
#       '1' if the value is numeric,
#       '0' otherwise.
##
sub is_cif_1_number
{
    my ($value) = @_;
    my $u_int   = '[0-9]+';
    my $int     = "[+-]?${u_int}";
    my $exp     = "[eE][+-]?${u_int}";
    my $u_float = "(?:${u_int}${exp})|(?:[0-9]*[.]${u_int}|${u_int}+[.])(?:${exp})?";
    my $float   = "[+-]?(?:${u_float})";

    return ( $value =~ m/^(?:${int}|${float})$/ ) ? 1 : 0;
}

##
# Evaluates if the given value is a numeric one according to the DDL1 core
# dictionary.
#
# @param $value
#       Value to be evaluated.
# @return
#       '1' if the value is numeric,
#       '0' otherwise.
##
sub is_ddl1_number
{
    my ($value) = @_;
    my $u_int   = '[0-9]+';
    my $int     = "[+-]?${u_int}";
    my $exp     = "[eEdD][+-]?${u_int}";
    my $u_float = "(?:${u_int}${exp})|(?:[0-9]*[.]${u_int}|${u_int}+[.])(?:${exp})?";
    my $float   = "[+-]?(?:${u_float})";

    return ( $value =~ m/^(?:${int}|${float})$/ ) ? 1 : 0;
}

##
# Checks if data names are defined in at least one of the given dictionaries.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $data_names
#       Reference to a hash of known data names as returned
#       by the get_all_data_names() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Report local data names as unrecognised data names.
#       # By default, the DDL1 definition of a local data name
#       # is used that covers all data names with the '_[local]'
#       # prefix
#           'report_local_tags'         => 0,
#       # Extend the local data name definition to include the
#       # local category names that apply to DDL2 and DDLm
#       # dictionaries (i.e. _category.[local]_item)
#           'allow_category_local_tags' => 0
#       }
# @param $report_local_tags
#       Boolean value denoting if the local data item names should be
#       reported as unrecognised data names.
# @return
#       Array reference to a list of validation messages.
##
sub report_unrecognised_data_names
{
    my ( $data_block, $data_names, $options ) = @_;

    my $report_local_tags         = $options->{'report_local_tags'};
    my $allow_category_local_tags = $options->{'allow_category_local_tags'};

    my @validation_messages;

    my @tags = sort @{$data_block->{'tags'}};
    if ( !$report_local_tags ) {
        if ( $allow_category_local_tags ) {
            @tags = grep { !is_local_data_name($_) } @tags;
        } else {
            @tags = grep { !is_general_local_data_name($_) } @tags;
        }
    }

    @tags = grep { !exists $data_names->{lc $_} } @tags;

    @validation_messages = map {
              'definition of the ' . q{'} . ( canonicalise_tag($_) ) . q{'} .
              ' data item was not found in the provided dictionaries';
          } @tags;

    return \@validation_messages;
}

##
# Checks various data item constraints that apply to the entire loop
# rather than individual data item values.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $data_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Reference to a hash that maps subcategory reference data items
#       # to reference items of the parent category as returned by
#       # the get_subcategory_key_to_category_key_mapping() subroutine
#           'sub_key_to_main_key' => {
#               'sub_1_key_item_name' => 'main_1_key_item_name',
#               'sub_2_key_item_name' => 'main_1_key_item_name',
#               ...,
#               'sub_n_key_item_name' => 'main_m_key_item_name',
#           }
#       }
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_block_loops
{
    my ($data_block, $dic_items, $options) = @_;

    my $sub_key_to_main_key = defined $options->{'sub_key_to_main_key'} ?
                                      $options->{'sub_key_to_main_key'} : {};

    my $list_references = get_all_list_references($dic_items);

    my @validation_issues;
    for my $loop_tags ( @{$data_block->{'loops'}} ) {
        push @validation_issues,
             @{ validate_loop_reference_items(
                    $loop_tags,
                    $dic_items,
                    {
                        'sub_key_to_main_key' => $sub_key_to_main_key
                    }
                ) };

        my $covered_sets = select_covered_reference_sets($list_references, $loop_tags);
        for my $reference_tags ( @{$covered_sets} ) {
            next if !@{$reference_tags};
            $reference_tags = [ map { canonical_tag_name($_) } @{$reference_tags} ];
            if ( @{$reference_tags} == 1 ) {
                push @validation_issues,
                        @{ check_simple_key_uniqueness(
                           $data_block,
                           $reference_tags->[0],
                           get_data_type( $dic_items->{$reference_tags->[0]} )
                        ) }
            } else {
                my %ref_types;
                for my $data_name ( @{$reference_tags} ) {
                    $ref_types{$data_name} =
                            get_data_type( $dic_items->{$data_name} );
                }

                push @validation_issues,
                        @{ check_composite_key_uniqueness(
                           $data_block,
                           $reference_tags,
                           \%ref_types
                        ) }
            }
        }
    }

    for my $group (@{$list_references}) {
        my @tags = sort map { canonical_tag_name( $_ ) }
                            @{$group->{'key_data_items'}},
                            @{$group->{'sub_data_items'}};
        my %loops;
        for my $tag (@tags) {
            next if !$data_block->{'inloop'}{$tag};
            $loops{$data_block->{'inloop'}{$tag}} = 1;
        }
        next if keys %loops <= 1;
        my $message =
            'data items [' .
                ( join ', ', map { q{'} . ( canonicalise_tag($_) ) . q{'} } @tags ) .
            '] must all appear in the same loop';
        push @validation_issues,
             {
                'test_type'  => 'LOOP.INTEGRITY',
                'data_items' => \@tags,
                'message'    => $message
             }
    }

    return \@validation_issues;
}

##
# Checks the uniqueness constraint of a simple loop key that consists
# of a single data item.
#
# @param $data_block
#       Data block in which the data item resides as returned
#       by the COD::CIF::Parser.
# @param $data_name
#       Data name of the data item which acts as the unique loop key.
# @param $key_type
#       Data type of the key as defined in the DDL1 dictionary.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub check_simple_key_uniqueness
{
    my ($data_block, $data_name, $key_type) = @_;

    my $unique_key_violations =
            get_simple_unique_key_violations($data_block, $data_name, $key_type);

    my @messages;
    for my $key ( sort keys %{$unique_key_violations} ) {
        push @messages,
            'data item ' . q{'} . ( canonicalise_tag($data_name) ) . q{'} .
            ' acts as a loop key, but the associated data values are not ' .
            "unique -- value '$key' appears " .
                ( scalar @{$unique_key_violations->{$key}} ) .
            ' times as [' .
                ( join ', ', uniq map { "'$_'" }
                    @{$unique_key_violations->{$key}} ) .
            ']';
    }

    my @validation_issues;
    for my $message ( @messages ) {
        push @validation_issues,
             {
                'test_type'  => 'SIMPLE_KEY_UNIQUENESS',
                'data_items' => [ $data_name ],
                'message'    => $message
             }
    }

    return \@validation_issues;
}

##
# Identifies values that violate a simple unique key constraint in
# the given data block.
#
# @param $data_block
#       Data block in which the data items reside as returned
#       by the COD::CIF::Parser.
# @param $data_name
#       Data name of the data item which acts as the unique loop key.
# @param $key_type
#       Data type of the key data item as defined in the DDL1 dictionary.
# @return $unique_key_violations
#       Reference to a data structure that details the violations
#       of the unique key constraint. The data structure takes the
#       following form:
#       {
#       # canonicalised values serve as hash keys and point to arrays
#       # that contain the duplicate values in their original form, i.e.:
#           '10'   => [ '10', '1E+1', '1000E-2', ... ],
#           'text' => [ 'text', 'text', 'text' ],
#            ...,
#       }
##
sub get_simple_unique_key_violations
{
    my ($data_block, $data_name, $key_type) = @_;

    my %grouped_values;
    for ( my $i = 0; $i < @{$data_block->{'values'}{$data_name}}; $i++ ) {
        # TODO: special values are silently skipped, but maybe they should
        # still be reported somehow since having special value in a key
        # might not be desirable...
        next if has_special_value($data_block, $data_name, $i);
        my $value = $data_block->{'values'}{$data_name}[$i];
        my $canon_value = canonicalise_value( $value, $key_type );
        push @{$grouped_values{$canon_value}}, $value;
    };

    my %unique_key_violations;
    for my $key ( keys %grouped_values ) {
        next if @{$grouped_values{$key}} < 2;
        $unique_key_violations{$key} = $grouped_values{$key};
    }

    return \%unique_key_violations;
}

##
# Checks the uniqueness constraint of a composite loop key that consists
# of multiple data items.
#
# @param $data_block
#       Data block in which the data items reside as returned
#       by the COD::CIF::Parser.
# @param $data_names
#       Reference to an array of data item names that act as
#       the composite unique loop key.
# @param $data_types
#       Reference to a hash containing the data types of
#       the key data items as defined in a DDL1 dictionary.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub check_composite_key_uniqueness
{
    my ($data_block, $data_names, $data_types) = @_;

    if ( !@{$data_names} ) {
        return [];
    }

    my $violations = get_composite_unique_key_violations(
                        $data_block,
                        $data_names,
                        $data_types
                     );

    my @messages;
    for my $violation ( @{$violations} ) {
        my @duplicates;
        for my $values ( @{$violation->{'duplicate_values'}} ) {
            push @duplicates,
                 '[' . ( join ', ', map { "'$_'" } @{$values} ) . ']';
        }

        push @messages,
            'data items [' .
                ( join ', ',
                    map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                        @{$data_names} ) .
            '] act as a composite loop key, but the associated data values ' .
            'are not collectively unique -- values [' .
                ( join ', ', map { "'$_'" } @{$violation->{'canonical_values'}} ) .
            '] appear ' .
                ( scalar @{$violation->{'duplicate_values'}} ) .
            ' times as [' .
                ( join ', ', uniq @duplicates ) .
            ']';
    }

    my @validation_issues;
    for my $message ( @messages ) {
        push @validation_issues,
             {
                'test_type'  => 'COMPOSITE_KEY_UNIQUENESS',
                'data_items' => $data_names,
                'message'    => $message
             }
    }

    return \@validation_issues;
}

##
# Identifies values that violate a composite unique key constraint in
# the given data block.
#
# @param $data_block
#       Data block in which the data items reside as returned
#       by the COD::CIF::Parser.
# @param $data_names
#       Reference to an array of data item names that act as
#       the composite unique loop key.
# @param $data_types
#       Reference to a hash containing the data types of
#       the key data items as defined in a DDL1 dictionary.
# @return $unique_key_violations
#       Reference to an array of data structures that detail
#       the violations of the composite unique key constraint.
#       Each data structure takes the following form:
#       {
#       # Values in the canonical form
#           'canonical_values' => [ 'text_1', '10', 'text_2' ],
#       # Duplicate values in their original form
#           'duplicate_values' => [
#                                   [ 'text_1', '10',     'text_2' ],
#                                   [ 'text_1', '1E+1',   'text_2' ],
#                                   ...,
#                                   [ 'text_1', '1000E-1', 'text_2' ]
#                                 ]
#       }
##
sub get_composite_unique_key_violations
{
    my ($data_block, $data_names, $data_types) = @_;

    my $join_char = "\x{001E}";
    my %grouped_values;
    my $loop_size = @{$data_block->{'values'}{$data_names->[0]}};
    for ( my $i = 0; $i < $loop_size; $i++ ) {
        my $composite_key = '';
        my @composite_key_values;
        my $has_special_value = 0;
        for my $data_name ( @{$data_names } ) {
            # TODO: composite keys containing special values are silently
            # skipped, but maybe they should still be reported somehow since
            # having special value in a key might render it unusable
            if ( has_special_value($data_block, $data_name, $i) ) {
                $has_special_value = 1;
                last;
            };

            my $value = $data_block->{'values'}{$data_name}[$i];
            my $data_type = $data_types->{lc $data_name};
            push @composite_key_values, $value;
            $composite_key .= canonicalise_value( $value, $data_type ) .
                              "$join_char";
        }
        if (!$has_special_value) {
            push @{$grouped_values{$composite_key}}, \@composite_key_values;
        }
    }

    my @unique_key_violations;
    for my $key (sort keys %grouped_values) {
        next if @{$grouped_values{$key}} < 2;
        my %violation;
        $violation{'canonical_values'} = [ split /$join_char/, $key ];
        $violation{'duplicate_values'} = $grouped_values{$key};
        push @unique_key_violations, \%violation;
    }

    return \@unique_key_violations;
}

##
# Groups data items based on their list references declared in the DDL1
# dictionary.
#
# @param $data_names
#       Reference to an array of data item names that should be grouped.
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @return
#       Reference to an array of data structures of the following form:
#       {
#         # Names of the data items comprising the list reference
#           'key_data_items' => [ '_key_data_name_1', '_key_data_name_2' ],
#         # Names of the data items that share the same list reference
#           'sub_data_items' => [ '_item_1', _item_2', '_item_3' ]
#       },
##
sub group_items_by_list_references
{
    my ( $data_names, $dic_items ) = @_;

    my %grouped_items;
    my $join_char = "\x{001E}";
    for my $tag ( map { lc } @{$data_names} ) {
        next if !exists $dic_items->{$tag};
        my $key_data_names = get_list_reference_tags($dic_items, $tag);
        next if !@{$key_data_names};

        my $key = join $join_char, map { lc } @{$key_data_names};
        if ( !defined $grouped_items{$key} ) {
            $grouped_items{$key}{'key_data_items'} = $key_data_names
        }
        push @{$grouped_items{$key}{'sub_data_items'}}, lc $tag;
    }
    my @item_groups = map { $grouped_items{$_} } sort keys %grouped_items;

    return \@item_groups;
}

##
# Selects those reference sets that can be constructed from the given data
# items.
#
# @param $list_references
#       Reference to an array of list reference groups as returned by
#       the group_items_by_list_references() subroutine.
# @param $data_items
#       Reference to an array of data items names that can be used to
#       construct the set.
# @return $covered_list_references
#       Reference to an array of list reference sets.
##
sub select_covered_reference_sets
{
    my ( $list_references, $data_items ) = @_;

    my @covered_list_references;
    for my $group ( @{$list_references} ) {
        my $key_data_items = $group->{'key_data_items'};
        next if !@{$key_data_items};

        my $is_eligible_ref_set = 1;
        for my $key_data_item ( @{$key_data_items} ) {
            $is_eligible_ref_set &=
                    any { lc $key_data_item eq lc $_ } @{$data_items};
        }
        if ( $is_eligible_ref_set ) {
            push @covered_list_references, $key_data_items;
        }
    }

    return \@covered_list_references;
}

##
# Gets all list reference sets that are described in the provided DDL1
# data item definitions.
#
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @return $list_ref_groups
#       Reference to an array of list reference groups as returned by
#       the group_items_by_list_references() subroutine.
##
sub get_all_list_references
{
    my ( $dic_items ) = @_;

    my $list_ref_groups =
             group_items_by_list_references( [ keys %{$dic_items} ], $dic_items );

    return $list_ref_groups;
}

##
# Checks if a loop contains reference data items that together act as a
# primary loop key as specified by the provided DDL1 data item definitions.
# Keys of main categories are allowed to replace subcategory keys [1].
#
# @source [1]
#       2.5.5.7. Definition example 7: joinable lists,
#       "International Tables for Crystallography Volume G:
#        Definition and exchange of crystallographic data",
#       2005, 56, doi: 10.1107/97809553602060000107
#
# @param $loop_tags
#       Reference to an array of data names residing in a loop.
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @param $options
#       Reference to a hash of options. The following options are recognised:
#       {
#       # Reference to a hash that maps subcategory reference data items
#       # to reference items of the parent category as returned by
#       # the get_subcategory_key_to_category_key_mapping() subroutine
#           'sub_key_to_main_key' => {
#               'sub_1_key_item_name' => 'main_1_key_item_name',
#               'sub_2_key_item_name' => 'main_1_key_item_name',
#               ...,
#               'sub_n_key_item_name' => 'main_m_key_item_name',
#           }
#       }
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_loop_reference_items
{
    my ( $loop_tags, $dic_items, $options ) = @_;

    my $sub_key_to_main_key = defined $options->{'sub_key_to_main_key'} ?
                                      $options->{'sub_key_to_main_key'} : {};

    my $item_ref_groups = group_items_by_list_references( $loop_tags, $dic_items );

    my @reported_key;
    my @validation_issues;
    for my $group ( @{$item_ref_groups} ) {
        for my $key_tag ( @{$group->{'key_data_items'}} ) {
            next if any { $_ eq $key_tag } @reported_key;
            next if any { lc $_ eq $key_tag } @{$loop_tags};
            if ( exists $sub_key_to_main_key->{$key_tag} ) {
                my $main_key = $sub_key_to_main_key->{$key_tag};
                next if any { lc $_ eq $main_key } @{$loop_tags};
            }

            push @reported_key, $key_tag;
            my $message =
                'missing looped list reference data item -- the ' .
                q{'} . ( canonicalise_tag($key_tag) ) . q{'} .
                ' data item must be provided in the loop containing the [' .
                    ( join ', ',
                        map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                            @{$group->{'sub_data_items'}} ) .
                '] data items';

            push @validation_issues,
                 {
                    'test_type' => 'KEY_ITEM_PRESENCE',
                    'data_items' => [ $key_tag ],
                    'message'    => $message
                 }
        }
    }

    return \@validation_issues;
}

##
# Checks if data items in a looped list all belong to the same category
# as specified by the defining DDL1 dictionaries.
#
# @param $loop_tags
#       Reference to an array of data names residing in a loop.
# @param $item_to_category
#       Reference to a hash that maps each data item to
#       the category that the it belongs to as returned
#       by the get_item_to_category_mapping() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub check_loop_category_homogeneity
{
    my ( $loop_tags, $item_to_category ) = @_;

    my %category_items;
    for my $tag ( @{$loop_tags} ) {
        next if !exists $item_to_category->{$tag};
        push @{$category_items{$item_to_category->{$tag}}}, $tag;
    }

    my @validation_issues;
    my @categories = sort keys %category_items;
    if (@categories > 1) {
        push @validation_issues,
             {
                'test_type'  => 'LOOP.CATEGORY_HOMOGENEITY',
                'data_items' => [ @{$loop_tags} ],
                'message'    =>
                    'data items in a looped list must all belong ' .
                    'to the same category -- ' .
                    ( join ', ',
                        map {
                            'data items [' .
                            ( join ', ',
                                map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                                    @{$category_items{$_}} ) .
                            "] belong to the '$_' category"
                        } @categories
                    ),
             }
    }

    return \@validation_issues;
}

##
# Checks if mandatory data items are present in a looped list as specified
# by the defining DDL1 dictionaries.
#
# @param $loop_tags
#       Reference to an array of data names residing in a loop.
# @param $auxiliary_search_structure
#       Reference to an auxiliary search data structure of a DDL1 dictionary
#       as returned by the build_auxiliary_dic_search_structure() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub check_loop_mandatory_item_presence
{
    my ($loop_tags, $auxiliary_search_structure) = @_;

    my $item_to_category = $auxiliary_search_structure->{'item_to_category'};
    my $mandatory_items  = $auxiliary_search_structure->{'category_to_mandatory_items'};
    my $subkey_to_key    = $auxiliary_search_structure->{'sub_key_to_main_key'};
    my $item_to_alternate_items = $auxiliary_search_structure->{'item_to_alternate_items'};

    my %key_to_subkeys;
    for my $subkey (keys %{$subkey_to_key}) {
        push @{$key_to_subkeys{$subkey_to_key->{$subkey}}}, $subkey;
    };

    my %category_to_items;
    for my $tag ( @{$loop_tags} ) {
        next if !exists $item_to_category->{lc $tag};
        push @{$category_to_items{$item_to_category->{lc $tag}}}, $tag;
    }

    my @validation_issues;
    for my $category_name (sort keys %category_to_items) {
        next if !exists $mandatory_items->{$category_name};
        my $category_loop_tags = $category_to_items{$category_name};
        for my $mandatory_item (@{$mandatory_items->{$category_name}}) {
            $mandatory_item = canonical_tag_name($mandatory_item);
            next if any { $_ eq $mandatory_item } @{$category_loop_tags};

            my $subkey_alternative_found = 0;
            for my $subkey_alternative (@{$key_to_subkeys{$mandatory_item}}) {
                if ( any { $_ eq $subkey_alternative } @{$category_loop_tags} ) {
                    $subkey_alternative_found = 1;
                    last;
                }
            }
            next if $subkey_alternative_found;

            my $alternate_item_found = 0;
            for my $alt_item (@{$item_to_alternate_items->{$mandatory_item}}) {
                if ( any { $_ eq $alt_item } @{$loop_tags} ) {
                    $alternate_item_found = 1;
                    last;
                }
            }
            next if $alternate_item_found;

            push @validation_issues,
                 {
                   'test_type'  => 'LOOP.MANDATORY_ITEM_PRESENCE',
                   'data_items' => [ @{$category_loop_tags} ],
                   'message'    =>
                        'missing mandatory looped list data item -- the ' .
                        q{'} . ( canonicalise_tag($mandatory_item) ) . q{'} .
                        ' data item must be provided in the loop containing ' .
                        'the [' .
                            ( join ', ',
                                map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                                    @{$category_loop_tags} ) .
                        '] data items',
                 }
        }
    }

    return \@validation_issues;
}

##
# Checks if a data items have a collectively unique value as specified
# by a DDL1 dictionary.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       Data name of the item that potentially identifies the
#       collectively unique data items.
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_list_unique_key
{
    my ( $data_block, $tag, $dic_items ) = @_;

    return [] if !defined $dic_items->{$tag};
    my $dic_item = $dic_items->{$tag};

    return [] if !defined $data_block->{'inloop'}{$tag};
    my $loop_index = $data_block->{'inloop'}{$tag};

    return [] if !exists $dic_item->{'values'}{'_list_uniqueness'};
    my $unique_key_items = [ map { lc } @{$dic_item->{'values'}{'_list_uniqueness'}} ];

    my %loop_tags = map { lc $_ => 1 } @{$data_block->{'loops'}[$loop_index]};
    my @key_loop_tags = grep { exists $loop_tags{$_} } @{$unique_key_items};
    return [] if !@key_loop_tags;

    my @validation_issues;
    @key_loop_tags = map { canonical_tag_name($_) } @key_loop_tags;
    if ( @key_loop_tags == 1 ) {
        my $key_type = get_data_type( $dic_items->{$key_loop_tags[0]} );
        my $violations = get_simple_unique_key_violations(
                                $data_block,
                                $key_loop_tags[0],
                                $key_type
                         );

        my @messages;
        for my $key ( sort keys %{$violations} ) {
            push @validation_issues,
                 {
                    'test_type'  => 'LOOP.INDIVIDUAL_UNIQUE_VALUES',
                    'data_items' => [ $key_loop_tags[0] ],
                    'message'    =>
                        'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                        ' requires data item ' .
                        q{'} . ( canonicalise_tag($key_loop_tags[0]) ) . q{'} .
                        ' to have unique values, but the associated values ' .
                        "are not unique -- value '$key' appears " .
                            ( scalar @{$violations->{$key}} ) .
                        ' times as [' .
                            ( join ', ', uniq map { "'$_'" }
                                @{$violations->{$key}} ) .
                        ']',
                }
        }
    } else {
        my %key_item_types;
        for my $data_name ( @key_loop_tags ) {
            $key_item_types{$data_name} = get_data_type( $dic_items->{$data_name} );
        }
        my $violations = get_composite_unique_key_violations(
                            $data_block,
                            \@key_loop_tags,
                            \%key_item_types
                         );

        my @messages;
        for my $violation ( @{$violations} ) {
            my @duplicates;
            for my $values ( @{$violation->{'duplicate_values'}} ) {
                push @duplicates,
                     '[' . ( join ', ', map { "'$_'" } @{$values} ) . ']';
            }

            push @validation_issues,
                 {
                    'test_type'  => 'LOOP.COLLECTIVELY_UNIQUE_VALUES',
                    'data_items' => [ @key_loop_tags ],
                    'message'    =>
                        'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                        ' requires data items [' .
                            ( join ', ',
                                map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                                    @key_loop_tags ) .
                        '] to have collectively unique values, but ' .
                        'the associated values are not collectively ' .
                        'unique -- values [' .
                            ( join ', ', map { "'$_'" }
                                @{$violation->{'canonical_values'}} ) .
                        '] appear ' .
                            ( scalar @{$violation->{'duplicate_values'}} ) .
                        ' times as [' .
                            ( join ', ', uniq @duplicates ) .
                        ']',
                }
        }
    }

    return \@validation_issues;
}

##
# Checks if a data item reside in a correct loop context as specified
# by a DDL1 dictionary.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_item
#       Dictionary definition of the validated data item as returned by
#       build_ddl1_dic() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub validate_list_mandatory
{
    my ( $data_block, $tag, $dic_item ) = @_;

    my $must_be_looped = get_list_constraint_type( $dic_item );
    return [] if !defined $must_be_looped;

    my @validation_issues;
    if ( !exists $data_block->{'inloop'}{$tag} ) {
        if ( $must_be_looped eq 'yes' ) {
            push @validation_issues,
                 {
                    'test_type'  => 'LOOP_CONTEXT.MUST_APPEAR_IN_LOOP',
                    'data_items' => [ $tag ],
                    'message'    =>
                        'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                        ' must appear in a loop',
                 }
        }
    } elsif ( $must_be_looped eq 'no' ) {
        push @validation_issues,
             {
                'test_type'  => 'LOOP_CONTEXT.MUST_NOT_APPEAR_IN_LOOP',
                'data_items' => [ $tag ],
                'message'    =>
                    'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                    ' must not appear in a loop',
             }
    }

    return \@validation_issues;
}

##
# Checks if a data item is deprecated as specified by a DDL1 dictionary.
# Cases when both the replaced and the replacing data item reside in the
# same data block are also reported.
#
# @param $data_block
#       Data block that should be validated as returned by the COD::CIF::Parser.
# @param $tag
#       The data name of the item that should be validated.
# @param $dic_items
#       Reference to a hash of DDL1 data item definition blocks
#       as returned by the get_ddl1_dic_items() subroutine.
# @return
#       Reference to an array of validation issue data structures of
#       the following form:
#       {
#         # Code of the validation test that generated the issue
#           'test_type' => 'TEST_TYPE_CODE',
#         # Names of the data items examined by the validation test
#           'data_items' => [ 'data_name_1', 'data_name_2', ... ],
#         # Validation message that should be displayed to the user
#           'message'    => 'a detailed validation message'
#       }
##
sub report_deprecated
{
    my ($data_block, $tag, $dic_items) = @_;

    my $replacement_tags = get_replacement_tags($dic_items, lc $tag);
    return [] if !@{$replacement_tags};

    my @validation_issues;

    push @validation_issues,
         {
           'test_type'  => 'ITEM_REPLACEMENT.PRESENCE_OF_REPLACED',
           'data_items' => [ $tag ],
           'message'    =>
                'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
                ' has been replaced by the [' .
                    join(', ', map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                                                        @{$replacement_tags}) .
                '] data items'
         };

    my @existing_replacement_tags =
        grep { exists $data_block->{values}{$_} } @{$replacement_tags};
    if( @existing_replacement_tags ) {
        my $message =
            'data item ' . q{'} . ( canonicalise_tag($tag) ) . q{'} .
            ' appears in the same data block as its replacement data items [' .
                join( ', ', map { q{'} . ( canonicalise_tag($_) ) . q{'} }
                                                        @{$replacement_tags} ) .
            ']';
        push @validation_issues,
             {
                'test_type'  => 'ITEM_REPLACEMENT.SIMULTANEOUS_PRESENCE',
                'data_items' => [ $tag ],
                'message'    => $message
             }
    }

    return \@validation_issues;
}

##
# Determines the DDL generation of the provided dictionary using ad hoc criteria.
#
# @param $data
#       Reference to parsed CIF dictionary file as returned by the
#       COD::CIF::Parser.
# @return
#       A string that represents the DDL generation or an undefined
#       value if the generation could not be determined. The following
#       string may be returned:
#           '1' for DDL1
#           '2' for DDL2
#           'm' for DDLm
##
sub determine_ddl_generation
{
    my ( $data ) = @_;

    if ( any { $_->{'name'} eq 'on_this_dictionary' } @{$data} ) {
        return '1';
    }

    my $block = $data->[0];
    if ( exists $block->{'values'}{'_dictionary.datablock_id'}) {
        return '2';
    }

    if ( exists $block->{'values'}{'_dictionary.ddl_conformance'} &&
         $block->{'values'}{'_dictionary.ddl_conformance'}[0] =~ /^[34][.]/ ) {
        return 'm';
    }

    return;
}

##
# Evaluates if the data item contains an unquoted string value as specified by
# the CIF working specification.
#
# @param $data_block
#       Data block that contains the data item as returned by the COD::CIF::Parser.
# @param $data_name
#       Name of the data item.
# @param $index
#       The index of the data item value to be evaluated.
# @return
#       Boolean value denoting if the data item contains an unquoted string
#       value.
##
sub has_uqstring_value
{
    my ( $data_block, $data_name, $index ) = @_;

    my $type = defined $data_block->{'types'}{$data_name}[$index] ?
               $data_block->{'types'}{$data_name}[$index] : 'UQSTRING' ;

    return $type eq 'UQSTRING';
};

##
# Extracts defined data names from the given dictionaries.
#
# @param $dics
#       Reference to a data structure that stores DDL1, DDL2 and DDLm
#       dictionaries:
#       {
#           '1' => {
#           # DDL1 dictionaries as returned by the build_ddl1_dic() subroutine
#               'ddl1_dic_filename_1' => { ... },
#               'ddl2_dic_filename_2' => { ... },
#               ...,
#           },
#           '2' => {
#           # DDL2 dictionaries as returned by the build_ddl2_dic() subroutine
#               'ddl2_dic_filename_1' => { ... },
#               ...,
#           },
#           'm' => {
#           # DDLm dictionaries as returned by
#           # the COD::CIF::DDL::DDLm::build_ddlm_dic() subroutine
#               'ddlm_dic_filename_1' => { ... },
#               ...,
#           }
#       }
# @return
#       Reference to a hash where known data names serve as keys
#       and all values are set to '1'.
##
sub get_all_data_names
{
    my ( $dics ) = @_;

    my %data_names;
    for my $dic ( @{$dics->{'1'}{'dictionaries'}} ) {
        for my $data_name ( keys %{get_ddl1_dic_items($dic)} ) {
            $data_names{$data_name} = 1;
        }
    }

    for my $dic ( values %{$dics->{'2'}{'dictionaries'}} ) {
        for my $data_name ( keys %{$dic} ) {
            $data_names{$data_name} = 1;
        }
    }

    for my $dic ( values %{$dics->{'m'}{'dictionaries'}} ) {
        for my $data_name ( keys %{$dic->{'Item'}} ) {
            $data_names{$data_name} = 1;
        }
    }

    return \%data_names;
}
