Author: Alexander Zangerl <az@debian.org>
Subject: fixing various minor astma parser bugs and issues

--- a/t/041astmafact.t
+++ b/t/041astmafact.t
@@ -140,6 +140,31 @@ my $npt = scalar keys %{$TM::infrastruct
   is ($ms->toplets,        $npt, 'empty map 2 (toplets)');
 }
 
+#-- dots in topic ids
+eval {
+  my $ms = _parse (q|
+this.is.a.valid.topic.name
+
+this.is.even-more.so.a_topic
+
+|);
+}; 
+ok(!$@,'topics with dots in the name work');
+
+eval {
+  my $ms = _parse (q|
+in-line-with-policy
+bn: goals of backup system must be in line with corp policies
+in: eg: no backup of desktops
+
+ex-suggested
+bn: ex-suggested:
+
+|);
+}; 
+ok(!$@,'topics with in- and ex- in the name work');
+
+
 { # empty line with blanks
   my $ms = _parse (q|
 topic1
@@ -433,6 +458,15 @@ role : p1 p2 p3
   is (scalar $ms->match (TM->FORALL, scope => 'tm:sss',                   iplayer => 'tm:player' ),   1, 'association scoped 3');
 }
 
+{
+  my $ms = _parse (q|
+(is-a-wonderful)
+role : aaa bbb
+
+|);
+is (scalar $ms->match, $npa+1, 'association with is-a type');
+}
+
 #-- reification --------------------------------------
 
 {
@@ -509,13 +543,13 @@ xxx (http://www.topicmaps.org/xtm/1.0/#p
 
 {
   my $ms = _parse (q|
-(xxx) is-reified-by aaa
+(xxx) is-reified-by aaa-and-bbb
   role : player
 |);
 #warn Dumper $ms;
   my ($a) = $ms->match (TM->FORALL, type => 'tm:xxx');
-  is_deeply ([ $ms->is_reified ($a) ], [ 'tm:aaa' ], 'assoc reified: regained');
-  is ($ms->reifies ('tm:aaa'), $a,                   'assoc reified: regained 2');
+  is_deeply ([ $ms->is_reified ($a) ], [ 'tm:aaa-and-bbb' ], 'assoc reified: regained');
+  is ($ms->reifies ('tm:aaa-and-bbb'), $a,                   'assoc reified: regained 2');
 }
 
 {
@@ -1055,6 +1089,48 @@ in: Ich chan Glaas sse, das tuet mir n
 
 }
 
+
+{
+    my $tmp;
+    use IO::File;
+    use POSIX qw(tmpnam);
+    do { $tmp = tmpnam().".atm" ;  } until IO::File->new ($tmp, O_RDWR|O_CREAT|O_EXCL);
+
+    my $fh = IO::File->new ("> $tmp") || die "so what?";
+    print $fh q|
+aaa
+
+ccc
+|;
+    $fh->close;
+
+    my $ms = _parse (qq|
+
+eee
+
+%include file:$tmp
+
+|);
+#warn Dumper $ms;
+    is ($ms->tids ('aaa'), 'tm:aaa', '%include: file, internalized');
+    is ($ms->tids ('ccc'), 'tm:ccc', '%include: file, internalized');
+    is ($ms->tids ('eee'), 'tm:eee', '%include: file, internalized');
+}
+
+{ # include with UNIX pipe
+    my $ms = _parse (qq|
+
+eee
+
+%include (echo "aaa" ; echo ; echo "ccc" ; echo ) \|
+
+|);
+    is ($ms->tids ('aaa'), 'tm:aaa', '%include: pipe, internalized');
+    is ($ms->tids ('ccc'), 'tm:ccc', '%include: pipe, internalized');
+    is ($ms->tids ('eee'), 'tm:eee', '%include: pipe, internalized');
+
+};
+
 my ($tmp);
 use IO::File;
 use POSIX qw(tmpnam);
@@ -1073,7 +1149,7 @@ bbb
 |);
 
  is (scalar $ms->toplets, $npt+1, 'cancelling');
- ERRexpect ("Cancelled");
+ ERRexpect ("Parsing cancelled");
 ##warn Dumper $ms;
 }
 
@@ -1087,7 +1163,7 @@ bbb
 |);
 
  is (scalar $ms->toplets, $npt+1, 'cancelling (blanks)');
- ERRexpect ("Cancelled");
+ ERRexpect ("Parsing cancelled");
 ##warn Dumper $ms;
 }
 
@@ -1137,9 +1213,6 @@ sub ERRexpect {
     close (ERR);
 }
 
-__END__
-
-
 
 __END__
 
@@ -1188,8 +1261,6 @@ rumsti : yyy;
    ok (!$@);
 } 
 
-__END__
-
 ##=========================================================
 
 
--- a/yapp/astma-fact.yp
+++ b/yapp/astma-fact.yp
@@ -24,6 +24,7 @@ my $tracing = 0;
 %token ISINDICATEDBY
 %token LOG
 %token CANCEL
+%token INCLUDE
 %token TRACE
 %token ENCODING
 %token COLON
@@ -39,9 +40,31 @@ maplet_definitions : #empty
                    | maplet_definitions maplet_definition
                    | maplet_definitions template_definition EOL
                    | maplet_definitions COMMENT EOL
-                   | maplet_definitions LOG EOL             { warn "Logging $_[2]"; }
-                   | maplet_definitions CANCEL EOL          { die  "Cancelled"; }
-                   | maplet_definitions TRACE EOL           { $tracing = $_[2]; warn "# start tracing: level $tracing"; }
+                   | maplet_definitions LOG EOL             { warn "Logging $_[2]\n"; }
+                   | maplet_definitions CANCEL EOL          { die  "Parsing cancelled\n"; }
+		| maplet_definitions INCLUDE EOL {
+		    my $content;
+
+		    if ($_[2] =~ /\|\s*$/)
+		    { # a pipe | at the end, this is a UNIX pipe
+			use IO::File;
+			my $fh = IO::File->new ($_[2])
+			    || die "unable to open pipe '$_[2]'\n";
+			local $/ = undef;
+			$content = <$fh>;
+			$fh->close;
+		    }
+		    else
+		    {
+			use LWP::Simple;
+			$content = get($_[2])
+			    || die "unable to load '$_[2]' with LWP\n";
+		    }
+		    $_[0]->YYData->{INPUT} =
+			$content . $_[0]->YYData->{INPUT}; # prepend it
+		}
+
+                   | maplet_definitions TRACE EOL           { $tracing = $_[2]; warn "start tracing: level $tracing\n"; }
                    | maplet_definitions ENCODING EOL        {
 		                                              use Encode;
 							      Encode::from_to ($_[0]->YYData->{INPUT}, "iso-8859-1", $_[2]);
@@ -55,35 +78,40 @@ maplet_definition  : topic_definition
 
 topic_definition  : ID types reification_indication inline_assocs EOL
                     {
+			my $tn=$_[1];
 			$_[1] = $_[0]->{USER}->{store}->internalize ($_[1]);
 
 			if (ref $_[3]) {                                                   # we have reification info
 			    if (     $_[3]->[0] == 1) {                                    # 1 = REIFIES, means current ID is a shorthand for the other
-				$_[0]->{USER}->{store}->internalize ($_[1] => $_[3]->[1]); 
+				my $target=$_[3]->[1];
+				# the other is a local id? then lookup/insert such a topic
+				$target=$_[0]->{USER}->{store}->internalize($_[0]->{USER}->{store}->baseuri.$target)
+				    if ($target=~/^[a-z_:][a-z0-9_:.-]*$/i);
+				$_[0]->{USER}->{store}->internalize ($_[1] => $target);
 			    } elsif ($_[3]->[0] == 0) {                                    # 0 = IS-REIFIED, this must be the other way round
 				$_[0]->{USER}->{store}->internalize ($_[3]->[1] => $_[1]);
 			    } elsif ($_[3]->[0] == 2) {                                    # 2 = ISINDICATEDBY, add the subject indicators
 				$_[0]->{USER}->{store}->internalize ($_[1] => \ $_[3]->[1]);
 			    } else {
-				die "internal fu**up";
+				die "internal fu**up\n";
 			    }
 			}
 			# assert instance/class
                         if (@{$_[2]}) {
 			    $_[0]->{USER}->{store}->assert ( map { bless
-								       [ undef, 
-									 undef, 
-									 'isa', 
+								       [ undef,
+									 undef,
+									 'isa',
 									 undef,
-									 [ 'class', 'instance' ], 
-									 [ $_, $_[1] ], 
-									 ], 'Assertion' }  
+									 [ 'class', 'instance' ],
+									 [ $_, $_[1] ],
+									 ], 'Assertion' }
 							     @{$_[2]} );
 			}
 			{                                                                     # memorize the types should be a 'topic'
                                                                                               # at the end (see end of parse)
 			    my $implicits = $_[0]->{USER}->{implicits};
-			    map { $implicits->{'isa-thing'}->{$_}++ } 
+			    map { $implicits->{'isa-thing'}->{$_}++ }
 			             (@{$_[2]}, $_[1]);                                       # the types and the ID are declared implicitely
 			}
 			
@@ -119,7 +147,7 @@ topic_definition  : ID types reification
 							    undef ], 'Assertion' );
 				} elsif ($templates->tids ( $type ) &&
 					 (my @ts    = $templates->match (TM->FORALL, type => $templates->tids ( $type )  ))) {
-				    warn "duplicate template for '$type' found (maybe typo?), taking one" if @ts > 1;
+				    warn "duplicate template for '$type' found (maybe typo?), taking one\n" if @ts > 1;
 				    my $t = $ts[0];                                           # I choose one
 				    $store->assert (bless [ undef,                   	      # LID
 							    undef,                   	      # SCOPE
@@ -136,7 +164,7 @@ topic_definition  : ID types reification
                                                                    :
 								         $_)
 							          )
-							          } @{$t->[TM->ROLES]} 
+							          } @{$t->[TM->ROLES]}
 						            ],
 						            [                       	      # PLAYERS
 							      map {
@@ -149,15 +177,15 @@ topic_definition  : ID types reification
                                                                   :
 								        $_)
 							          )
-							          } @{$t->[TM->PLAYERS]} 
+							          } @{$t->[TM->PLAYERS]}
 						           ],
 							    undef ], 'Assertion' );
 				} else {
-				    die "unknown association type '$type' in inlined association";
+				    die "unknown association type '$type' in inlined association\n";
 				}
 			    }
 			}
-			warn "added toplet $_[1]" if $tracing;
+			warn "added toplet $tn\n" if $tracing;
 		     }
                        characteristics_indication
                         {
@@ -185,12 +213,12 @@ topic_definition  : ID types reification
 			    map { $implicits->{'subclasses'}->{ $_->[0] == TM->NAME ? 'name' : 'occurrence' }->{$_->[2]}++ }
                             grep ($_->[2], @{$_[7]->[0]});                              # get all the characteristics with types
 			}
-			warn "added ".(scalar @{$_[7]->[0]})."characteristics for $_[1]" if $tracing > 1;
+			warn "added ".(scalar @{$_[7]->[0]})." characteristics for $_[1]\n" if $tracing > 1;
 		    }
 ;
 
 reification_indication     : # empty
-                           | REIFIES       ID                        { [ 1, $_[2] ] }            # 0, 1, 2 are just local encoding, nothing relevant 
+                           | REIFIES       ID                        { [ 1, $_[2] ] }            # 0, 1, 2 are just local encoding, nothing relevant
                            | ISREIFIED     ID                        { [ 0, $_[2] ] }
                            | ISINDICATEDBY ID                        { [ 2, $_[2] ] }
 ;
@@ -213,11 +241,11 @@ characteristic_indication  : characteris
                            | indication
 ;
 
-indication                 : SIN           { $_[0]->{USER}->{string} ||= "\n" } string           
+indication                 : SIN           { $_[0]->{USER}->{string} ||= "\n" } string
                                                                      { $_[3] }                   # TODO: replace with ID?
 ;
 
-characteristic             : class { $_[0]->{USER}->{string} ||= "\n" } 
+characteristic             : class { $_[0]->{USER}->{string} ||= "\n" }
                              scope char_type string                  {                           # check whether we are dealing with URIs or strings
 				                                       if ($_[1] == TM->NAME) {  # names are always strings
 									   $_[5] = new TM::Literal  ($_[5], TM::Literal->STRING);
@@ -255,7 +283,7 @@ inline_assocs              : # empty
 inline_assoc               : ID ID                                   { [ $_[1], $_[2] ] }
 ;
 
-template_definition        : LBRACKET 
+template_definition        : LBRACKET
                                       { ($_[0]->{USER}->{templates}, $_[0]->{USER}->{store}) = ($_[0]->{USER}->{store}, $_[0]->{USER}->{templates}); }
                                                                          # flag that we are inside a template
                              association_definition
@@ -271,7 +299,6 @@ association_definition     : LPAREN ID R
 			       my (@roles, @players);
 			       foreach my $m (@{$_[7]}) {                 # one member
 				   my $role = shift @$m;                  # first is role
-				   
 				   while (@$m) {
 				       push @roles, $role;                # roles repeat for every player
 				       my $player = shift @$m;
@@ -287,16 +314,16 @@ association_definition     : LPAREN ID R
 					  # (assoc) reifies http://.... means
 					  #     1) the assoc will be addes as thing (is done already)
 					  #     2) the http:// will be used as one subject indicator
-					  die "reifier of association must be a URI" unless $_[5]->[1] =~ /^\w+:.+/;
+					  die "reifier of association must be a URI\n" unless $_[5]->[1] =~ /^\w+:.+/;
 					  $ms->internalize ($a->[TM::LID], $_[5]->[1]);
 				      } elsif ($_[5]->[0] == 0) {              # something reifies this assoc
 					  # (assoc) is-reified-by xxx   means
 					  #     1) assoc is added as thing (is done already)
 					  #     2) the local identifier is added as thing with the abs URL of the assoc as subject address
-					  die "reifier must be local identifier" unless $_[5]->[1] =~ /^[A-Za-z][A-Za-z0-9_\.-]+$/;
+					  die "reifier must be local identifier\n" unless $_[5]->[1] =~ /^[A-Za-z][A-Za-z0-9_\.-]+$/;
 					  $ms->internalize ($_[5]->[1] => $a);
 				      } else { # this would be 'indication' but we do not want that here
-					  die "indication for associations are undefined";
+					  die "indication for associations are undefined\n";
 				      }
 				  }
 			      }
@@ -307,7 +334,7 @@ association_definition     : LPAREN ID R
 # implicit			   $implicits->{'subclasses'}->{'association'}->{$_[2]}++;
 				   $_[0]->{USER}->{implicits}->{'isa-scope'}->{$_[4]}++ if $_[4];
 			       }
-			       warn "added assertion $_[2]" if $tracing;
+			       warn "added assertion $_[2]\n" if $tracing;
 			   }
 ;
 
@@ -329,13 +356,13 @@ ids                        :
                            | ids ID                           { push @{$_[1]}, $_[2]; $_[1] }
 ;
 
-string                     : STRING EOL { die "empty string in characteristics" unless $_[1]; $_[1] }
+string                     : STRING EOL { die "empty string in characteristics\n" unless $_[1]; $_[1] }
 ;
 
 %%
 
 sub _Error {
-    die "Syntax error: Found ".$_[0]->YYCurtok." but expected ".join (' or ', $_[0]->YYExpect);
+    die "Syntax error: Found ".$_[0]->YYCurtok." but expected ".join (' or ', sort($_[0]->YYExpect))."\n";
 }
 
 use constant CHUNK_SIZE => 32000;
@@ -344,7 +371,7 @@ sub _Lexer {
     my $parser = shift;
     my $yydata = $parser->YYData;
 
-    if (length ($yydata->{INPUT}) < 1024 && $yydata->{OFFSET} < $yydata->{TOTAL}) { 
+    if (length ($yydata->{INPUT}) < 1024 && $yydata->{OFFSET} < $yydata->{TOTAL}) {
 	$yydata->{INPUT}  .= substr ($yydata->{RESERVE}, $yydata->{OFFSET}, CHUNK_SIZE);
 	$yydata->{OFFSET} += CHUNK_SIZE;
     }
@@ -358,13 +385,14 @@ sub _Lexer {
 #warn "lexer ($parser->{USER}->{string}):>>>".$parser->YYData->{INPUT}."<<<";
 
     $$refINPUT =~ s/^\n//so                           and return ('EOL',       	   undef);
+    # note: these are notoriously imprecise and prohibit topics named in, rd, ex, oc or bn.
     $$refINPUT =~ s/^in\b(?![\.-])//o                 and return ('IN',        	   undef);
     $$refINPUT =~ s/^rd\b(?![\.-])//o                 and return ('IN',        	   undef);
     $$refINPUT =~ s/^oc\b(?![\.-])//o                 and return ('OC',        	   undef);
     $$refINPUT =~ s/^ex\b(?![\.-])//o                 and return ('OC',        	   undef);
     $$refINPUT =~ s/^bn\b(?![\.-])//o                 and return ('BN',        	   undef);
 
-    $$refINPUT =~ s/^sin\b(?![\.-])//o                and return ('SIN',       	   undef);
+    $$refINPUT =~ s/^sin\s*(?=:)//o                and return ('SIN',       	   undef);
     $$refINPUT =~ s/^is-a\b(?![\.-])//o               and return ('ISA',       	   undef);
     $$refINPUT =~ s/^reifies\b(?![\.-])//o            and return ('REIFIES',   	   undef);
     $$refINPUT =~ s/^=//o                             and return ('REIFIES',   	   undef);
@@ -380,7 +408,7 @@ sub _Lexer {
 	    $t = "\n<<$1\n";
 
 ##warn "try finding string ..$t..  " ;
-	$$refINPUT =~ s/^:\s*(.*?)\s*$t/\n/s          and 
+	$$refINPUT =~ s/^:\s*(.*?)\s*$t/\n/s          and
 ##            (warn "returning $1" or 1) and
 	    (undef $parser->{USER}->{string}          or  return ('STRING',    $1));
 ##warn "no string";
@@ -404,6 +432,8 @@ sub _Lexer {
     $$refINPUT =~ s/^(\d{4}-\d{1,2}-\d{1,2})(\s+(\d{1,2}):(\d{2}))?//o
                                                       and return ('ID',        sprintf "urn:x-date:%s:%02d:%02d", $1, $3 || 0, $4 || 0); # is a date
 
+    $$refINPUT =~ s/^%include\s+(.*?)(?=\n)//so
+	and return ('INCLUDE',   $1); # positive look-ahead
     $$refINPUT =~ s/^%log\s+(.*?)(?=\n)//so           and return ('LOG',       $1); # positive look-ahead
     $$refINPUT =~ s/^%cancel\s*(?=\n)//so             and return ('CANCEL',    $1); # positive look-ahead
     $$refINPUT =~ s/^%trace\s+(.*?)(?=\n)//so         and return ('TRACE',     $1); # positive look-ahead
@@ -450,10 +480,10 @@ sub parse {
 
     eval {
 	$self->YYParse ( yylex => \&_Lexer, yyerror => \&_Error );
-    }; if ($@ =~ /^Cancelled/) {
-	warn $@;                                                        # de-escalate Cancelling to warning
+    }; if ($@ =~ /^Parsing cancelled/) {
+	warn $@."\n";                                                        # de-escalate Cancelling to warning
     } elsif ($@) {
-	die $@;                                                         # otherwise re-raise the exception
+	die $@."\n";                                                         # otherwise re-raise the exception
     }
 
 
@@ -464,7 +494,7 @@ sub parse {
 	{ # all super/subclasses
 	    foreach my $superclass (keys %{$implicits->{'subclasses'}}) {
 		$store->assert ( map {
-		    bless [ undef, undef, 'is-subclass-of', TM->ASSOC, [ 'superclass', 'subclass' ], [ $superclass, $_ ] ], 'Assertion' 
+		    bless [ undef, undef, 'is-subclass-of', TM->ASSOC, [ 'superclass', 'subclass' ], [ $superclass, $_ ] ], 'Assertion'
 		    }  keys %{$implicits->{'subclasses'}->{$superclass}});
 	    }
 #warn "done with subclasses";
@@ -475,7 +505,7 @@ sub parse {
 	}
 	{ # establishing the scoping topics
 	    $store->assert (map {
-                                 bless [ undef, undef, 'isa', TM->ASSOC, [ 'class', 'instance' ], [ 'scope', $_ ] ], 'Assertion' 
+                                 bless [ undef, undef, 'isa', TM->ASSOC, [ 'class', 'instance' ], [ 'scope', $_ ] ], 'Assertion'
 				 } keys %{$implicits->{'isa-scope'}});
 	}
     }
--- a/yapp/astma2-fact.yp
+++ b/yapp/astma2-fact.yp
@@ -690,7 +690,7 @@ string                     : STRING EOL
 %%
 
 sub _Error {
-    die "Syntax error: Found ".$_[0]->YYCurtok." but expected ".join (' or ', $_[0]->YYExpect);
+    die "Syntax error: Found ".$_[0]->YYCurtok." but expected ".join (' or ', sort($_[0]->YYExpect));
 }
 
 sub _Lexer {
