#!/usr/bin/perl

# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Yerna Lindale.
#
# The Initial Developer of the Original Code is
# the Software Engineering Lab, INTEC, University Ghent.
# Portions created by the Initial Developer are Copyright (C) 2004
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Kris De Schutter <kris.deschutter@ugent.be>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****

# This script takes an lll grammar which has possibly been annotated
# with some Prolog code (hence: PAL, or Prolog Annotated LLL), and
# outputs new Prolog code which implements a kind of tree walker for
# xml structures which match that lll grammar.
#
# When matching a node which is not annotated it will check if the
# nodename matches the grammar-rule, and return itself as the result.
#
# Annotations allow you to do additional checks on the structure, or
# to specialize the returned value for a grammar rule.
#
# The user needs to provide predicates for matching literals and
# terminals. These can not be deduced from the grammar.
#
# [ DIRECTIVES ]
#
# Some options can be set from within the source through directives.
# Directives are to appear first in the source file.
#
# [ NAMING ]
#
# A walker has to be named. This make it possible to use multiple
# walkers in a single environment. Without naming their definitions
# would clash.
#
# A walker can be named by adding a naming directive in the input.
# This directive is a single line starting with "#@" (minus the
# quotes). The remainder for this line must be whitespace, save for
# the identifier to use as the walker's name.
#
# If no name was specified the generated code will use "##id##"
# (minus the quotes) as a name. This can then be searched for and
# replaced by external tools.
#
# [ CUTDOWN ]
#
# The generated prolog code will only do matching on children if the
# value which that child returns is bound to a name.
#
# In addition, cutdown will not generate predicates for grammar rules
# which are not active (= whose return value is never bound). This
# helps minimize the size of the generated prolog code, which may
# save some time in loading them into a prolog environment.
#
# Of course, to know which grammar rules are active we need to know
# which rules are used by the programmer in the first place. This is
# what starter directives are for.
#
# [ STARTER DIRECTIVE ]
#
# A starter directive is a directive saying which grammar rules are
# used by the programmer. The format is on line starting with "#:"
# (minus the quotes) and followed by the rule's identifier (and
# nothing else).
#
# [ GRAMMAR ]
#
# The extended grammar for the lll rules is as follows:
#
#   specification : rule+ ;
#
#   rule : identifier code_value? ":" top_disjunction ";" ;
#
#   top_disjunction : { (top_conjunction code_body?) "|" }+ ;
#
#   top_conjunction : (term | "<" identifier ":" term ">")+ ;
#
#   term : basis repitition? ;
#
#   basis : identifier | literal | "%epsilon" | alternation | group ;
#
#   repitition : "+" | "*" | "?" ;
#
#   alternation : "{" basis basis "}" repitition ;
#
#   group : "(" disjunction ")" ;
#
#   disjunction : { conjunction "|" }+ ;
#
#   conjunction : term+ ;
#
# A code body is anything within properly nested curly brackets ("{"
# and "}"; strings are taken into account).
#
# A code value is the same as a code body but starts with "{=" rather
# than just "{".
#
# (Note that this grammar conforms to itself.)
#
# Shortcoming: (for now) alternations are not handled. If the grammar
# does contain an alternation it will be clearly marked in the output
# (something like "##alternation##"). The generated code will not be
# usable.
#
# [ REFERENCE TO A GRAMMAR ]
#
# If you want to you do not need to define all grammar rules in a PAL
# file. You can restrict the content to the rules which need
# annotating, and have the PAL file refer to an LLL file to fill in
# the blanks.
#
# To refer to an external grammar file use the "#>" directive (minus
# the quotes), followed by the filename, and nothing else (apart
# from the usual whitespace).
#
# [ EPILOGUE ]
#
# The annotated grammar can optionally be followed by an epilogue of
# Prolog code. To flag the start of the epilogue insert a line
# starting with "##" (minus the quotes), and followed only by
# whitespace. All following lines will be copied verbatim into the
# output.
#
#
# This tool is part of "Yerna Lindale" (old music).
#
# @author Kris De Schutter <kris.deschutter@ugent.be>
# @date Thu. May 13, 2004

use Getopt::Long;
require PAL;
require PAL_To_Prolog;

# Options...
my $help = 0;
my $version = 0;
my $quiet = 0;
my $file_in = '';
my $file_out = '';

# Process command line options...
GetOptions (
  'help|?' => \$help,
  'version' => \$version,
  'quiet' => \$quiet,
  'in:s' => \$file_in,
  'out:s' => \$file_out,
) or usage_and_exit ();

# Show help if requested...
if ($help) { usage_and_exit (); }

# Show version info if requested...
if ($version) { version_and_exit (); }

# -- input ----------------------------------------------------------

# Prepare input stream...
our $fh_in = 'STDIN';
if ($file_in) {
  open (HANDLE_IN, $file_in) or file_not_found ($file_in);
  $fh_in = 'HANDLE_IN';
}

# Parse PAL file from inputstream...
my $pal_file = PAL::parse (\*$fh_in);

# Close input stream...
close ($fh_in);

my $lll_grammar;
# If the pal file references an lll...
if ($pal_file->{'lll_reference'}) {
  my $reference = $pal_file->{'lll_reference'};

  open (HANDLE_IN, $reference) or file_not_found ("$reference in $file_in");
  $fh_in = 'HANDLE_IN';

  # Parse LLL file from inputstream...
  $lll_grammar = LLL::parse (\*$fh_in);
  $lll_grammar->{'id'} = $pal_file->{'id'};

  # Close input stream...
  close ($fh_in);

  # Fill in blanks in pal using lll...
  my $inconsistencies = $pal_file->complete_using ($lll_grammar);
  if ($inconsistencies) {
    foreach $rule_name (@{$inconsistencies}) {
      print STDERR ("Found '$rule_name' in PAL but not in LLL.\n");
    }
    exit (-401);
  }
}

# -- output ---------------------------------------------------------

# Prepare output stream...
our $fh_out = 'STDOUT';
if ($file_out) {
  open (HANDLE_OUT, ">$file_out") or cannot_open_file ($file_out);
  $fh_out = 'HANDLE_OUT';
}

$pal_file->cut_down ();
my $prolog_generator = PAL_To_Prolog->new ();
$prolog_generator->{'out'} = \*$fh_out;
$pal_file->accept ($prolog_generator);

close ($fh_out);

# -- endgame --------------------------------------------------------

# Optionally print a report...
if (!$quiet) {

  if ($file_in) {
    print STDERR ("PAL file '$file_in':\n");
  } else {
    print STDERR ("PAL from STDIN:\n");
  }

  my $pal_line_count = $pal_file->{'line_count'};  
  print STDERR ("  Processed $pal_line_count line(s).\n");

  my $pal_name = $pal_file->{'name'};
  if ($pal_name eq '##id##') {
    print STDERR ("  Walker was not named in input.\n");
  } else {
    print STDERR ("  Walker is named '$pal_name'.\n");
  }

  my $rule_count = $pal_file->{'rule_count_in_input'};
  print STDERR ("  Matched $rule_count rule(s).\n");

  if ($pal_file->{'epilogue'} ne '') {
    print STDERR ("  An epilogue was found and processed.\n");
  }

  if ($pal_file->{'lll_reference'}) {
    my $reference = $pal_file->{'lll_reference'};
    print STDERR ("  References '$reference'.\n");
  }

  if ($lll_grammar) {
    my $reference = $pal_file->{'lll_reference'};
    print STDERR ("LLL file '$reference':\n");

    my $lll_line_count = $lll_grammar->{'line_count'};  
    print STDERR ("  Processed $lll_line_count line(s).\n");
    
    my $lll_rule_count = @{$lll_grammar->{'rules'}};
    print STDERR ("  Matched $lll_rule_count rule(s).\n");

    print STDERR ("PAL definition completed using referenced LLL grammar.\n");
  }

  my $active_rule_count = @{$pal_file->{'rules'}};
  print STDERR ("PAL definition cut down to $active_rule_count active rule(s).\n");
  
  my $predicate_count = $prolog_generator->{'predicate_count'};
  print STDERR ("Generated $predicate_count Prolog predicate(s).\n");

  if ($file_out) {
    print STDERR ("Output written to file '$file_out'.\n");
  }
}

exit (0);


# ===================================================================
# Boilerplating for script...                                       =
# ===================================================================

sub usage_and_exit {
  print STDERR ("Usage: paltopl [OPTIONS]...\n");
  print STDERR ("Where OPTIONS is one of:\n");
  print STDERR ("  -in FILE      read from FILE rather than STDIN\n");
  print STDERR ("  -out FILE     write to FILE rather than STDOUT\n");
  print STDERR ("  -quiet        no report at end of run\n");
  print STDERR ("  -version      show version information\n");
  print STDERR ("  -help         print this message\n");
  print STDERR ("\n");
  exit (0);
}

# ===================================================================

sub version_and_exit {
  print STDERR ("This is \"paltopl\", version 0.4.0.\n");
  print STDERR ("Part of Yerna Lindale (old music).\n");
  exit (0);
}

# ===================================================================

sub file_not_found {
  print STDERR ("File not found: $_[0]\n");
  exit (-101);
}

# ===================================================================

sub cannot_open_file {
  print STDERR ("Cannot open file: $_[0]\n");
  exit (-102);
}
