#!/usr/bin/perl

# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Yerna Lindale.
#
# The Initial Developer of the Original Code is
# the Software Engineering Lab, INTEC, University Ghent.
# Portions created by the Initial Developer are Copyright (C) 2004
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Kris De Schutter <kris.deschutter@ugent.be>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****

package LLL;

# ===================================================================
# Main method...                                                    =
# ===================================================================

sub parse {
  my $fh_in = $_[0];

  # Parse stream into tokens...
  my ($line_count, $tokens) = scan ($fh_in);

  # Will hold the grammar...
  my $grammar = LLL_Grammar->new ();
  $grammar->{'line_count'} = $line_count;

  # Current token parsing is at...
  my $position = 0;
  
  # While there is more input to process...
  while ($position < @$tokens) {
    # We match a rule starting at the current position...
    my ($len, $rule) = match_rule ($tokens, $position);
  
    if ($len == -1) {
      # No rule found even though we expected one => error in input.
      my $culprit = $tokens->[$position];
      parse_error ("incorrect rule starting with $culprit");
    }
  
    # Add rule...
    $grammar->add ($rule);
    
    # Update our position...
    $position += $len;
  }

  return $grammar;
}

# ===================================================================
# Scanning...                                                       =
# ===================================================================

sub scan {
  my $fh_in = $_[0];

  # Bookkeeping...
  my $line_count = 0;

  # This keeps track of tokens found...
  my $tokens = [];
  
  # Current line...
  my $line = '';
  
  # While end of file has not been reached...
  while (1) {

    # Read in new line if neccesary...
    if ($line =~ /^\s*$/) {
      if ($line = <$fh_in>) {
        # Loose trailing newlines...
        chomp ($line);
  
        # Bookkeeping...
        $line_count++;
        
        # Start loop again...
        next;
        
      } else {
        # Exit loop...
        last;
      }
    }
  
    # Skip leading whitespace...
    $line =~ /^\s*(.*)$/;
    $line = "$1";

    # While line has characters...
    while ($line !~ /^\s*$/) {

      # DEBUG:
      # print STDERR ("<$line>\n");

      # Tokenize the stream...
      if ($line =~ /^(\"|\')(.*)/) {

        # A string literal...
        my $literal = "";
        my $delimiter = "$1";
        $line = "$2";

        my $scanning_for_literal = 1;
        while ($scanning_for_literal == 1) {

          if ($line =~ /^(\\.)(.*)/) {
            $literal = "$literal$1";
            $line = "$2";

          } elsif ($line =~ /^$delimiter(.*)/) {
            $line = "$1";
            $scanning_for_literal = 0;

          } elsif ($line =~ /^$/) {
            scanner_error ("incomplete literal at line $line_count\n");

          } else {
            $line =~ /^([^\\$delimiter]+)(.*)$/;
            $literal = "$literal$1";
            $line = "$2";
          }
        }

        # DEBUG:
        # print STDERR ("% LITERAL: $delimiter$literal$delimiter\n");

        $tokens->[@$tokens] = "$delimiter$literal$delimiter";

      } elsif ($line =~ /^([0-9]+)([^0-9].*)?$/) {

        # An integer literal...
        my $literal = "$1";
        $line = "$2";

        # DEBUG:
        # print STDERR ("% LITERAL: $literal\n");

        $tokens->[@$tokens] = $literal;

      } elsif ($line =~ /^([a-zA-Z][a-zA-Z0-9_]*)([^a-zA-Z0-9_].*)?$/) {

        # An identifier...
        my $identifier = "$1";
        $line = "$2";

        # DEBUG:
        # print STDERR ("% IDENTIFIER: $identifier\n");

        $tokens->[@$tokens] = $identifier;

      } elsif ($line =~ /^(\+|\*|\?|\(|\)|\:|\;|\|)(.*)$/) {

        # A structural token...
        my $token = "$1";
        $line = "$2";

        # DEBUG:
        # print STDERR ("% TOKEN: $token\n");

        $tokens->[@$tokens] = $token;

      } else {

        # Scanner error!
        $line =~ /^(.)/;
        scanner_error ("unexpected token in input: '$1' ($line_count)\n");

      }

      # Skip leading whitespace...
      $line =~ /^\s*(.*)$/;
      $line = "$1";
    }
  } # End of parsing lll rules.

  # print STDERR ("> $line_count\n");
  # foreach $t (@{$tokens}) {
  #   print STDERR (": $t\n");
  # }

  return $line_count, $tokens;
}

# ===================================================================
# Parsing...                                                        =
# ===================================================================

# These are ment to match tokens, not arbitrary strings !

sub token_is_identifier {
  return $_[0] =~ /^([a-zA-Z][a-zA-Z0-9_]*)$/;
}

sub token_is_literal {
  return $_[0] =~ /^(\".*\"|\'.*\'|[0-9]+)$/;
}

# ===================================================================

sub match_rule {
  # rule : identifier ":" disjunction ";"

  my $tokens = $_[0];
  my $position = $_[1];
  my $len = 0;

  # DEBUG:
  # print STDERR ("RULE ? at $position\n");

  my $rule = LLL_Rule->new ();

  # IDENTIFIER...
  if (not token_is_identifier($tokens->[$position])) {
    # DEBUG:
    # print STDERR ("RULE: NO IDENTIFIER\n");
    return -1, undef;
  }
  # DEBUG:
  # print STDERR ("RULE: IDENTIFIER '$tokens->[$position]' FOUND\n");

  $rule->{'name'} = $tokens->[$position];
  $position++; $len++;

  # COLON...
  if (not ($tokens->[$position] =~ /^:$/)) {
    # DEBUG:
    # print STDERR ("RULE: NO COLON\n");
    return -1, undef;
  }
  $position++; $len++;
  # DEBUG:
  # print STDERR ("RULE: COLON FOUND\n");

  # DISJUNCTION...
  # my $disj_len = 0;
  (my $disj_len, $rule->{'body'}) = match_disjunction ($tokens, $position);
  if (disj_len == -1) {
    # DEBUG:
    # print STDERR ("RULE: NO DISJUNCTION\n");
    return -1, undef;
  }
  $position += $disj_len;
  $len += $disj_len;
  # DEBUG:
  # print STDERR ("RULE: DISJUNCTION FOUND\n");

  # SEMICOLON...
  if (not ($tokens->[$position] =~ /^;$/)) {
    # DEBUG:
    # print STDERR ("RULE: NO SEMICOLON\n");
    return -1, undef;
  }
  $position++; $len++;
  # DEBUG:
  # print STDERR ("RULE: SEMICOLON FOUND\n");

  return $len, $rule;
}

# ===================================================================

sub match_disjunction {
  # disjunction : { conjunction "|" }+

  my $tokens = $_[0];
  my $position = $_[1];
  my $disjunction = LLL_Disjunction->new ();
  my $len = 0;

  # DEBUG:
  # print STDERR ("DISJUNCTION ? at $position\n");

  # CONJUNCTION...
  my ($len_conjunction, $conjunction) = match_conjunction($tokens, $position);
  if ($len_conjunction == -1) {
    # DEBUG:
    # print STDERR ("DISJUNCTION: NO CONJUNCTION\n");
    return -1, undef;
  }
  $position += $len_conjunction;
  $len += $len_conjunction;
  # DEBUG:
  # print STDERR ("DISJUNCTION: CONJUNCTION FOUND\n");

  $disjunction->add ($conjunction);

  my $more_conjunctions = 1;
  do {
    # OPTION ?
    # DEBUG:
    # print STDERR ("DISJUNCTION: looking at $tokens->[$position]\n");
    if ($tokens->[$position] =~ /^\|$/) {
      $more_conjunctions = 1;
      # DEBUG:
      # print STDERR ("DISJUNCTION: OPTION FOUND\n");
      $position++; $len++;
    } else {
      $more_conjunctions = 0;
    }

    if ($more_conjunctions == 1) {
      # CONJUNCTION...
      ($len_conjunction, $conjunction) = match_conjunction($tokens, $position);
      if ($len_conjunction == -1) {
        # DEBUG:
        # print STDERR ("DISJUNCTION: NO CONJUNCTION\n");
        return -1, undef;
      }
      $position += $len_conjunction;
      $len += $len_conjunction;
      # DEBUG:
      # print STDERR ("DISJUNCTION: CONJUNCTION FOUND\n");

      $disjunction->add ($conjunction);
    }

  } while ($more_conjunctions == 1);

  # DEBUG:
  # print STDERR ("DISJUNCTION: FOUND [$len]\n");
  return $len, $disjunction;
}

# ===================================================================

sub match_conjunction {
  # conjunction : term+

  my $tokens = $_[0];
  my $position = $_[1];
  my $conjunction = LLL_Conjunction->new ();
  my $len = 0;

  # DEBUG:
  # print STDERR ("CONJUNCTION ? at $position\n");

  my ($len_term, $term) = match_term($tokens, $position);
  if ($len_term == -1) {
    # DEBUG:
    # print STDERR ("CONJUNCTION: NO TERM\n");
    return -1, undef;
  }
  $position += $len_term;
  $len += $len_term;
  # DEBUG:
  # print STDERR ("CONJUNCTION: TERM FOUND\n");

  $conjunction->add ($term);

  do {
    ($len_term, $term) = match_term($tokens, $position);
    if ($len_term != -1) {
      $position += $len_term;
      $len += $len_term;
      # DEBUG:
      # print STDERR ("CONJUNCTION: TERM FOUND\n");
      $conjunction->add ($term);
    }
  } while ($len_term != -1);

  # DEBUG:
  # print STDERR ("CONJUNCTION: FOUND [$len]\n");
  return $len, $conjunction;
}

# ===================================================================

sub match_term {
  # term : basis repitition?

  my $tokens = $_[0];
  my $position = $_[1];
  my $term = LLL_Term->new ();
  my $len = 0;

  # DEBUG:
  # print STDERR ("TERM ? at $position\n");

  # BASIS ...
  my ($len_basis, $basis) = match_basis($tokens, $position);
  if ($len_basis == -1) {
    # DEBUG:
    # print STDERR ("TERM: NO BASIS\n");
    return -1, undef;
  }
  $position += $len_basis;
  $len += $len_basis;
  # DEBUG:
  # print STDERR ("TERM: BASIS FOUND\n");

  $term->{'basis'} = $basis;

  # REPITITION ?
  my ($len_repitition, $repitition) = match_repitition($tokens, $position);
  if ($len_repitition != -1) {
    $position += $len_repitition;
    $len += $len_repitition;
    # DEBUG:
    # print STDERR ("TERM: REPITITION FOUND\n");
    $term->{'repitition'} = $repitition;
  }

  return $len, $term;
}

# ===================================================================

sub match_basis {
  # basis : identifier | literal | "%epsilon" | alternation | group

  my $tokens = $_[0];
  my $position = $_[1];
  my $len = 0;

  # DEBUG:
  # print STDERR ("BASIS ? at $position\n");

  # IDENTIFIER ?
  if (token_is_identifier ($tokens->[$position])) {
    # DEBUG:
    # print STDERR ("BASIS: IDENTIFIER '$tokens->[$position]' FOUND\n");
    my $identifier = LLL_Identifier->new ();
    $identifier->{'value'} = $tokens->[$position];
    return 1, $identifier;
  }

  # LITERAL ?
  if (token_is_literal ($tokens->[$position])) {
    # DEBUG:
    # print STDERR ("BASIS: LITERAL $tokens->[$position] FOUND\n");
    my $literal = LLL_Literal->new ();
    $literal->{'value'} = $tokens->[$position];
    return 1, $literal;
  }

  # EPSILON ?
  if ($tokens->[$position] =~ /^\%epsilon$/) {
    # DEBUG:
    # print STDERR ("BASIS: EPSILON FOUND\n");
    my $epsilon = LLL_Epsilon->new ();
    return 1, $epsilon;
  }

  # ALTERNATION ?
  ($len, my $alternation) = match_alternation ($tokens, $position);
  if ($len != -1) {
    # DEBUG:
    # print STDERR ("BASIS: ALTERNATION FOUND\n");
    return $len, $alternation;
  }

  # GROUP ?
  ($len, my $group) = match_group ($tokens, $position);
  if ($len != -1) {
    # DEBUG:
    # print STDERR ("BASIS: GROUP FOUND\n");
    return $len, $group;
  }

  # NO MATCH !
  # DEBUG:
  # print STDERR ("BASIS: NO identifier, literal, epsilon, alternation OR group\n");
  return -1, undef;
}

# ===================================================================

sub match_repitition {
  # repitition : "+" | "*" | "?"

  my $tokens = $_[0];
  my $position = $_[1];
  my $len = 0;

  # DEBUG:
  # print STDERR ("REPITITION ? at $position\n");

  if ($tokens->[$position] =~ /^\+$/) {
    # DEBUG:
    # print STDERR ("REPITITION: + FOUND\n");
    return 1, 'plus';
  }

  if ($tokens->[$position] =~ /^\*$/) {
    # DEBUG:
    # print STDERR ("REPITITION: * FOUND\n");
    return 1, 'star';
  }

  if ($tokens->[$position] =~ /^\?$/) {
    # DEBUG:
    # print STDERR ("REPITITION: ? FOUND\n");
    return 1, 'opt';
  }

  # DEBUG:
  # print STDERR ("REPITITION: NO \"+\", \"*\" OR \"?\"\n");

  return -1, undef;
}

# ===================================================================

sub match_alternation {
  # alternation : "{" basis basis "}" repitition

  my $tokens = $_[0];
  my $position = $_[1];
  my $alternation = LLL_Alternation->new ();
  my $len = 0;

  # DEBUG:
  # print STDERR ("ALTERNATION ? at $position\n");

  # LEFT BRACE...
  if (not ($tokens->[$position] =~ /^\{$/)) {
    # DEBUG:
    # print STDERR ("ALTERNATION: NO LEFT BRACE\n");
    return -1, undef;
  }
  $position++; $len++;
  # DEBUG:
  # print STDERR ("ALTERNATION: LEFT BRACE FOUND\n");

  # BASIS...
  my ($len_basis_1, $basis_1) = match_basis($tokens, $position);
  if ($len_basis_1 == -1) {
    # DEBUG:
    # print STDERR ("ALTERNATION: NO FIRST BASIS\n");
    return -1, undef;
  }
  $position += $len_basis_1;
  $len += $len_basis_1;
  # DEBUG:
  # print STDERR ("ALTERNATION: FIRST BASIS FOUND\n");

  # BASIS...
  my ($len_basis_2, $basis_2) = match_basis($tokens, $position);
  if ($len_basis_2 == -1) {
    # DEBUG:
    # print STDERR ("ALTERNATION: NO SECOND BASIS\n");
    return -1, undef;
  }
  $position += $len_basis_2;
  $len += $len_basis_2;
  # DEBUG:
  # print STDERR ("ALTERNATION: SECOND BASIS FOUND\n");

  # RIGHT BRACE...
  if (not ($tokens->[$position] =~ /^\}$/)) {
    # DEBUG:
    # print STDERR ("ALTERNATION: NO RIGHT BRACE\n");
    return -1, undef;
  }
  $position++; $len++;
  # DEBUG:
  # print STDERR ("ALTERNATION: RIGHT BRACE FOUND\n");

  # REPITITION...
  my ($len_repitition, $repitition) = match_repitition($tokens, $position);
  if ($len_repitition == -1) {
    # DEBUG:
    # print STDERR ("ALTERNATION: NO REPITITION\n");
    return -1, undef;
  }
  $position += $len_repitition;
  $len += $len_repitition;
  # DEBUG:
  # print STDERR ("ALTERNATION: REPITITION FOUND\n");

  # Building the result...
  $alternation->{'first'} = $basis_1;
  $alternation->{'second'} = $basis_2;
  $alternation->{'repitition'} = $repitition->[0];

  return $len, $alternation;
}

# ===================================================================

sub match_group {
  # group : "(" disjunction ")"

  my $tokens = $_[0];
  my $position = $_[1];

  # DEBUG:
  # print STDERR ("GROUP ? at $position\n");

  # LEFT PARENTHESIS...
  if (not ($tokens->[$position] =~ /^\($/)) {
    # DEBUG:
    # print STDERR ("GROUP: NO LEFT PARENTHESIS\n");
    return -1, undef;
  }
  $position++;
  # DEBUG:
  # print STDERR ("GROUP: LEFT PARENTHESIS FOUND\n");

  # DISJUNCTION...
  my ($len, $group) = match_disjunction ($tokens, $position);
  if ($len == -1) {
    # DEBUG:
    # print STDERR ("GROUP: NO DISJUNCTION\n");
    return -1, undef;
  }
  $position += $len;
  # DEBUG:
  # print STDERR ("GROUP: DISJUNCTION FOUND\n");

  # RIGHT PARENTHESIS...
  if (not ($tokens->[$position] =~ /^\)$/)) {
    # DEBUG:
    # print STDERR ("GROUP: NO RIGHT PARENTHESIS\n");
    return -1, undef;
  }
  $position++;
  # DEBUG:
  # print STDERR ("GROUP: RIGHT PARENTHESIS FOUND\n");

  return ($len + 2), $group;
}

# ===================================================================
# Error routines...                                                 =
# ===================================================================

sub scanner_error {
  print STDERR ("Scanner error: $_[0]\n");
  exit (-120);
}

sub parse_error {
  print STDERR ("Parse error: $_[0]\n");
  exit (-140);
}

# ===================================================================
# Classes...                                                        =
# ===================================================================

package LLL_Grammar;

  sub new {
    my $class = shift;

    return bless {
      'rules' => []
    }, $class;
  }

  sub add {
    my $self = shift;
    my $rule = $_[0];

    my $rules = $self->{'rules'};
    $rules->[@{$rules}] = $rule;
  }

  sub as_string {
    my $self = $_[0];
    return "LLL_Grammar/" . @{$self->{'rules'}};
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    if ($visitor->enter_grammar ($self) == 1) {
      foreach $rule (@{$self->{'rules'}}) {
        $rule->accept ($visitor);
      }
    }
    $visitor->leave_grammar ($self);
  }

# ===================================================================

package LLL_Rule;

  sub new {
    my($class) = shift;

    return bless {
      'name' => 'unnamed rule',
      'body' => undef
    }, $class;
  }

  sub as_string {
    my $self = $_[0];
    return "LLL_Rule (" . $self->{'name'} . ")";
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    if ($visitor->enter_rule ($self) == 1) {
      $self->{'body'}->accept ($visitor);
    }
    $visitor->leave_rule ($self);
  }

# ===================================================================

package LLL_Disjunction;

  sub new {
    my($class) = shift;

    return bless {
      'conjunctions' => []
    }, $class;
  }

  sub add {
    my $self = $_[0];
    my $conjunction = $_[1];

    my $conjunctions = $self->{'conjunctions'};
    $conjunctions->[@{$conjunctions}] = $conjunction;
  }

  sub as_string {
    my $self = $_[0];
    return "LLL_Disjunction (" . @{$self->{'conjunctions'}} . ")";
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    if ($visitor->enter_disjunction ($self) == 1) {
      foreach $conjunction (@{$self->{'conjunctions'}}) {
        $conjunction->accept ($visitor);
      }
    }
    $visitor->leave_disjunction ($self);
  }

# ===================================================================

package LLL_Conjunction;

  sub new {
    my($class) = shift;

    return bless {
      'terms' => []
    }, $class;
  }

  sub add {
    my $self = $_[0];
    my $term = $_[1];

    my $terms = $self->{'terms'};
    $terms->[@{$terms}] = $term;
  }

  sub as_string {
    my $self = $_[0];
    return "LLL_Conjunction (" . @{$self->{'terms'}} . ")";
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    if ($visitor->enter_conjunction ($self) == 1) {
      foreach $term (@{$self->{'terms'}}) {
        $term->accept ($visitor);
      }
    }
    $visitor->leave_conjunction ($self);
  }

# ===================================================================

package LLL_Term;

  sub new {
    my($class) = shift;

    return bless {
      'basis' => undef,
      'repitition' => 'once'
    }, $class;
  }

  sub as_string {
    my $self = $_[0];
    return "LLL_Term (" . @{$self->{'repitition'}} . ")";
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    if ($visitor->enter_term ($self) == 1) {
      $self->{'basis'}->accept ($visitor);
    }
    $visitor->leave_term ($self);
  }

# ===================================================================

package LLL_Alternation;

  sub new {
    my($class) = shift;

    return bless {
      'first' => undef,
      'second' => undef,
      'repitition' => undef
    }, $class;
  }

  sub as_string {
    my $self = $_[0];
    return "LLL_Alternation";
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    if ($visitor->enter_alternation ($self) == 1) {
      $self->{'first'}->accept ($visitor);
      $self->{'second'}->accept ($visitor);
    }
    $visitor->leave_alternation ($self);
  }

# ===================================================================

package LLL_Identifier;

  sub new {
    my($class) = shift;

    return bless {
      'value' => undef
    }, $class;
  }

  sub as_string {
    my $self = $_[0];
    return $self->{'value'};
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    $visitor->do_identifier ($self);
  }

# ===================================================================

package LLL_Literal;

  sub new {
    my($class) = shift;

    return bless {
      'value' => undef
    }, $class;
  }

  sub as_string {
    my $self = $_[0];
    return $self->{'value'};
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    $visitor->do_literal ($self);
  }

# ===================================================================

package LLL_Epsilon;

  sub new {
    my($class) = shift;
    return bless {}, $class;
  }

  sub as_string {
    return "%epsilon";
  }

  sub accept {
    my $self = shift;
    my $visitor = $_[0];
    
    $visitor->do_epsilon ($self);
  }









1;
