-------------------------------------------------------------------------------
--
-- Copyright (C) 1999 FlightSafety International and Ted Dennison
--
-- This file is part of the OpenToken package.
--
-- The OpenToken package is free software; you can redistribute it and/or
-- modify it under the terms of the  GNU General Public License as published
-- by the Free Software Foundation; either version 2, or (at your option)
-- any later version. The OpenToken package is distributed in the hope that
-- it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for  more details.  You should have received
-- a copy of the GNU General Public License  distributed with the OpenToken
-- package;  see file GPL.txt.  If not, write to  the Free Software Foundation,
-- 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.
--
-- As a special exception,  if other files  instantiate  generics from this
-- unit, or you link this unit with other files to produce an executable,
-- this unit does not by itself cause the resulting executable to be
-- covered by the GNU General Public License.  This exception does not
-- however invalidate any other reasons why the executable file might be
-- covered by the GNU Public License.
--
-- Maintainer: Ted Dennison (dennison@telepath.com)
--
-- This software was originally developed by the following company, and was
-- released as open-source software as a service to the community:
--
--           FlightSafety International Simulation Systems Division
--                    Broken Arrow, OK  USA  918-259-4000
--
-- Update History:
-- $Log: token-analyzer.adb,v $
-- Revision 1.4  1999/10/08 22:47:51  Ted
-- Add default token functionality
--
-- Revision 1.3  1999/08/17 03:07:54  Ted
-- Add log line
--
-------------------------------------------------------------------------------

with Ada.Exceptions;
with Ada.Strings;

-------------------------------------------------------------------------------
-- This package implements a mostly full-strength tokenizer (or lexical
-- analyizer).
-------------------------------------------------------------------------------
package body Token.Analyzer is

   type Match_List is array (Token_ID) of Analysis_Verdict;

   -------------------------------------------------------------------------
   -- Find all the characters that *aren't* part of a token match.
   --
   -- This routine should be called when the first character in the
   -- analyzer's buffer is unmatchable. It succesively checks each character
   -- thereafter until it finds one that *does* start a valid token.
   --
   -- Last_Unmatched will be set to the index of the last unmatchable
   -- character.
   -------------------------------------------------------------------------
   procedure Find_Non_Match (Last_Unmatched : out    Natural;
                             Analyzer       : in out Instance) is

      -- The table of token matches
      Match : Match_List;

      Possible_Matches : Boolean;
      Current_Char     : Integer;

   begin

      -- Loop to find unrecognized characters
      Last_Unmatched := 1;

      Check_For_Unrecognized : loop

         -- Loop to see if Last_Unmatched + 1 starts a valid token
         Current_Char := Last_Unmatched + 1;

         -- Clear the state of all the tokens
         for Token_Index in Token_Id loop
            Clear (Analyzer.Token_List(Token_Index).all);
         end loop;
         Match := (others => So_Far_So_Good);

         Check_For_Match : loop

            -- Get more data when we run out
            if Current_Char > Buffers.Length(Analyzer.Buffer) then

               Buffers.Append (Source   => Analyzer.Buffer,
                               New_Item => Analyzer.Get_More_Text.all);
            end if;

            -- Loop to see if this character starts a match on any token.
            -- We will assume that there are no possible matches until proven
            -- otherwise.
            Possible_Matches := False;

            for Token_Index in Token_ID loop

               if Match(Token_Index) /= Failed then

                  Analyze (The_Token => Analyzer.Token_List(Token_Index).all,
                           Next_Char => Buffers.Element( Source => Analyzer.Buffer,
                                                         Index  => Current_Char),
                           Verdict   => Match(Token_Index));


               end if;

               case Match(Token_Index) is
                  -- If we found a match, quit.
                  when Matches =>
                     return;

                  -- If we *could* have a match, check the next character
                  when So_Far_So_Good =>
                     Possible_Matches :=
                       Buffers.Element( Source => Analyzer.Buffer,
                                        Index  => Current_Char) /= EOF_Character;

                  when others =>
                     null;
               end case;

            end loop;

            exit Check_For_Match when not Possible_Matches;

            Current_Char := Current_Char + 1;

         end loop Check_For_Match;

         Last_Unmatched := Last_Unmatched + 1;

         exit when Buffers.Element( Source => Analyzer.Buffer,
                                    Index  => Last_Unmatched) = EOF_Character;

      end loop Check_For_Unrecognized;

   end Find_Non_Match;


   ----------------------------------------------------------------------------
   -- Find the the best (aka: longest) match for the tokens in input stream
   -- The ID and length of the best token match will be returned.
   --
   -- This routine attempts to find the longest possible string in the
   -- Analyzer's buffer (starting at index 1) that matches a token. If the
   -- buffer runs out of characters during this process, it will be refilled
   -- from the Analyzer's text feeder function.
   ----------------------------------------------------------------------------
   procedure Find_Best_Match
     (Analyzer          : in out Instance;
      Best_Match_Token  :    out Token_ID;
      Best_Match_Length :    out Natural
     ) is

      -- The table of token matches
      Match : Match_List;

      More_Possible_Matches : Boolean;

      Current_Char : Integer;

   begin

      -- Clear the state of all the tokens
      for Token_Index in Token_Id loop
         Clear (Analyzer.Token_List(Token_Index).all);
      end loop;
      Match := (others => So_Far_So_Good);

      Best_Match_Length     := 0;
      Current_Char          := 1;
      More_Possible_Matches := True;

      while More_Possible_Matches loop

         -- Get more data when we run out
         if Current_Char > Buffers.Length(Analyzer.Buffer) then

            Buffers.Append (Source   => Analyzer.Buffer,
                            New_Item => Analyzer.Get_More_Text.all);
         end if;

         -- Assume no more matches until proven otherwise
         More_Possible_Matches := False;

         -- Check all the token Analyzers...
         for Token_Index in Token_Id loop

            -- check only tokens that haven't yet failed...
            if Match(Token_Index) /= Failed then

               -- Dispatch to the token's analyze routine with the new character
               Analyze (The_Token => Analyzer.Token_List(Token_Index).all,
                        Next_Char => Buffers.Element( Source => Analyzer.Buffer,
                                                      Index  => Current_Char),
                        Verdict   => Match(Token_Index));

               -- If its the longest match yet, save it.
               if  Match(Token_Index) = Matches and Best_Match_Length < Current_Char then

                  Best_Match_Token  := Token_Index;
                  Best_Match_Length := Current_Char;

               end if;

               -- If we find at least one possible match and we aren't at the end of the file,
               -- keep checking.
               if Match(Token_Index) /= Failed then
                  More_Possible_Matches := Buffers.Element( Source => Analyzer.Buffer,
                                                            Index  => Current_Char) /=
                    EOF_Character;
               end if;
            end if;

         end loop;

         Current_Char := Current_Char + 1;

      end loop;

   end Find_Best_Match;

   ----------------------------------------------------------------------------
   -- Return an Analyzer with the given syntax and text feeder function.
   ----------------------------------------------------------------------------
   function Initialize (Language_Syntax : in Syntax;
                        Feeder          : in Text_Feeder := Input_Feeder'Access
                       ) return Instance is
      New_Analyzer : Instance;
   begin
      New_Analyzer.Token_List := Language_Syntax;
      New_Analyzer.Get_More_Text := Feeder;
      New_Analyzer.Has_Default := False;

      New_Analyzer.Line        := 1;
      New_Analyzer.Column      := 1;
      New_Analyzer.Buffer      := Buffers.Null_Bounded_String;
      New_Analyzer.Next_Line   := 1;
      New_Analyzer.Next_Column := 1;

      return New_Analyzer;
   end Initialize;

   function Initialize (Language_Syntax : in Syntax;
                        Default         : in Token_ID;
                        Feeder          : in Text_Feeder := Input_Feeder'Access
                       ) return Instance is
      New_Analyzer : Instance;
   begin
      New_Analyzer.Token_List    := Language_Syntax;
      New_Analyzer.Get_More_Text := Feeder;
      New_Analyzer.Has_Default   := True;
      New_Analyzer.Default_Token := Default;

      New_Analyzer.Line        := 1;
      New_Analyzer.Column      := 1;
      New_Analyzer.Buffer      := Buffers.Null_Bounded_String;
      New_Analyzer.Next_Line   := 1;
      New_Analyzer.Next_Column := 1;

      return New_Analyzer;
   end Initialize;

   ----------------------------------------------------------------------------
   -- Set the analyzer's text feeder function to be the given function.
   ----------------------------------------------------------------------------
   procedure Set_Text_Feeder (Analyzer : in out Instance; Feeder : in Text_Feeder) is
   begin
      Analyzer.Get_More_Text := Feeder;
   end Set_Text_Feeder;


   ----------------------------------------------------------------------------
   -- Set the Analyzer's syntax to the given value.
   ----------------------------------------------------------------------------
   procedure Set_Syntax (Analyzer : in out Instance; Language_Syntax : in Syntax) is
   begin
      Analyzer.Token_List := Language_Syntax;
   end Set_Syntax;

   ----------------------------------------------------------------------------
   -- Set the analyzer's default token to the given ID.
   --
   -- If Find_Next can't find a matching token, it will set Token to this token
   -- id, instead of raising syntax error. The Lexeme in this situation will
   -- be contain all the contiguous characters that fail to match an token.
   -- In practice this will be much less efficient than an "error" token that
   -- explicitly matches unmatchable strings. But often those are quite
   -- difficult to construct.
   -- The default token will be checked for legitimate matches. If this is not
   -- the behavior you want, it would be best to use a token that can't match
   -- any legitimate string (eg: Token.Nothing)
   ----------------------------------------------------------------------------
   procedure Set_Default (Analyzer : in out Instance;
                          Default  : in     Token_ID
                         ) is
   begin
      Analyzer.Has_Default   := True;
      Analyzer.Default_Token := Default;
   end Set_Default;

   ----------------------------------------------------------------------------
   -- Reset the analyzer to have *no* default token ID. If Find_Next doesn't
   -- find a matching token, Syntax_Error will be raised.
   ----------------------------------------------------------------------------
   procedure Unset_Default (Analyzer : in out Instance) is
   begin
      Analyzer.Has_Default := False;
   end Unset_Default;

   ----------------------------------------------------------------------------
   -- Get the next token.
   --
   -- Raises Syntax_Error or returns the default token if no token could be
   -- found.
   ----------------------------------------------------------------------------
   procedure Find_Next (Analyzer : in out Instance) is

      EOLs_Found : Integer;

      Matched_Token  : Token_ID;
      Matched_Length : Natural;

   begin

      loop

         -- Find the best token match from the input stream
         Find_Best_Match
           (Analyzer          => Analyzer,
            Best_Match_Token  => Matched_Token,
            Best_Match_Length => Matched_Length);


         --
         -- If we didn't find a match, its a either syntax error or a match to the default token.

         if Matched_Length = 0 then
            if Analyzer.Has_Default then

               -- Find all the characters that *aren't* part of a match
               Find_Non_Match
                 (Last_Unmatched => Matched_Length,
                  Analyzer       => Analyzer);
               Matched_Token := Analyzer.Default_Token;

            else
               Ada.Exceptions.Raise_Exception(Syntax_Error'Identity, "Unrecognized character '" &
                                              Buffers.Element (Source => Analyzer.Buffer, Index => 1) & "'");
            end if;
         end if;

         --
         -- Update the line and column count

         Analyzer.Line   := Analyzer.Next_Line;
         Analyzer.Column := Analyzer.Next_Column;

         EOLs_Found := Buffers.Count (Source  =>
           Buffers.Head (Source => Analyzer.Buffer,
                         Count  => Matched_Length),
                                      Pattern => (1 => EOL_Character));
         Analyzer.Next_Line := Analyzer.Next_Line + EOLs_Found;

         if EOLs_Found = 0 then
            Analyzer.Next_Column := Analyzer.Next_Column + Matched_Length;
         else
            Analyzer.Next_Column := 1 + Matched_Length - Buffers.Index
              (Source  => Buffers.Head (Source => Analyzer.Buffer,
                                        Count  => Matched_Length),
               Pattern => (1 => EOL_Character),
               Going   => Ada.Strings.Backward);
         end if;

         --
         -- Quit when we find a reportable token

         exit when Analyzer.Token_List(Matched_Token).Report;

         --
         -- Ditch the last token to make room for more parsing

         Buffers.Tail (Source => Analyzer.Buffer,
                       Count  => Buffers.Length (Analyzer.Buffer) - Matched_Length);

      end loop;

      -- Save off the information for the lexeme we found
      Analyzer.Lexeme := Buffers.Head (Source => Analyzer.Buffer, Count => Matched_Length);
      Analyzer.Last_Token := Matched_Token;

      -- Ditch the last token to make room for more parsing
      Buffers.Tail (Source => Analyzer.Buffer,
                    Count  => Buffers.Length (Analyzer.Buffer) - Matched_Length);

   end Find_Next;

   ----------------------------------------------------------------------------
   -- Returns the current text line at which processing will resume.
   ----------------------------------------------------------------------------
   function Line (Analyzer : in Instance) return Natural is
   begin
      return Analyzer.Line;
   end Line;

   ----------------------------------------------------------------------------
   -- Returns the current text column at which processing will resume.
   ----------------------------------------------------------------------------
   function Column (Analyzer : in Instance) return Natural is
   begin
      return Analyzer.Column;
   end Column;

   ----------------------------------------------------------------------------
   -- Returns the last token that was matched.
   ----------------------------------------------------------------------------
   function Token (Analyzer : in Instance) return Token_Id is
   begin
      return Analyzer.Last_Token;
   end Token;

   ----------------------------------------------------------------------------
   -- Returns the actual text of the last token that was matched.
   ----------------------------------------------------------------------------
   function Lexeme (Analyzer : in Instance) return String is
   begin
      return Buffers.To_String(Analyzer.Lexeme);
   end Lexeme;

   ----------------------------------------------------------------------------
   -- This function returns strings read from Current_Input. If the end of the
   -- file is reached, a Token.EOF_Character is retured to the analyzer.
   -- This is the default input feeder function.
   ----------------------------------------------------------------------------
   function Input_Feeder return String is
      Buffer : String(1..512);
      Data_Length : Integer;
   begin

      Ada.Text_IO.Get_Line (Item => Buffer,
                            Last => Data_Length);

      if Ada.Text_IO.End_Of_File then
         return Buffer(1..Data_Length) & EOF_Character;
      else
         return Buffer(1..Data_Length) & EOL_Character;
      end if;
   exception
      when Ada.Text_IO.End_Error =>
         return (1 => EOF_Character);

   end Input_Feeder;

end Token.Analyzer;




