-------------------------------------------------------------------------------
--
-- Copyright (C) 1999 FlightSafety International and Ted Dennison
--
-- This file is part of the OpenToken package.
--
-- The OpenToken package is free software; you can redistribute it and/or
-- modify it under the terms of the  GNU General Public License as published
-- by the Free Software Foundation; either version 2, or (at your option)
-- any later version. The OpenToken package is distributed in the hope that
-- it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for  more details.  You should have received
-- a copy of the GNU General Public License  distributed with the OpenToken
-- package;  see file GPL.txt.  If not, write to  the Free Software Foundation,
-- 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.
--
-- As a special exception,  if other files  instantiate  generics from this
-- unit, or you link this unit with other files to produce an executable,
-- this unit does not by itself cause the resulting executable to be
-- covered by the GNU General Public License.  This exception does not
-- however invalidate any other reasons why the executable file might be
-- covered by the GNU Public License.
--
-- Maintainer: Ted Dennison (dennison@telepath.com)
--
-- This software was originally developed by the following company, and was
-- released as open-source software as a service to the community:
--
--           FlightSafety International Simulation Systems Division
--                    Broken Arrow, OK  USA  918-259-4000
--
-- Update History:
-- $Log: token-analyzer.ads,v $
-- Revision 1.4  1999/10/08 22:47:33  Ted
-- Add default token functionality
--
-- Revision 1.3  1999/08/17 03:07:54  Ted
-- Add log line
--
-------------------------------------------------------------------------------

with Ada.Characters.Latin_1;
with Ada.Text_IO;
with Ada.Strings.Bounded;

-------------------------------------------------------------------------------
-- This package implements a mostly full-strength tokenizer (or lexical
-- analyzer).
--
-- To use it, create an enumerated type of all the tokens you want to
-- recognize and instantiate this package with that type. Optionally, create a
-- function to feed text strings into the Analyzer; if you do not, Input_Feeder
-- is used by default.
--
-- Next, define a token subclass for each token in the enumerated type. Then
-- create a Syntax which matches up the tokens to their appropriate token class
-- and pass it (along with the feeder function, if any) into Initialize, or
-- into Set_Syntax on an existing Analyzer.
--
-- Once that is done, you may repeatedly call Find_Next to get tokens.
-------------------------------------------------------------------------------
generic

   -- Enumeration of every token the analyzer can report. It is also the index
   -- type of Syntax, so each value has exactly one token handle.
   type Token_Id is (<>);

package Token.Analyzer is

   ----------------------------------------------------------------------------
   -- Find_Next will call this function repeatedly to get more text to
   -- analyze. To cleanly mark the end of data, a Token.EOF_Character should
   -- be returned.
   ----------------------------------------------------------------------------
   type Text_Feeder is access function return String;

   -- Token handle type. Defined here to allow 'Access of objects declared at
   -- the same level as this package's instantiation.
   type Token_Handle is access all Token.Class;

   -- The syntax of a language, which is defined by the set of valid tokens.
   -- There is one token handle per Token_Id value.
   type Syntax is array (Token_Id) of Token_Handle;

   -- The state of one analyzer. All state lives in the object, so several
   -- analyzers may be in use at once.
   type Instance is tagged private;

   -- No token match could be found (and no default token was set).
   Syntax_Error : exception;

   -- A token was too long. Need to revisit the token definitions or raise
   -- Max_String_Length.
   -- NOTE(review): Max_String_Length is not declared in this spec; presumably
   -- it comes from the parent package Token -- confirm.
   Token_Too_Long : exception;

   ----------------------------------------------------------------------------
   -- This function returns strings read from Ada.Text_IO.Current_Input. If the
   -- end of the file is reached, a Token.EOF_Character is returned to the
   -- analyzer. This is the default input feeder function.
   ----------------------------------------------------------------------------
   function Input_Feeder return String;

   ----------------------------------------------------------------------------
   -- Return an Analyzer with the given syntax and text feeder function.
   --
   -- Feeder defaults to Input_Feeder. The second form additionally takes the
   -- ID of a default token (see Set_Default for what a default token means).
   ----------------------------------------------------------------------------
   function Initialize (Language_Syntax : in Syntax;
                        Feeder          : in Text_Feeder := Input_Feeder'Access
                       ) return Instance;
   function Initialize (Language_Syntax : in Syntax;
                        Default         : in Token_Id;
                        Feeder          : in Text_Feeder := Input_Feeder'Access
                       ) return Instance;

   ----------------------------------------------------------------------------
   -- Set the Analyzer's syntax to the given value.
   --
   -- Due to the accessibility rules of Ada, you cannot create syntax objects
   -- in which the component tokens are declared at a deeper dynamic scope than
   -- the instantiation of this package using 'Access on the tokens.
   -- 'Unchecked_Access is safe to use as long as the Analyzer does not have a
   -- longer lifetime than its tokens.
   ----------------------------------------------------------------------------
   procedure Set_Syntax (Analyzer : in out Instance; Language_Syntax : in Syntax);

   ----------------------------------------------------------------------------
   -- Set the analyzer's text feeder function to be the given function.
   ----------------------------------------------------------------------------
   procedure Set_Text_Feeder (Analyzer : in out Instance; Feeder : in Text_Feeder);

   ----------------------------------------------------------------------------
   -- Set the analyzer's default token to the given ID.
   --
   -- If Find_Next can't find a matching token, it will set Token to this token
   -- id, instead of raising Syntax_Error. The Lexeme in this situation will
   -- contain all the contiguous characters that fail to match any token.
   -- In practice this will be much less efficient than an "error" token that
   -- explicitly matches unmatchable strings. But often those are quite
   -- difficult to construct.
   -- The default token will be checked for legitimate matches. If this is not
   -- the behavior you want, it would be best to use a token that can't match
   -- any legitimate string (eg: Token.Nothing)
   ----------------------------------------------------------------------------
   procedure Set_Default (Analyzer : in out Instance;
                          Default  : in     Token_Id
                         );

   ----------------------------------------------------------------------------
   -- Reset the analyzer to have *no* default token ID. If Find_Next doesn't
   -- find a matching token, Syntax_Error will be raised.
   ----------------------------------------------------------------------------
   procedure Unset_Default (Analyzer : in out Instance);

   ----------------------------------------------------------------------------
   -- Locate the next token.
   --
   -- The next token will be the token that matches the *longest* sequence of
   -- characters before failing. Ties go to the token with the smallest
   -- Token_Id.
   --
   -- Raises Syntax_Error if no token could be found (unless there is a default
   -- token defined).
   ----------------------------------------------------------------------------
   procedure Find_Next (Analyzer : in out Instance);

   ----------------------------------------------------------------------------
   -- Returns the current text line at which processing will resume. This is
   -- particularly useful for printing error messages when syntax errors are
   -- detected.
   ----------------------------------------------------------------------------
   function Line (Analyzer : in Instance) return Natural;

   ----------------------------------------------------------------------------
   -- Returns the current text column at which processing will resume. This is
   -- particularly useful for printing error messages when syntax errors are
   -- detected.
   ----------------------------------------------------------------------------
   function Column (Analyzer : in Instance) return Natural;

   ----------------------------------------------------------------------------
   -- Returns the last token that was matched.
   --
   -- Note: from this declaration onward the simple name Token denotes this
   -- function rather than the parent package, so no declaration below it
   -- refers to the parent package by that name.
   ----------------------------------------------------------------------------
   function Token (Analyzer : in Instance) return Token_Id;

   ----------------------------------------------------------------------------
   -- Returns the actual text of the last token that was matched.
   ----------------------------------------------------------------------------
   function Lexeme (Analyzer : in Instance) return String;

private

   -- Put all the Analyzer's state information in here, so there can be several
   -- Analyzers running at once.
   --
   -- NOTE(review): Buffers is not declared in this spec; presumably it is an
   -- instantiation of Ada.Strings.Bounded.Generic_Bounded_Length made visible
   -- by the parent package Token -- confirm.
   type Instance is tagged record
      -- User-settable attributes
      Token_List    : Syntax;                               -- token handles
      Get_More_Text : Text_Feeder := Input_Feeder'Access;   -- text source
      Has_Default   : Boolean := False;                     -- no default yet
      Default_Token : Token_Id;  -- presumably only read when Has_Default

      -- User-gettable attributes
      Line        : Natural := 1;
      Column      : Natural := 1;
      Lexeme      : Buffers.Bounded_String;
      Last_Token  : Token_Id;

      -- Internal state information
      Buffer       : Buffers.Bounded_String := Buffers.Null_Bounded_String;
      Next_Line    : Natural := 1;
      Next_Column  : Natural := 1;

   end record;

end Token.Analyzer;




