-------------------------------------------------------------------------------
--
-- Copyright (C) 1999 Christoph Karl Walter Grein
--
-- This file is part of the OpenToken package.
--
-- The OpenToken package is free software; you can redistribute it and/or
-- modify it under the terms of the  GNU General Public License as published
-- by the Free Software Foundation; either version 2, or (at your option)
-- any later version. The OpenToken package is distributed in the hope that
-- it will be useful, but WITHOUT ANY WARRANTY; without even the implied
-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for  more details.  You should have received
-- a copy of the GNU General Public License  distributed with the OpenToken
-- package;  see file GPL.txt.  If not, write to  the Free Software Foundation,
-- 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.
--
-- As a special exception,  if other files  instantiate  generics from this
-- unit, or you link this unit with other files to produce an executable,
-- this unit does not by itself cause the resulting executable to be
-- covered by the GNU General Public License.  This exception does not
-- however invalidate any other reasons why the executable file might be
-- covered by the GNU Public License.
--
-- Maintainer: Christoph K. W. Grein (Christ-Usch.Grein@T-Online.de)
--
-- Update History:
-- $Log: java_lexer.ads,v $
-- Revision 1.2  1999/10/08 23:19:01  Ted
-- Disable sign recognition in integer and real literals
--
-- Revision 1.1  1999/08/17 03:40:24  Ted
-- Initial Version
--
--
-- 1.0 -  8 August 1999  Final complete version
-- 0.2 -  4 July   1999  Exclusion set for characters
-- 0.1 - 28 June   1999  Added escape sequences and /* bracketed comment */
-- 0.0 - 27 June   1999  First preliminary release
-------------------------------------------------------------------------------

with Ada.Strings.Maps;

with Token.Analyzer;
with Token.Keyword, Token.Separator;
with Token.Identifier;
with Token.Graphic_Character,
     Token.Escape_Sequence, Token.Octal_Escape, Token.String;
with Token.Integer, Token.Based_Integer_Java_Style,
     Token.Real;
with Token.Character_Set;
with Token.Line_Comment, Token.Bracketed_Comment;
with Token.End_Of_File;

pragma Elaborate_All (Token.Analyzer,
                      Token.Keyword, Token.Separator,
                      Token.Identifier,
                      Token.Graphic_Character,
                      Token.Escape_Sequence, Token.Octal_Escape, Token.String,
                      Token.Integer, Token.Based_Integer_Java_Style,
                      Token.Real,
                      Token.Character_Set,
                      Token.Line_Comment, Token.Bracketed_Comment,
                      Token.End_Of_File);

package Java_Lexer is

  ---------------------------------------------------------------------
  -- This is a lexical analyser for the Java language.
  -- In the current preliminary state, not all tokens are recognized.
  --
  -- Missing:
  --   Numerals with suffixes
  --     integer suffixes l L
  --     float suffixes d D f F
  --
  -- There is another lexer for the Ada and Java languages at:
  --   <http://home.T-Online.de/home/Christ-Usch.Grein/Ada/Lexer.html>
  ---------------------------------------------------------------------

  -- Enumeration of every token kind this lexer can report.
  -- It is the generic actual of Token.Analyzer (see Tokenizer below) and
  -- each literal has exactly one recognizer associated with it in Syntax.
  -- Literals are grouped as: keywords, separators and operators, literals,
  -- and other tokens (identifier, comments, whitespace, end of file).
  -- Entries that are commented out correspond to the suffixed numerals
  -- listed as "Missing" in the package comment, and to an error token.
  -- NOTE(review): the declaration order may be significant to
  -- Token.Analyzer when several recognizers match the same text -
  -- confirm before reordering literals.
  type Java_Token is
    (-- Keywords JRM 3.9
     Abstract_T,
     Boolean_T, Break_T, Byte_T,
     Case_T, Catch_T, Char_T, Class_T, Const_T, Continue_T,
     Default_T, Do_T, Double_T,
     Else_T, Extends_T,
     Final_T, Finally_T, Float_T, For_T,
     Goto_T,
     If_T, Implements_T, Import_T, InstanceOf_T, Int_T, Interface_T,
     Long_T,
     Native_T, New_T,
     Package_T, Private_T, Protected_T, Public_T,
     Return_T,
     Short_T, Static_T, Super_T, Switch_T, Synchronized_T,
     This_T, Throw_T, Throws_T, Transient_T, Try_T,
     Void_T, Volatile_T,
     While_T,
     -- Separators JRM 3.11
     -- ( ) { } [ ] ; , .
     -- Operators JRM 3.12
     -- =  >  <  !  ~  ?  :
     -- == <= >= != && || ++ --
     -- +  -  *  /  &  |  ^  %  <<  >>  >>>
     -- += -= *= /= &= |= ^= %= <<= >>= >>>=
     -- (separators and operators are declared together below; the
     -- trailing comment on each line shows the lexemes in literal order)
     Colon_T, Comma_T, Dot_T, Semicolon_T,             -- : , . ;
     LeftBrace_T, RightBrace_T,                        -- { }
     LeftBracket_T, RightBracket_T,                    -- [ ]
     Left_Parenthesis_T, Right_Parenthesis_T,          -- ( )
     And_T, Or_T,                                      -- & |
     ShortCutAnd_T, ShortCutOr_T,                      -- && ||
     Assignment_T, Conditional_T,                      -- = ?
     Equal_T, NotEqual_T,                              -- == !=
     Greater_Equal_T, Less_Equal_T,                    -- >= <=
     Greater_Than_T, Less_Than_T,                      -- > <
     Complement_T, Not_T, Xor_T,                       -- ~ ! ^
     Plus_T, Minus_T, Times_T, Divide_T, Remainder_T,  -- + - * / %
     Increment_T, Decrement_T,                         -- ++ --
     LeftShift_T, RightShift_T, UnsignedRightShift_T,  -- << >> >>>
     PlusAssign_T, MinusAssign_T,                      -- += -=
     TimesAssign_T, DivideAssign_T, RemainderAssign_T, -- *= /= %=
     AndAssign_T, OrAssign_T, XorAssign_T,             -- &= |= ^=
     LeftShiftAssign_T, RightShiftAssign_T,            -- <<= >>=
     UnsignedRightShiftAssign_T,                       -- >>>=
     -- Literals (JRM 3.10) (all Java reals may use lazy forms,
     -- i.e. the whole or decimal part may be missing)
     -- null, false and true are recognized as keywords in Syntax.
     Null_T, False_T, True_T,
     Integer_T,           -- 1
     Based_Integer_T,     -- 07, 0xF
   --LongInteger_T,       -- 1L
   --BasedLongInteger_T,  -- 07L, 0xFL
     Real_T,              -- 1.0, 1., .1, 1E+7
   --FloatNumber_T,       -- 1.0E+10F
   --DoubleNumber_T,      -- 1.0E+10D
     Character_T,         -- 'x' with x any graphic character except one of "'\
     Escape_Sequence_T,   -- '\x' with x one of btnfr"'\
     Octal_Escape_T,      -- '\377'
     String_T,            -- "Any characters except " or \ and escape sequences"
     -- Other tokens
     Identifier_T,
     EndOfLineComment_T,  -- // to end of line
     EmbeddedComment_T,   -- /* anything (even several lines) */
     Whitespace_T,
     -- Syntax error
  -- Bad_Token_T,
     --
     End_of_File_T);

  -- Instance of the generic token analyzer for the Java_Token enumeration.
  -- It supplies the Syntax and Instance types and the Initialize function
  -- used by the declarations below.
  package Tokenizer is new Token.Analyzer (Java_Token);

  -- The Java lexical syntax: associates every Java_Token literal with a
  -- newly allocated recognizer object obtained from the matching
  -- Token.* child package. Named association is used throughout, so the
  -- textual order of the entries here need not follow the order of the
  -- literals in Java_Token (Identifier_T, for example, appears earlier
  -- here than in the type declaration).
  Syntax : constant Tokenizer.Syntax :=
    -- Keywords (JRM 3.9). All are requested with Case_Sensitive => True.
    (Abstract_T     => new Token.Keyword.Instance'(Token.Keyword.Get ("abstract"    , Case_Sensitive => True)),
     Boolean_T      => new Token.Keyword.Instance'(Token.Keyword.Get ("boolean"     , Case_Sensitive => True)),
     Break_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("break"       , Case_Sensitive => True)),
     Byte_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("byte"        , Case_Sensitive => True)),
     Case_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("case"        , Case_Sensitive => True)),
     Catch_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("catch"       , Case_Sensitive => True)),
     Char_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("char"        , Case_Sensitive => True)),
     Class_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("class"       , Case_Sensitive => True)),
     Const_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("const"       , Case_Sensitive => True)),
     Continue_T     => new Token.Keyword.Instance'(Token.Keyword.Get ("continue"    , Case_Sensitive => True)),
     Default_T      => new Token.Keyword.Instance'(Token.Keyword.Get ("default"     , Case_Sensitive => True)),
     Do_T           => new Token.Keyword.Instance'(Token.Keyword.Get ("do"          , Case_Sensitive => True)),
     Double_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("double"      , Case_Sensitive => True)),
     Else_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("else"        , Case_Sensitive => True)),
     Extends_T      => new Token.Keyword.Instance'(Token.Keyword.Get ("extends"     , Case_Sensitive => True)),
     Final_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("final"       , Case_Sensitive => True)),
     Finally_T      => new Token.Keyword.Instance'(Token.Keyword.Get ("finally"     , Case_Sensitive => True)),
     Float_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("float"       , Case_Sensitive => True)),
     For_T          => new Token.Keyword.Instance'(Token.Keyword.Get ("for"         , Case_Sensitive => True)),
     Goto_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("goto"        , Case_Sensitive => True)),
     If_T           => new Token.Keyword.Instance'(Token.Keyword.Get ("if"          , Case_Sensitive => True)),
     Implements_T   => new Token.Keyword.Instance'(Token.Keyword.Get ("implements"  , Case_Sensitive => True)),
     Import_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("import"      , Case_Sensitive => True)),
     InstanceOf_T   => new Token.Keyword.Instance'(Token.Keyword.Get ("instanceof"  , Case_Sensitive => True)),
     Int_T          => new Token.Keyword.Instance'(Token.Keyword.Get ("int"         , Case_Sensitive => True)),
     Interface_T    => new Token.Keyword.Instance'(Token.Keyword.Get ("interface"   , Case_Sensitive => True)),
     Long_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("long"        , Case_Sensitive => True)),
     Native_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("native"      , Case_Sensitive => True)),
     New_T          => new Token.Keyword.Instance'(Token.Keyword.Get ("new"         , Case_Sensitive => True)),
     Package_T      => new Token.Keyword.Instance'(Token.Keyword.Get ("package"     , Case_Sensitive => True)),
     Private_T      => new Token.Keyword.Instance'(Token.Keyword.Get ("private"     , Case_Sensitive => True)),
     Protected_T    => new Token.Keyword.Instance'(Token.Keyword.Get ("protected"   , Case_Sensitive => True)),
     Public_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("public"      , Case_Sensitive => True)),
     Return_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("return"      , Case_Sensitive => True)),
     Short_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("short"       , Case_Sensitive => True)),
     Static_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("static"      , Case_Sensitive => True)),
     Super_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("super"       , Case_Sensitive => True)),
     Switch_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("switch"      , Case_Sensitive => True)),
     Synchronized_T => new Token.Keyword.Instance'(Token.Keyword.Get ("synchronized", Case_Sensitive => True)),
     This_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("this"        , Case_Sensitive => True)),
     Throw_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("throw"       , Case_Sensitive => True)),
     Throws_T       => new Token.Keyword.Instance'(Token.Keyword.Get ("throws"      , Case_Sensitive => True)),
     Transient_T    => new Token.Keyword.Instance'(Token.Keyword.Get ("transient"   , Case_Sensitive => True)),
     Try_T          => new Token.Keyword.Instance'(Token.Keyword.Get ("try"         , Case_Sensitive => True)),
     Void_T         => new Token.Keyword.Instance'(Token.Keyword.Get ("void"        , Case_Sensitive => True)),
     Volatile_T     => new Token.Keyword.Instance'(Token.Keyword.Get ("volatile"    , Case_Sensitive => True)),
     While_T        => new Token.Keyword.Instance'(Token.Keyword.Get ("while"       , Case_Sensitive => True)),
     -- Separators and operators (JRM 3.11, 3.12). Both kinds use
     -- Token.Separator with the exact lexeme. Several lexemes are
     -- prefixes of longer ones (e.g. ">" of ">>", ">>>", ">>>=").
     -- NOTE(review): how such overlaps are resolved is decided by
     -- Token.Analyzer, which is not visible here - confirm there.
     Colon_T                    => new Token.Separator.Instance'(Token.Separator.Get (":")),
     Comma_T                    => new Token.Separator.Instance'(Token.Separator.Get (",")),
     Dot_T                      => new Token.Separator.Instance'(Token.Separator.Get (".")),
     Semicolon_T                => new Token.Separator.Instance'(Token.Separator.Get (";")),
     LeftBrace_T                => new Token.Separator.Instance'(Token.Separator.Get ("{")),
     RightBrace_T               => new Token.Separator.Instance'(Token.Separator.Get ("}")),
     LeftBracket_T              => new Token.Separator.Instance'(Token.Separator.Get ("[")),
     RightBracket_T             => new Token.Separator.Instance'(Token.Separator.Get ("]")),
     Left_Parenthesis_T         => new Token.Separator.Instance'(Token.Separator.Get ("(")),
     Right_Parenthesis_T        => new Token.Separator.Instance'(Token.Separator.Get (")")),
     And_T                      => new Token.Separator.Instance'(Token.Separator.Get ("&")),
     Or_T                       => new Token.Separator.Instance'(Token.Separator.Get ("|")),
     ShortCutAnd_T              => new Token.Separator.Instance'(Token.Separator.Get ("&&")),
     ShortCutOr_T               => new Token.Separator.Instance'(Token.Separator.Get ("||")),
     Assignment_T               => new Token.Separator.Instance'(Token.Separator.Get ("=")),
     Conditional_T              => new Token.Separator.Instance'(Token.Separator.Get ("?")),
     Equal_T                    => new Token.Separator.Instance'(Token.Separator.Get ("==")),
     NotEqual_T                 => new Token.Separator.Instance'(Token.Separator.Get ("!=")),
     Greater_Equal_T            => new Token.Separator.Instance'(Token.Separator.Get (">=")),
     Less_Equal_T               => new Token.Separator.Instance'(Token.Separator.Get ("<=")),
     Greater_Than_T             => new Token.Separator.Instance'(Token.Separator.Get (">")),
     Less_Than_T                => new Token.Separator.Instance'(Token.Separator.Get ("<")),
     Complement_T               => new Token.Separator.Instance'(Token.Separator.Get ("~")),
     Not_T                      => new Token.Separator.Instance'(Token.Separator.Get ("!")),
     Xor_T                      => new Token.Separator.Instance'(Token.Separator.Get ("^")),
     Plus_T                     => new Token.Separator.Instance'(Token.Separator.Get ("+")),
     Minus_T                    => new Token.Separator.Instance'(Token.Separator.Get ("-")),
     Times_T                    => new Token.Separator.Instance'(Token.Separator.Get ("*")),
     Divide_T                   => new Token.Separator.Instance'(Token.Separator.Get ("/")),
     Remainder_T                => new Token.Separator.Instance'(Token.Separator.Get ("%")),
     Increment_T                => new Token.Separator.Instance'(Token.Separator.Get ("++")),
     Decrement_T                => new Token.Separator.Instance'(Token.Separator.Get ("--")),
     LeftShift_T                => new Token.Separator.Instance'(Token.Separator.Get ("<<")),
     RightShift_T               => new Token.Separator.Instance'(Token.Separator.Get (">>")),
     UnsignedRightShift_T       => new Token.Separator.Instance'(Token.Separator.Get (">>>")),
     PlusAssign_T               => new Token.Separator.Instance'(Token.Separator.Get ("+=")),
     MinusAssign_T              => new Token.Separator.Instance'(Token.Separator.Get ("-=")),
     TimesAssign_T              => new Token.Separator.Instance'(Token.Separator.Get ("*=")),
     DivideAssign_T             => new Token.Separator.Instance'(Token.Separator.Get ("/=")),
     RemainderAssign_T          => new Token.Separator.Instance'(Token.Separator.Get ("%=")),
     AndAssign_T                => new Token.Separator.Instance'(Token.Separator.Get ("&=")),
     OrAssign_T                 => new Token.Separator.Instance'(Token.Separator.Get ("|=")),
     XorAssign_T                => new Token.Separator.Instance'(Token.Separator.Get ("^=")),
     LeftShiftAssign_T          => new Token.Separator.Instance'(Token.Separator.Get ("<<=")),
     RightShiftAssign_T         => new Token.Separator.Instance'(Token.Separator.Get (">>=")),
     UnsignedRightShiftAssign_T => new Token.Separator.Instance'(Token.Separator.Get (">>>=")),
     -- Literals (JRM 3.10). The null and boolean literals are matched
     -- with the keyword recognizer, again case sensitively.
     Null_T  => new Token.Keyword.Instance'(Token.Keyword.Get ("null" , Case_Sensitive => True)),
     False_T => new Token.Keyword.Instance'(Token.Keyword.Get ("false", Case_Sensitive => True)),
     True_T  => new Token.Keyword.Instance'(Token.Keyword.Get ("true" , Case_Sensitive => True)),
     -- Numeric literals. Sign recognition is switched off for integers
     -- and reals (see revision 1.2 in the update history above).
     -- Decimal integers are requested without underscores, exponent or
     -- leading zero; 07- and 0xF-style numerals are left to
     -- Based_Integer_T (see the comments on Java_Token).
     Integer_T       => new Token.Integer.Instance'(Token.Integer.Get (Allow_Underscores  => False,
                                                                       Allow_Exponent     => False,
                                                                       Allow_Signs        => False,
                                                                       Allow_Leading_Zero => False)),
     Based_Integer_T => new Token.Based_Integer_Java_Style.Instance'(Token.Based_Integer_Java_Style.Get),
     -- Allow_Laziness => True corresponds to the "lazy forms" mentioned
     -- on Java_Token (whole or decimal part may be missing).
     Real_T          => new Token.Real.Instance'(Token.Real.Get (Allow_Underscores => False,
                                                                 Allow_Signs       => False,
                                                                 Allow_Laziness    => True)),
     Identifier_T  => new Token.Identifier.Instance'(Token.Identifier.Get),
     -- Character literals. The excluded set below consists of the
     -- three characters quotation mark, apostrophe and backslash
     -- (the quotation mark is doubled inside the Ada string literal).
     Character_T       => new Token.Graphic_Character.Instance'(Token.Graphic_Character.Get
                                      (Exclude => Ada.Strings.Maps.To_Set ("""'\"))),
     -- Characters accepted after the backslash: b t n f r " ' \
     Escape_Sequence_T => new Token.Escape_Sequence.Instance'(Token.Escape_Sequence.Get
                                      (Ada.Strings.Maps.To_Set ("btnfr""'\"))),
     Octal_Escape_T    => new Token.Octal_Escape.Instance'(Token.Octal_Escape.Get),
     -- String literals use the Java-style escape code map; the exact
     -- meaning of Escapeable and Double_Delimiter is defined in
     -- Token.String (not shown here).
     String_T          => new Token.String.Instance'(Token.String.Get
                                      (Escapeable       => True,
                                       Double_Delimiter => False,
                                       Escape_Mapping   => Token.String.Java_Style_Escape_Code_Map)),
     -- Comments, whitespace and end of input.
     EndOfLineComment_T => new Token.Line_Comment.Instance'(Token.Line_Comment.Get ("//")),
     EmbeddedComment_T  => new Token.Bracketed_Comment.Instance'(Token.Bracketed_Comment.Get ("/*", "*/")),
     Whitespace_T  => new Token.Character_Set.Instance'(Token.Character_Set.Get
                                      (Token.Character_Set.Standard_Whitespace)),
     End_of_File_T => new Token.End_Of_File.Instance'(Token.End_Of_File.Get));

   -- The Java analyzer object, initialized from Syntax when this package
   -- is elaborated. It is a variable declared in the package
   -- specification, so every unit that withs Java_Lexer refers to this
   -- same object.
   Analyzer: Tokenizer.Instance := Tokenizer.Initialize (Syntax);

end Java_Lexer;
