Compiler/lexical analyzer: Difference between revisions

Content added Content deleted
m (J: add some documentation)
Line 3,677: Line 3,677:
{{out}}
{{out}}
Tested against all programs in [[Compiler/Sample programs]].
Tested against all programs in [[Compiler/Sample programs]].

=={{header|Fortran}}==
{{works with|gfortran|11.2.1}}
You should call the source file ‘lex.F90’, so gfortran will know to use the C preprocessor. I use the preprocessor to select between different ways to read stream input from the standard input.

(Despite the ‘.F90’ extension that I recommend, this is Fortran 2008/2018 code.)

There is ‘framework’ for supporting Unicode, but no actual Unicode support. To support Unicode reliably I would probably use the C interface and GNU libunistring.

The author has placed this Fortran code in the public domain.
<lang Fortran>!!!
!!! An implementation of the Rosetta Code lexical analyzer task:
!!! https://rosettacode.org/wiki/Compiler/lexical_analyzer
!!!
!!! The C implementation was used as a reference on behavior, but was
!!! not adhered to for the implementation.
!!!

module string_buffers
use, intrinsic :: iso_fortran_env, only: error_unit
use, intrinsic :: iso_fortran_env, only: int64

implicit none
private

public :: strbuf_t
public :: strbuf_t_length_kind
public :: strbuf_t_character_kind

integer, parameter :: strbuf_t_length_kind = int64

! String buffers can handle Unicode.
integer, parameter :: strbuf_t_character_kind = selected_char_kind ('ISO_10646')

! Private abbreviations.
integer, parameter :: nk = strbuf_t_length_kind
integer, parameter :: ck = strbuf_t_character_kind

type :: strbuf_t
integer(kind = nk), private :: len = 0
!
! ‘chars’ is made public for efficient access to the individual
! characters.
!
character(1, kind = ck), allocatable, public :: chars(:)
contains
procedure, pass, private :: ensure_storage => strbuf_t_ensure_storage
procedure, pass :: to_unicode => strbuf_t_to_unicode
procedure, pass :: length => strbuf_t_length
procedure, pass :: set => strbuf_t_set
procedure, pass :: append => strbuf_t_append
generic :: assignment(=) => set
end type strbuf_t

contains

function strbuf_t_to_unicode (strbuf) result (s)
class(strbuf_t), intent(in) :: strbuf
character(:, kind = ck), allocatable :: s

!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit ‘character’ is supported.
!

integer(kind = nk) :: i

allocate (character(len = strbuf%len, kind = ck) :: s)
do i = 1, strbuf%len
s(i:i) = strbuf%chars(i)
end do
end function strbuf_t_to_unicode

elemental function strbuf_t_length (strbuf) result (n)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk) :: n

n = strbuf%len
end function strbuf_t_length

elemental function next_power_of_two (x) result (y)
integer(kind = nk), intent(in) :: x
integer(kind = nk) :: y

!
! It is assumed that no more than 64 bits are used.
!
! The branch-free algorithm is that of
! https://archive.is/nKxAc#RoundUpPowerOf2
!
! Fill in bits until one less than the desired power of two is
! reached, and then add one.
!

y = x - 1
y = ior (y, ishft (y, -1))
y = ior (y, ishft (y, -2))
y = ior (y, ishft (y, -4))
y = ior (y, ishft (y, -8))
y = ior (y, ishft (y, -16))
y = ior (y, ishft (y, -32))
y = y + 1
end function next_power_of_two

elemental function new_storage_size (length_needed) result (size)
integer(kind = nk), intent(in) :: length_needed
integer(kind = nk) :: size

! Increase storage by orders of magnitude.

if (2_nk**32 < length_needed) then
size = huge (1_nk)
else
size = next_power_of_two (length_needed)
end if
end function new_storage_size

subroutine strbuf_t_ensure_storage (strbuf, length_needed)
class(strbuf_t), intent(inout) :: strbuf
integer(kind = nk), intent(in) :: length_needed

integer(kind = nk) :: new_size
type(strbuf_t) :: new_strbuf

if (.not. allocated (strbuf%chars)) then
! Initialize a new strbuf%chars array.
new_size = new_storage_size (length_needed)
allocate (strbuf%chars(1:new_size))
else if (ubound (strbuf%chars, 1) < length_needed) then
! Allocate a new strbuf%chars array, larger than the current
! one, but containing the same characters.
new_size = new_storage_size (length_needed)
allocate (new_strbuf%chars(1:new_size))
new_strbuf%chars(1:strbuf%len) = strbuf%chars(1:strbuf%len)
call move_alloc (new_strbuf%chars, strbuf%chars)
end if
end subroutine strbuf_t_ensure_storage

subroutine strbuf_t_set (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src

integer(kind = nk) :: n
integer(kind = nk) :: i

select type (src)
type is (character(*, kind = ck))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
type is (character(*))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n = src%len
call dst%ensure_storage(n)
dst%chars(1:n) = src%chars(1:n)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_set

subroutine strbuf_t_append (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src

integer(kind = nk) :: n_dst, n_src, n
integer(kind = nk) :: i

select type (src)
type is (character(*, kind = ck))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
type is (character(*))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n_dst = dst%len
n_src = src%len
n = n_dst + n_src
call dst%ensure_storage(n)
dst%chars((n_dst + 1):n) = src%chars(1:n_src)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_append

end module string_buffers

module lexical_analysis
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, intrinsic :: iso_fortran_env, only: int32
use, non_intrinsic :: string_buffers

implicit none
private

public :: lexer_input_t
public :: lexer_output_t
public :: run_lexer

integer, parameter :: input_file_unit_no = 100
integer, parameter :: output_file_unit_no = 101

! Private abbreviations.
integer, parameter :: nk = strbuf_t_length_kind
integer, parameter :: ck = strbuf_t_character_kind

! Integers large enough for a Unicode code point. Unicode code
! points (and UCS-4) have never been allowed to go higher than
! 7FFFFFFF, and are even further restricted now.
integer, parameter :: ichar_kind = int32

character(1, kind = ck), parameter :: horizontal_tab_char = char (9, kind = ck)
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
character(1, kind = ck), parameter :: vertical_tab_char = char (11, kind = ck)
character(1, kind = ck), parameter :: formfeed_char = char (12, kind = ck)
character(1, kind = ck), parameter :: carriage_return_char = char (13, kind = ck)
character(1, kind = ck), parameter :: space_char = ck_' '

! The following is correct for Unix and its relatives.
character(1, kind = ck), parameter :: newline_char = linefeed_char

character(1, kind = ck), parameter :: backslash_char = char (92, kind = ck)

character(*, kind = ck), parameter :: newline_intstring = ck_'10'
character(*, kind = ck), parameter :: backslash_intstring = ck_'92'

integer, parameter :: tk_EOI = 0
integer, parameter :: tk_Mul = 1
integer, parameter :: tk_Div = 2
integer, parameter :: tk_Mod = 3
integer, parameter :: tk_Add = 4
integer, parameter :: tk_Sub = 5
integer, parameter :: tk_Negate = 6
integer, parameter :: tk_Not = 7
integer, parameter :: tk_Lss = 8
integer, parameter :: tk_Leq = 9
integer, parameter :: tk_Gtr = 10
integer, parameter :: tk_Geq = 11
integer, parameter :: tk_Eq = 12
integer, parameter :: tk_Neq = 13
integer, parameter :: tk_Assign = 14
integer, parameter :: tk_And = 15
integer, parameter :: tk_Or = 16
integer, parameter :: tk_If = 17
integer, parameter :: tk_Else = 18
integer, parameter :: tk_While = 19
integer, parameter :: tk_Print = 20
integer, parameter :: tk_Putc = 21
integer, parameter :: tk_Lparen = 22
integer, parameter :: tk_Rparen = 23
integer, parameter :: tk_Lbrace = 24
integer, parameter :: tk_Rbrace = 25
integer, parameter :: tk_Semi = 26
integer, parameter :: tk_Comma = 27
integer, parameter :: tk_Ident = 28
integer, parameter :: tk_Integer = 29
integer, parameter :: tk_String = 30

character(len = 16), parameter :: token_names(0:30) = &
& (/ "End_of_input ", "Op_multiply ", "Op_divide ", "Op_mod ", "Op_add ", &
& "Op_subtract ", "Op_negate ", "Op_not ", "Op_less ", "Op_lessequal ", &
& "Op_greater ", "Op_greaterequal ", "Op_equal ", "Op_notequal ", "Op_assign ", &
& "Op_and ", "Op_or ", "Keyword_if ", "Keyword_else ", "Keyword_while ", &
& "Keyword_print ", "Keyword_putc ", "LeftParen ", "RightParen ", "LeftBrace ", &
& "RightBrace ", "Semicolon ", "Comma ", "Identifier ", "Integer ", &
& "String " /)

type :: token_t
integer :: token_no

! Our implementation stores the value of a tk_Integer as a
! string. The C reference implementation stores it as an int.
character(:, kind = ck), allocatable :: val

integer(nk) :: line_no
integer(nk) :: column_no
end type token_t

type :: lexer_input_t
logical, private :: using_input_unit = .true.
integer, private :: unit_no = -(huge (1))
integer(kind = nk) :: line_no = 1
integer(kind = nk) :: column_no = 0
integer, private :: unget_count = 0

! The maximum lookahead is 2, for how we handle comments.
character(1, kind = ck), private :: unget_buffer(1:2)
logical, private :: unget_eof_buffer(1:2)

! Using the same strbuf_t multiple times reduces the need for
! reallocations. Putting that strbuf_t in the lexer_input_t is
! simply for convenience.
type(strbuf_t), private :: strbuf

contains
!
! Note: There is currently no facility for closing one input and
! switching to another.
!
! Note: There is currently no facility to decode inputs into
! Unicode codepoints. Instead, what happens is raw bytes of
! input get stored as strbuf_t_character_kind values. This
! behavior is adequate for ASCII inputs.
!
procedure, pass :: use_file => lexer_input_t_use_file
procedure, pass :: get_next_ch => lexer_input_t_get_next_ch
procedure, pass :: unget_ch => lexer_input_t_unget_ch
procedure, pass :: unget_eof => lexer_input_t_unget_eof
end type lexer_input_t

type :: lexer_output_t
integer, private :: unit_no = output_unit
contains
procedure, pass :: use_file => lexer_output_t_use_file
procedure, pass :: output_token => lexer_output_t_output_token
end type lexer_output_t

contains

subroutine lexer_input_t_use_file (inputter, filename)
class(lexer_input_t), intent(inout) :: inputter
character(*), intent(in) :: filename

integer :: stat

inputter%using_input_unit = .false.
inputter%unit_no = input_file_unit_no
inputter%line_no = 1
inputter%column_no = 0

open (unit = input_file_unit_no, file = filename, status = 'old', &
& action = 'read', access = 'stream', form = 'unformatted', &
& iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", A, " for input")') filename
stop 1
end if
end subroutine lexer_input_t_use_file

#ifdef __GFORTRAN__

subroutine get_input_unit_char (c, stat)
!
! The following works if you are using gfortran.
!
! (FGETC is considered a feature for backwards compatibility with
! g77. However, I know of no way to reconfigure input_unit as a
! Fortran 2003 stream, for use with ordinary ‘read’.)
!
character, intent(inout) :: c
integer, intent(out) :: stat

call fgetc (input_unit, c, stat)
end subroutine get_input_unit_char

#else

subroutine get_input_unit_char (c, stat)
!
! An alternative implementation of get_input_unit_char. This
! actually reads input from the C standard input, which might not
! be the same as input_unit.
!
use, intrinsic :: iso_c_binding, only: c_int
character, intent(inout) :: c
integer, intent(out) :: stat

interface
!
! Use getchar(3) to read characters from standard input. This
! assumes there is actually such a function available, and that
! getchar(3) does not exist solely as a macro. (One could write
! one’s own getchar() if necessary, of course.)
!
function getchar () result (c) bind (c, name = 'getchar')
use, intrinsic :: iso_c_binding, only: c_int
integer(kind = c_int) :: c
end function getchar
end interface

integer(kind = c_int) :: i_char

i_char = getchar ()
!
! The C standard requires that EOF have a negative value. If the
! value returned by getchar(3) is not EOF, then it will be
! representable as an unsigned char. Therefore, to check for end
! of file, one need only test whether i_char is negative.
!
if (i_char < 0) then
stat = -1
else
stat = 0
c = char (i_char)
end if
end subroutine get_input_unit_char

#endif

subroutine lexer_input_t_get_next_ch (inputter, eof, ch)
class(lexer_input_t), intent(inout) :: inputter
logical, intent(out) :: eof
character(1, kind = ck), intent(inout) :: ch

integer :: stat
character(1) :: c

if (0 < inputter%unget_count) then
if (inputter%unget_eof_buffer(inputter%unget_count)) then
eof = .true.
else
eof = .false.
ch = inputter%unget_buffer(inputter%unget_count)
end if
inputter%unget_count = inputter%unget_count - 1
else
if (inputter%using_input_unit) then
call get_input_unit_char (c, stat)
else
read (unit = inputter%unit_no, iostat = stat) c
end if

ch = char (ichar (c, kind = ichar_kind), kind = ck)

if (0 < stat) then
write (error_unit, '("Input error with status code ", I0)') stat
stop 1
else if (stat < 0) then
eof = .true.
! The C reference code increases column number on end of file;
! therefore, so shall we.
inputter%column_no = inputter%column_no + 1
else
eof = .false.
if (ch == newline_char) then
inputter%line_no = inputter%line_no + 1
inputter%column_no = 0
else
inputter%column_no = inputter%column_no + 1
end if
end if
end if
end subroutine lexer_input_t_get_next_ch

subroutine lexer_input_t_unget_ch (inputter, ch)
class(lexer_input_t), intent(inout) :: inputter
character(1, kind = ck), intent(in) :: ch

if (ubound (inputter%unget_buffer, 1) <= inputter%unget_count) then
write (error_unit, '("class(lexer_input_t) unget buffer overflow")')
stop 1
else
inputter%unget_count = inputter%unget_count + 1
inputter%unget_buffer(inputter%unget_count) = ch
inputter%unget_eof_buffer(inputter%unget_count) = .false.
end if
end subroutine lexer_input_t_unget_ch

subroutine lexer_input_t_unget_eof (inputter)
class(lexer_input_t), intent(inout) :: inputter

if (ubound (inputter%unget_buffer, 1) <= inputter%unget_count) then
write (error_unit, '("class(lexer_input_t) unget buffer overflow")')
stop 1
else
inputter%unget_count = inputter%unget_count + 1
inputter%unget_buffer(inputter%unget_count) = ck_'*'
inputter%unget_eof_buffer(inputter%unget_count) = .true.
end if
end subroutine lexer_input_t_unget_eof

subroutine lexer_output_t_use_file (outputter, filename)
class(lexer_output_t), intent(inout) :: outputter
character(*), intent(in) :: filename

integer :: stat

outputter%unit_no = output_file_unit_no
open (unit = output_file_unit_no, file = filename, action = 'write', iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", A, " for output")') filename
stop 1
end if
end subroutine lexer_output_t_use_file

subroutine lexer_output_t_output_token (outputter, token)
class(lexer_output_t), intent(inout) :: outputter
class(token_t), intent(in) :: token

select case (token%token_no)
case (tk_Integer, tk_Ident, tk_String)
write (outputter%unit_no, '(X, I20, X, I20, X, A20, X, A)') &
& token%line_no, token%column_no, &
& token_names(token%token_no), token%val
case default
write (outputter%unit_no, '(X, I20, X, I20, X, A20)') &
& token%line_no, token%column_no, &
& token_names(token%token_no)
end select
end subroutine lexer_output_t_output_token

subroutine run_lexer (inputter, outputter)
class(lexer_input_t), intent(inout) :: inputter
class(lexer_output_t), intent(inout) :: outputter

type(token_t) :: token

token = get_token (inputter)
do while (token%token_no /= tk_EOI)
call outputter%output_token (token)
token = get_token (inputter)
end do
call outputter%output_token (token)
end subroutine run_lexer

function get_token (inputter) result (token)
class(lexer_input_t), intent(inout) :: inputter
type(token_t) :: token

logical :: eof
character(1, kind = ck) :: ch

call skip_spaces_and_comments (inputter, eof, ch, &
& token%line_no, token%column_no)

if (eof) then
token%token_no = tk_EOI
else
select case (ch)
case (ck_'{')
token%token_no = tk_Lbrace
case (ck_'}')
token%token_no = tk_Rbrace
case (ck_'(')
token%token_no = tk_Lparen
case (ck_')')
token%token_no = tk_Rparen
case (ck_'+')
token%token_no = tk_Add
case (ck_'-')
token%token_no = tk_Sub
case (ck_'*')
token%token_no = tk_Mul
case (ck_'%')
token%token_no = tk_Mod
case (ck_';')
token%token_no = tk_Semi
case (ck_',')
token%token_no = tk_Comma
case (ck_'/')
token%token_no = tk_Div

case (ck_"'")
call read_character_literal

case (ck_'<')
call distinguish_operators (ch, ck_'=', tk_Leq, tk_Lss)
case (ck_'>')
call distinguish_operators (ch, ck_'=', tk_Geq, tk_Gtr)
case (ck_'=')
call distinguish_operators (ch, ck_'=', tk_Eq, tk_Assign)
case (ck_'!')
call distinguish_operators (ch, ck_'=', tk_Neq, tk_Not)
case (ck_'&')
call distinguish_operators (ch, ck_'&', tk_And, tk_EOI)
case (ck_'|')
call distinguish_operators (ch, ck_'|', tk_Or, tk_EOI)

case (ck_'"')
call read_string_literal (ch, ch)

case default
if (isdigit (ch)) then
call read_numeric_literal (ch)
else if (isalpha_or_underscore (ch)) then
call read_identifier_or_keyword (ch)
else
call start_error_message (inputter)
write (error_unit, '("unrecognized character ''", A, "''")') ch
stop 1
end if
end select
end if
contains

subroutine read_character_literal
character(1, kind = ck) :: ch
logical :: eof
character(20, kind = ck) :: buffer

token%token_no = tk_Integer

call inputter%get_next_ch (eof, ch)
if (eof) then
call start_error_message (inputter)
write (error_unit, '("end of input in character literal")')
stop 1
else if (ch == ck_"'") then
call start_error_message (inputter)
write (error_unit, '("empty character literal")')
stop 1
else if (ch == backslash_char) then
call inputter%get_next_ch (eof, ch)
if (eof) then
call start_error_message (inputter)
write (error_unit, '("end of input in character literal, after backslash")')
stop 1
else if (ch == ck_'n') then
allocate (token%val, source = newline_intstring)
else if (ch == backslash_char) then
allocate (token%val, source = backslash_intstring)
else
call start_error_message (inputter)
write (error_unit, '("unknown escape sequence ''", A, A, "'' in character literal")') &
& backslash_char, ch
stop 1
end if
call read_character_literal_close_quote
else
call read_character_literal_close_quote
write (buffer, '(I0)') ichar (ch, kind = ichar_kind)
allocate (token%val, source = trim (buffer))
end if
end subroutine read_character_literal

subroutine read_character_literal_close_quote
logical :: eof
character(1, kind = ck) :: close_quote

call inputter%get_next_ch (eof, close_quote)
if (eof) then
call start_error_message (inputter)
write (error_unit, '("end of input in character literal")')
stop 1
else if (close_quote /= ck_"'") then
call start_error_message (inputter)
write (error_unit, '("multi-character literal")')
stop 1
end if
end subroutine read_character_literal_close_quote

subroutine distinguish_operators (first_ch, second_ch, &
& token_no_if_second_ch, &
& token_no_if_no_second_ch)
character(1, kind = ck), intent(in) :: first_ch
character(1, kind = ck), intent(in) :: second_ch
integer, intent(in) :: token_no_if_second_ch
integer, intent(in) :: token_no_if_no_second_ch

character(1, kind = ck) :: ch
logical :: eof

call inputter%get_next_ch (eof, ch)
if (eof) then
call inputter%unget_eof
token%token_no = token_no_if_no_second_ch
else if (ch == second_ch) then
token%token_no = token_no_if_second_ch
else if (token_no_if_no_second_ch == tk_EOI) then
call start_error_message (inputter)
write (error_unit, '("unrecognized character ''", A, "''")') first_ch
stop 1
else
call inputter%unget_ch (ch)
token%token_no = token_no_if_no_second_ch
end if
end subroutine distinguish_operators

subroutine read_string_literal (opening_quote, closing_quote)
character(1, kind = ck), intent(in) :: opening_quote
character(1, kind = ck), intent(in) :: closing_quote

character(1, kind = ck) :: ch
logical :: done

inputter%strbuf = opening_quote
done = .false.
do while (.not. done)
call inputter%get_next_ch (eof, ch)
if (eof) then
call start_error_message (inputter)
write (error_unit, '("end of input in string literal")')
stop 1
else if (ch == closing_quote) then
call inputter%strbuf%append(ch)
done = .true.
else if (ch == newline_char) then
call start_error_message (inputter)
write (error_unit, '("end of line in string literal")')
stop 1
else
call inputter%strbuf%append(ch)
end if
end do
allocate (token%val, source = inputter%strbuf%to_unicode())
token%token_no = tk_String
end subroutine read_string_literal

subroutine read_numeric_literal (first_ch)
character(1, kind = ck), intent(in) :: first_ch

character(1, kind = ck) :: ch

token%token_no = tk_Integer

inputter%strbuf = first_ch
call inputter%get_next_ch (eof, ch)
do while (isdigit (ch))
call inputter%strbuf%append (ch)
call inputter%get_next_ch (eof, ch)
end do
if (isalpha_or_underscore (ch)) then
call start_error_message (inputter)
write (error_unit, '("invalid numeric literal """, A, """")') &
& inputter%strbuf%to_unicode()
stop 1
else
call inputter%unget_ch (ch)
allocate (token%val, source = inputter%strbuf%to_unicode())
end if
end subroutine read_numeric_literal

subroutine read_identifier_or_keyword (first_ch)
character(1, kind = ck), intent(in) :: first_ch

character(1, kind = ck) :: ch

inputter%strbuf = first_ch
call inputter%get_next_ch (eof, ch)
do while (isalnum_or_underscore (ch))
call inputter%strbuf%append (ch)
call inputter%get_next_ch (eof, ch)
end do

call inputter%unget_ch (ch)

!
! The following is a handwritten ‘implicit radix tree’ search
! for keywords, first partitioning the set of keywords according
! to their lengths.
!
! I did it this way for fun. One could, of course, write a
! program to generate code for such a search.
!
! Perfect hashes are another method one could use.
!
! The reference C implementation uses a binary search.
!
token%token_no = tk_Ident
select case (inputter%strbuf%length())
case (2)
select case (inputter%strbuf%chars(1))
case (ck_'i')
select case (inputter%strbuf%chars(2))
case (ck_'f')
token%token_no = tk_If
case default
continue
end select
case default
continue
end select
case (4)
select case (inputter%strbuf%chars(1))
case (ck_'e')
select case (inputter%strbuf%chars(2))
case (ck_'l')
select case (inputter%strbuf%chars(3))
case (ck_'s')
select case (inputter%strbuf%chars(4))
case (ck_'e')
token%token_no = tk_Else
case default
continue
end select
case default
continue
end select
case default
continue
end select
case (ck_'p')
select case (inputter%strbuf%chars(2))
case (ck_'u')
select case (inputter%strbuf%chars(3))
case (ck_'t')
select case (inputter%strbuf%chars(4))
case (ck_'c')
token%token_no = tk_Putc
case default
continue
end select
case default
continue
end select
case default
continue
end select
case default
continue
end select
case (5)
select case (inputter%strbuf%chars(1))
case (ck_'p')
select case (inputter%strbuf%chars(2))
case (ck_'r')
select case (inputter%strbuf%chars(3))
case (ck_'i')
select case (inputter%strbuf%chars(4))
case (ck_'n')
select case (inputter%strbuf%chars(5))
case (ck_'t')
token%token_no = tk_Print
case default
continue
end select
case default
continue
end select
case default
continue
end select
case default
continue
end select
case (ck_'w')
select case (inputter%strbuf%chars(2))
case (ck_'h')
select case (inputter%strbuf%chars(3))
case (ck_'i')
select case (inputter%strbuf%chars(4))
case (ck_'l')
select case (inputter%strbuf%chars(5))
case (ck_'e')
token%token_no = tk_While
case default
continue
end select
case default
continue
end select
case default
continue
end select
case default
continue
end select
case default
continue
end select
case default
continue
end select
if (token%token_no == tk_Ident) then
allocate (token%val, source = inputter%strbuf%to_unicode ())
end if
end subroutine read_identifier_or_keyword

end function get_token

subroutine skip_spaces_and_comments (inputter, eof, ch, line_no, column_no)
!
! This procedure skips spaces and comments, and also captures the
! line and column numbers at the correct moment to indicate the
! start of a token.
!
class(lexer_input_t), intent(inout) :: inputter
logical, intent(out) :: eof
character(1, kind = ck), intent(inout) :: ch
integer(kind = nk), intent(out) :: line_no
integer(kind = nk), intent(out) :: column_no

integer(kind = nk), parameter :: not_done = -(huge (1_nk))

line_no = not_done
do while (line_no == not_done)
call inputter%get_next_ch (eof, ch)
if (eof) then
line_no = inputter%line_no
column_no = inputter%column_no
else if (ch == ck_'/') then
line_no = inputter%line_no
column_no = inputter%column_no
call inputter%get_next_ch (eof, ch)
if (eof) then
call inputter%unget_eof
ch = ck_'/'
else if (ch /= ck_'*') then
call inputter%unget_ch (ch)
ch = ck_'/'
else
call read_to_end_of_comment
line_no = not_done
end if
else if (.not. isspace (ch)) then
line_no = inputter%line_no
column_no = inputter%column_no
end if
end do

contains

subroutine read_to_end_of_comment
logical :: done

done = .false.
do while (.not. done)
call inputter%get_next_ch (eof, ch)
if (eof) then
call end_of_input_in_comment
else if (ch == ck_'*') then
call inputter%get_next_ch (eof, ch)
if (eof) then
call end_of_input_in_comment
else if (ch == ck_'/') then
done = .true.
end if
end if
end do
end subroutine read_to_end_of_comment

subroutine end_of_input_in_comment
call start_error_message (inputter)
write (error_unit, '("end of input in comment")')
stop 1
end subroutine end_of_input_in_comment

end subroutine skip_spaces_and_comments

subroutine start_error_message (inputter)
class(lexer_input_t), intent(inout) :: inputter

write (error_unit, '("Lexical error at ", I0, ".", I0, ": ")', advance = 'no') &
& inputter%line_no, inputter%column_no
end subroutine start_error_message

elemental function isspace (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

bool = (ch == horizontal_tab_char) .or. &
& (ch == linefeed_char) .or. &
& (ch == vertical_tab_char) .or. &
& (ch == formfeed_char) .or. &
& (ch == carriage_return_char) .or. &
& (ch == space_char)
end function isspace

elemental function isupper (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

integer(kind = ichar_kind), parameter :: uppercase_A = ichar (ck_'A', kind = ichar_kind)
integer(kind = ichar_kind), parameter :: uppercase_Z = ichar (ck_'Z', kind = ichar_kind)

integer(kind = ichar_kind) :: i_ch

i_ch = ichar (ch, kind = ichar_kind)
bool = (uppercase_A <= i_ch .and. i_ch <= uppercase_Z)
end function isupper

elemental function islower (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

integer(kind = ichar_kind), parameter :: lowercase_a = ichar (ck_'a', kind = ichar_kind)
integer(kind = ichar_kind), parameter :: lowercase_z = ichar (ck_'z', kind = ichar_kind)

integer(kind = ichar_kind) :: i_ch

i_ch = ichar (ch, kind = ichar_kind)
bool = (lowercase_a <= i_ch .and. i_ch <= lowercase_z)
end function islower

elemental function isalpha (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

bool = isupper (ch) .or. islower (ch)
end function isalpha

elemental function isdigit (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

integer(kind = ichar_kind), parameter :: zero = ichar (ck_'0', kind = ichar_kind)
integer(kind = ichar_kind), parameter :: nine = ichar (ck_'9', kind = ichar_kind)

integer(kind = ichar_kind) :: i_ch

i_ch = ichar (ch, kind = ichar_kind)
bool = (zero <= i_ch .and. i_ch <= nine)
end function isdigit

elemental function isalnum (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

bool = isalpha (ch) .or. isdigit (ch)
end function isalnum

elemental function isalpha_or_underscore (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

bool = isalpha (ch) .or. (ch == ck_'_')
end function isalpha_or_underscore

elemental function isalnum_or_underscore (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool

bool = isalnum (ch) .or. (ch == ck_'_')
end function isalnum_or_underscore

end module lexical_analysis

program lex
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: string_buffers
use, non_intrinsic :: lexical_analysis

implicit none

integer :: arg_count
character(200) :: arg
type(lexer_input_t) :: inputter
type(lexer_output_t) :: outputter

arg_count = command_argument_count ()
if (3 <= arg_count) then
call print_usage
else if (arg_count == 0) then
call run_lexer (inputter, outputter)
else if (arg_count == 1) then
call get_command_argument (1, arg)
call inputter%use_file(trim (arg))
call run_lexer (inputter, outputter)
else if (arg_count == 2) then
call get_command_argument (1, arg)
call inputter%use_file(trim (arg))
call get_command_argument (2, arg)
call outputter%use_file(trim (arg))
call run_lexer (inputter, outputter)
end if

contains

subroutine print_usage
character(200) :: progname

call get_command_argument (0, progname)
write (output_unit, '("Usage: ", A, " [INPUT_FILE [OUTPUT_FILE]]")') &
& trim (progname)
end subroutine print_usage
end program lex</lang>


=={{header|FreeBASIC}}==
=={{header|FreeBASIC}}==