Lyndon word
You are encouraged to solve this task according to the task description, using any language you may know.
Given a finite alphabet, we can lexicographically order all strings in the alphabet. If two strings have different lengths, then pad the shorter string on the right with the smallest letter. For example, we have 01 > 001, but 01 = 010. As we see, this order is a total preorder, but not a total order, since it identifies different strings.
A Lyndon word is a non-empty string that is strictly lower in lexicographic order than all its circular rotations. In particular, if a string is equal to a circular rotation, then it is not a Lyndon word. For example, since 0101 = 0101 (rotation by 2), it is not a Lyndon word.
The first Lyndon words on the binary alphabet {0, 1} are:
- 0, 1, 01, 001, 011, 0001, 0011, 0111, 00001, 00011, 00101, 00111, 01011, 01111, ...
Task: Given a positive number and an ordered list of alphabet, list all Lyndon words of length at most , in the lexicographic order.
Duval (1988) provides an efficient algorithm. If is one of the words in the sequence, then the next word after can be found by the following steps:
- Repeat and truncate it to a new word of length exactly .
- As long as the final symbol of is the last symbol in the sorted ordering of the alphabet, remove it, producing a shorter word.
- Replace the final remaining symbol of by its successor in the sorted ordering of the alphabet.
For example, suppose we have generated the binary Lyndon words of length up to 7, and we have generated up to , then the steps are:
- Repeat and truncate to get
- Since the last symbol is 0, it is not the final symbol.
- Increment the last symbol to obtain .
C++
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
// Return the index of the given element in the given vector
int32_t index_of(const std::vector<std::string>& words, const std::string& word) {
std::vector<std::string>::const_iterator iterator = std::find(words.begin(), words.end(), word);
return ( iterator == words.end() ) ? -1 : std::distance(words.begin(), iterator);
}
// Using the Duval (1988) algorithm
std::string next_word(const uint32_t& max_length, const std::string& word, const std::vector<std::string>& alphabet) {
// Step 1: Repeat the word and truncate
std::string next_word = word;
while ( next_word.size() < max_length ) {
next_word += word;
}
next_word = next_word.substr(0, max_length);
// Step 2: Remove last symbol of the next word if it is the last symbol in the alphabet
const std::string alphabet_last_symbol = alphabet.back();
while ( next_word.ends_with(alphabet_last_symbol) ) {
next_word = next_word.substr(0, next_word.size() - 1);
}
// Step 3: Replace the last symbol of the next word by its successor in the alphabet
if ( ! next_word.empty() ) {
const std::string word_last_symbol = next_word.substr(next_word.size() - 1);
const uint32_t index = index_of(alphabet, word_last_symbol) + 1;
next_word.replace(next_word.size() - 1, 1, alphabet[index]);
}
return next_word;
}
int main() {
const std::vector<std::string> alphabet = { "0", "1" };
std::string word = alphabet.front();
while ( ! word.empty() ) {
std:: cout << word << std::endl;
word = next_word(5, word, alphabet);
}
}
- Output:
0 00001 0001 00011 001 00101 0011 00111 01 01011 011 0111 01111 1
EasyLang
func$ nextword n w$ alpha$ .
alpha$[] = strchars alpha$
while len x$ < n
x$ &= w$
.
x$[] = strchars substr x$ 1 n
while len x$[] > 0 and x$[len x$[]] = alpha$[len alpha$[]]
len x$[] -1
.
lx = len x$[]
if lx > 0
repeat
i += 1
until alpha$[i] = x$[lx]
.
x$[lx] = alpha$[i + 1]
.
return strjoin x$[]
.
proc lyndon n alpha$ . .
w$ = substr alpha$ 1 1
while len w$ <= n and len w$ > 0
print w$
w$ = nextword n w$ alpha$
.
.
lyndon 5 "01"
FreeBASIC
Function next_word(n As Integer, w As String, alphabet As String) As String
Dim x As String = Left(w & String((n \ Len(w)) + 1, w), n)
While Len(x) > 0 And Mid(x, Len(x), 1) = Mid(alphabet, Len(alphabet), 1)
x = Left(x, Len(x) - 1)
Wend
If Len(x) > 0 Then
Dim last_char As String = Mid(x, Len(x), 1)
Dim next_char_index As Integer = Instr(alphabet, last_char)
If next_char_index > 0 Then
next_char_index += 1
x = Left(x, Len(x) - 1) & Mid(alphabet, next_char_index, 1)
End If
End If
Return x
End Function
Sub generate_lyndon_words(n As Integer, alphabet As String)
Dim w As String = Mid(alphabet, 1, 1)
While Len(w) <= n
Print w
w = next_word(n, w, alphabet)
If Len(w) = 0 Then Exit While
Wend
End Sub
Dim alphabet As String = "01"
Dim As Integer maxLength = 5
generate_lyndon_words(maxLength, alphabet)
Sleep
- Output:
Same as Python entry.
Java
import java.util.List;
public final class LyndonWord {
public static void main(String[] args) {
List<String> alphabet = List.of( "0", "1" );
String word = alphabet.getFirst();
while ( ! word.isEmpty() ) {
System.out.println(word);
word = nextWord(5, word, alphabet);
}
}
// Using the Duval (1988) algorithm
private static String nextWord(int maxLength, String word, List<String> alphabet) {
// Step 1: Repeat the word and truncate
String nextWord = word;
while ( nextWord.length() < maxLength ) {
nextWord += word;
}
nextWord = nextWord.substring(0, maxLength);
// Step 2: Remove last symbol of the next word if it is the last symbol in the alphabet
final String alphabetLastSymbol = alphabet.getLast();
while ( nextWord.endsWith(alphabetLastSymbol) ) {
nextWord = nextWord.substring(0, nextWord.length() - 1);
}
// Step 3: Replace the last symbol of the next word by its successor in the alphabet
if ( ! nextWord.isEmpty() ) {
final String wordLastSymbol = nextWord.substring(nextWord.length() - 1);
final int index = alphabet.indexOf(wordLastSymbol) + 1;
nextWord = nextWord.substring(0, nextWord.length() - 1);
nextWord += alphabet.get(index);
}
return nextWord;
}
}
- Output:
0 00001 0001 00011 001 00101 0011 00111 01 01011 011 0111 01111 1
jq
Works with gojq, the Go implementation of jq
This entry defines a filter, `lyndon`, for finding the Lyndon word of the input string, if any, but is otherwise adapted from the Wren entry.
# Generic stream-oriented min function
# Assume the stream s is not empty and does contain null
def min(s):
reduce s as $x (null; if . == null then $x elif . < $x then . else $x end);
# Input: a string
# Assume 0 <= $n <= length
def rotate($n):
.[$n:] + .[:$n];
# Emit the Lyndon word corresponding to the input string, else empty.
def lyndon:
def circular:
. as $in
| any( range(1; length/2 + 1); . as $n | $in | rotate($n) == .) ;
if circular then empty
else min(range(0;length) as $i | rotate($i))
end;
# Input: a Lyndon word
# Output: the next Lyndon word relative to $alphabet
def nextWord($n; $alphabet):
def a($i): $alphabet[$i:$i+1];
((($n/length)|floor + 1) * .)[:$n]
| until (. == "" or .[-1:] != $alphabet[-1:]; .[:-1])
| if . == "" then .
else .[-1:] as $lastChar
| ($alphabet|index($lastChar) + 1) as $nextCharIndex
| .[0:-1] + a($nextCharIndex)
end ;
def lyndon_words($n):
. as $alphabet
| .[:1]
| while ( length <= $n and . != "";
nextWord($n; $alphabet) ) ;
"01" | lyndon_words(5)
- Output:
As for Wren.
Julia
function nextword(n, w, alphabet)
x = (w ^ ((n ÷ length(w)) + 1))[begin:n]
while x != "" && x[end] == alphabet[end]
x = x[begin:end-1]
end
if x != ""
last_char = x[end]
next_char_index = something(findfirst(==(last_char), alphabet), 0) + 1
x = x[begin:end-1] * alphabet[next_char_index]
end
return x
end
function generate_lyndon_words(n, alphabet)
lwords = String[]
w = string(alphabet[begin])
while length(w) <= n
push!(lwords, w)
w = nextword(n, w, alphabet)
w == "" && break
end
return lwords
end
const lyndon_words = generate_lyndon_words(5, "01")
foreach(println, lyndon_words)
- Output:
Same as Python example.
Perl
# 20240912 Perl programming solution
use strict;
use warnings;
sub nextword {
my ($n, $w, $alphabet) = @_;
my $x = substr($w x (int($n / length($w)) + 1), 0, $n);
while ($x && substr($x, -1) eq substr($alphabet, -1)) {
substr($x, -1) = ''
}
if ($x ne '') {
my $next_char_index = (index($alphabet, substr($x, -1)) // 0) + 1;
substr($x, -1, 1) = substr($alphabet, $next_char_index, 1);
}
return $x;
}
sub generate_words {
my ($n, $alphabet) = @_;
my $w = substr($alphabet, 0, 1);
my @result;
while (length($w) <= $n) {
push @result, $w;
last unless $w = nextword($n, $w, $alphabet);
}
return @result;
};
print "$_\n" for generate_words(5, '01');
You may Attempt This Online!
Phix
with javascript_semantics
function generate_lyndon_words(integer maxlen, string alphabet="01")
sequence res = {}
string w = alphabet[1..1]
while length(w) do
res = append(res,w)
while length(w)<maxlen do
w &= w
end while
w = trim_tail(w[1..maxlen],alphabet[$])
if length(w) then
-- w[$] += 1 -- not eg "0123456789ABCDEF":
w[$] = alphabet[find(w[$],alphabet)+1]
end if
end while
return res
end function
printf(1,"%s\n",join(generate_lyndon_words(5,"01"),"\n"))
- Output:
0 00001 0001 00011 001 00101 0011 00111 01 01011 011 0111 01111 1
Python
def next_word(n, w, alphabet):
x = (w * ((n // len(w)) + 1))[:n]
while x and x[-1] == alphabet[-1]:
x = x[:-1]
if x:
last_char = x[-1]
next_char_index = alphabet.index(last_char) + 1
x = x[:-1] + alphabet[next_char_index]
return x
def generate_lyndon_words(n, alphabet):
w = alphabet[0]
while len(w) <= n:
yield w
w = next_word(n, w, alphabet)
if not w: break
lyndon_words = generate_lyndon_words(5, [str(i) for i in range(2)])
for word in lyndon_words:
print(word)
Output:
0
00001
0001
00011
001
00101
0011
00111
01
01011
011
0111
01111
1
Raku
# 20240211 Raku programming solution
sub nextword($n, $w, $alphabet) {
my $x = ($w x ($n div $w.chars + 1)).substr(0, $n);
while $x.Bool && $x.substr(*-1) eq $alphabet.substr(*-1) {
$x.substr-rw(*-1) = ''
}
if $x.Bool {
my $next_char_index = ($alphabet.index($x.substr(*-1)) // 0) + 1;
$x.substr-rw(*-1, 1) = $alphabet.substr($next_char_index, 1);
}
return $x;
}
.say for sub ($n, $alphabet) {
my $w = $alphabet.substr(0, 1);
return gather while $w.chars <= $n {
take $w;
last unless $w = nextword($n, $w, $alphabet);
}
}(5, '01');
- Output:
Same as Julia example.
You may Attempt This Online!
RPL
REVSTR
is defined at Reverse a string.
« OVER REVSTR HEAD → w alphabet n lastsymbol « IF w lastsymbol == THEN "" ELSE w WHILE DUP SIZE n ≤ REPEAT w + END 1 n SUB REVSTR @ reversing string to use HEAD and TAIL rather than SUB WHILE DUP HEAD lastsymbol == REPEAT TAIL END TAIL LASTARG HEAD alphabet DUP ROT POS 1 + DUP SUB SWAP + REVSTR END » » 'NEXTLYNDON' STO @ ( "word" "alphabet" n → "nextword" ) « → alphabet n « { } alphabet HEAD WHILE DUP SIZE REPEAT SWAP OVER + SWAP alphabet n NEXTLYNDON END DROP » » 'TASK' STO
"01" 5 TASK
- Output:
1: { "0" "00001" "0001" "00011" "001" "00101" "0011" "00111" "01" "01011" "011" "0111" "01111" "1" }
Wren
var alphabet = "01"
var nextWord = Fn.new { |n, w|
var x = (w * ((n/w.count).floor + 1))[0...n]
while (x != "" && x[-1] == alphabet[-1]) x = x[0...-1]
if (x != "") {
var lastChar = x[-1]
var nextCharIndex = alphabet.indexOf(lastChar) + 1
x = x[0...-1] + alphabet[nextCharIndex]
}
return x
}
var generateLyndonWords = Fiber.new { |n|
var w = alphabet[0]
while (w.count <= n) {
Fiber.yield(w)
w = nextWord.call(n, w)
if (w == "") break
}
}
while (true) {
var word = generateLyndonWords.call(5)
if (!word) break
System.print(word)
}
- Output:
0 00001 0001 00011 001 00101 0011 00111 01 01011 011 0111 01111 1