Words from neighbour ones
- Task
Use the dictionary unixdict.txt
Ignore any word in the dictionary whose length is less than 9.
Build each new word from the following characters:
1 <= n < (dictionary length) - 9.
char1 = 1st character of nth word.
char2 = 2nd character of (n+1)th word.
char3 = 3rd character of (n+2)th word.
⋮
char9 = 9th character of (n+8)th word.
Concatenate (append) the nine characters by:
newword = char1 + char2 + char3 + ... + char9
If newword is in the dictionary, then show it on this page.
Length of newword = 9
- Metrics
- Counting
- Word frequency
- Letter frequency
- Jewels and stones
- I before E except after C
- Bioinformatics/base count
- Count occurrences of a substring
- Remove/replace
- XXXX redacted
- Remove vowels from a string
- Strip block comments
- Strip comments from a string
- Strip a set of characters from a string
- Strip whitespace from a string -- top and tail
- Strip control codes and extended characters from a string
- Anagrams/Derangements/shuffling
- Word wheel
- ABC problem
- Anagrams
- Anagrams/Deranged anagrams
- Permutations/Derangements
- Superpermutation minimisation
- Sattolo cycle
- Knuth shuffle
- Ordered words
- Textonyms (using a phone text pad)
- Find/Search/Determine
- ABC words
- Odd words
- Semordnilap
- String matching
- Alternade words
- Changeable words
- String comparison
- Extract file extension
- Levenshtein distance
- Palindrome detection
- Compare a list of strings
- Longest common prefix
- Longest common suffix
- Longest common substring
- Find common directory path
- Words from neighbour ones
- Change e letters to i in words
- Non-continuous subsequences
- Longest common subsequence
- Longest palindromic substrings
- Longest increasing subsequence
- Words containing "the" substring
- Determine if a string is numeric
- Determine if a string is collapsible
- Determine if a string is squeezable
- Determine if a string has all unique characters
- Determine if a string has all the same characters
- Find words which contains all the vowels
- Find words which contains most consonants
- Find words which contains more than 3 vowels
- Find words which first and last three letters are equals
- Find words which odd letters are consonants and even letters are vowels or vice_versa
- Formatting
- String case
- Align columns
- Literals/String
- Repeat a string
- Brace expansion
- Brace expansion using ranges
- Reverse a string
- Phrase reversals
- Comma quibbling
- Special characters
- String concatenation
- Substring/Top and tail
- Commatizing numbers
- Reverse words in a string
- Suffixation of decimal numbers
- Long literals, with continuations
- Numerical and alphabetical suffixes
- Abbreviations, easy
- Abbreviations, simple
- Abbreviations, automatic
- Song lyrics/poems/Mad Libs/phrases
- 99 Bottles of Beer
- The Twelve Days of Christmas
- The Old lady swallowed a fly
- The Name Game (a song)
- Magic 8-ball
- Mad Libs
- Tokenize
- Word break problem
- Tokenize a string
- Tokenize a string with escaping
- Split a character string based on change of character
- Sequences
Contents
AWK[edit]
# syntax: GAWK -f WORDS_FROM_NEIGHBOUR_ONES.AWK unixdict.txt
#
# Keeps every dictionary word of at least 9 characters, then for each run of
# nine consecutive kept words builds a candidate from the 1st character of the
# first word, the 2nd character of the second word, ... and the 9th character
# of the ninth word. Candidates that are themselves dictionary words are
# printed once each.
{ if (length($0) < 9) { next }
  arr1[++n] = $0   # kept words, in file order
  arr2[$0] = ""    # membership table for the kept words
}
END {
  # stop at n-8 so that every window holds nine real words
  for (i=1; i+8<=n; i++) {
    word = ""
    for (j=1; j<=9; j++) {
      # j-th character of the j-th word of the window (word i+j-1)
      word = word substr(arr1[i+j-1],j,1)
    }
    if (word in arr2) {
      printf("%s\n",word)
      delete arr2[word] # eliminate duplicates
    }
  }
  exit(0)
}
- Output:
applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose
C[edit]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_SIZE 80
#define MIN_LENGTH 9
#define WORD_SIZE (MIN_LENGTH + 1)
/* Writes the message followed by a newline to stderr, then terminates the
 * program with exit status 1. */
void fatal(const char* message) {
    fputs(message, stderr);
    fputc('\n', stderr);
    exit(1);
}
/* malloc wrapper: returns the allocated block, or calls fatal() with
 * "Out of memory" when malloc returns NULL. */
void* xmalloc(size_t n) {
    void* block = malloc(n);
    if (!block)
        fatal("Out of memory");
    return block;
}
/* realloc wrapper: returns the resized block, or calls fatal() with
 * "Out of memory" when realloc returns NULL. */
void* xrealloc(void* p, size_t n) {
    void* resized = realloc(p, n);
    if (!resized)
        fatal("Out of memory");
    return resized;
}
/* Comparator for qsort/bsearch: orders two fixed-size word records by
 * comparing their first WORD_SIZE bytes. */
int word_compare(const void* p1, const void* p2) {
    const int order = memcmp(p1, p2, WORD_SIZE);
    return order;
}
/*
 * Reads the dictionary named by argv[1] (default "unixdict.txt"), keeps the
 * first WORD_SIZE bytes of every word of at least MIN_LENGTH characters,
 * sorts those records, and then slides a window of MIN_LENGTH consecutive
 * records over them. For each window the j-th character of the j-th record is
 * collected into a candidate; candidates found among the records are printed
 * with a running number. Returns EXIT_FAILURE if the file cannot be opened,
 * EXIT_SUCCESS otherwise.
 */
int main(int argc, char** argv) {
    const char* filename = argc < 2 ? "unixdict.txt" : argv[1];
    FILE* in = fopen(filename, "r");
    if (!in) {
        perror(filename);
        return EXIT_FAILURE;
    }
    char line[MAX_WORD_SIZE];
    size_t size = 0, capacity = 1024;
    char* words = xmalloc(WORD_SIZE * capacity);
    while (fgets(line, sizeof(line), in)) {
        // Length up to (not including) the line terminator. Unlike
        // strlen(line) - 1 this stays correct when the final line has no
        // trailing newline, and it also drops the '\r' of CRLF files.
        size_t len = strcspn(line, "\r\n");
        if (len < MIN_LENGTH)
            continue;
        line[len] = '\0';
        if (size == capacity) {
            capacity *= 2;
            words = xrealloc(words, WORD_SIZE * capacity);
        }
        // A 9-letter word is stored with its terminating NUL; a longer word
        // contributes its 10th character instead, so it can never compare
        // equal to a NUL-terminated 9-letter candidate.
        memcpy(&words[size * WORD_SIZE], line, WORD_SIZE);
        ++size;
    }
    fclose(in);
    qsort(words, size, WORD_SIZE, word_compare);
    int count = 0;
    char prev_word[WORD_SIZE] = { 0 };
    for (size_t i = 0; i + MIN_LENGTH <= size; ++i) {
        char word[WORD_SIZE] = { 0 };
        // Diagonal of the window: character j of record i + j.
        for (size_t j = 0; j < MIN_LENGTH; ++j)
            word[j] = words[(i + j) * WORD_SIZE + j];
        // Skip a candidate identical to the one built from the previous window.
        if (word_compare(word, prev_word) == 0)
            continue;
        if (bsearch(word, words, size, WORD_SIZE, word_compare))
            printf("%2d. %s\n", ++count, word);
        memcpy(prev_word, word, WORD_SIZE);
    }
    free(words);
    return EXIT_SUCCESS;
}
- Output:
1. applicate 2. architect 3. astronomy 4. christine 5. christoph 6. committee 7. composite 8. constrict 9. construct 10. different 11. extensive 12. greenwood 13. implement 14. improvise 15. intercept 16. interpret 17. interrupt 18. philosoph 19. prescript 20. receptive 21. telephone 22. transcend 23. transport 24. transpose
C++[edit]
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
/**
 * Reads the dictionary named by argv[1] (default "unixdict.txt"), keeps the
 * words of at least nine characters, sorts them, and slides a window of nine
 * consecutive words over the list. For each window the j-th character of the
 * j-th word forms a candidate; every candidate that is itself in the list is
 * printed once, numbered in order of first appearance.
 *
 * @return EXIT_FAILURE if the file cannot be opened, EXIT_SUCCESS otherwise.
 */
int main(int argc, char** argv) {
    // Unsigned so that comparisons with sizes and indices do not mix signedness.
    const std::size_t min_length = 9;
    const char* filename(argc < 2 ? "unixdict.txt" : argv[1]);
    std::ifstream in(filename);
    if (!in) {
        std::cerr << "Cannot open file '" << filename << "'.\n";
        return EXIT_FAILURE;
    }
    std::string line;
    std::vector<std::string> words;
    while (getline(in, line)) {
        // A dictionary saved with CRLF line endings leaves a '\r' on each line.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();
        if (line.size() >= min_length)
            words.push_back(line);
    }
    std::sort(words.begin(), words.end());
    // Words already printed, in print order. Equal candidates are not
    // guaranteed to come from adjacent windows, so remember all of them.
    std::vector<std::string> found;
    for (std::size_t i = 0, n = words.size(); i + min_length <= n; ++i) {
        std::string word;
        word.reserve(min_length);
        for (std::size_t j = 0; j < min_length; ++j)
            word += words[i + j][j];
        if (!std::binary_search(words.begin(), words.end(), word))
            continue;
        if (std::find(found.begin(), found.end(), word) != found.end())
            continue;
        found.push_back(word);
        std::cout << std::setw(2) << found.size() << ". " << word << '\n';
    }
    return EXIT_SUCCESS;
}
- Output:
1. applicate 2. architect 3. astronomy 4. christine 5. christoph 6. committee 7. composite 8. constrict 9. construct 10. different 11. extensive 12. greenwood 13. implement 14. improvise 15. intercept 16. interpret 17. interrupt 18. philosoph 19. prescript 20. receptive 21. telephone 22. transcend 23. transport 24. transpose
Delphi[edit]
program Words_from_neighbour_ones;
{$APPTYPE CONSOLE}
uses
System.SysUtils,
System.Classes;
// Loads 'Unixdict.txt' into a new TStringList, removes every entry shorter
// than minLength and sorts what remains. The caller owns the returned list.
// If loading or filtering raises, the partially built list is freed before
// the exception is re-raised, so nothing leaks.
function GetWords(minLength: Integer = 1): TStringList;
var
  i: Integer;
begin
  Result := TStringList.create;
  try
    Result.LoadFromFile('Unixdict.txt');
    // Walk backwards so deleting an entry does not shift the ones still to visit.
    for i := Result.Count - 1 downto 0 do
      if Result.Strings[i].Length < minLength then
        Result.Delete(i);
    Result.Sort;
  except
    Result.Free;
    raise;
  end;
end;
var
  Words: TStringList;
const
  minLength = 9;
// Main program: for every run of minLength consecutive words, builds a
// candidate from the j-th character of the j-th word and prints it, numbered,
// when the candidate is itself in the word list.
begin
  Words := GetWords(minLength);
  try
    var previousWord := '';
    var count := 0;
    var n := Words.Count;
    for var i := 0 to n - minLength do
    begin
      var W := '';
      // Strings are 1-based, hence j + 1 for the j-th (0-based) word of the window.
      for var j := 0 to minLength - 1 do
        W := W + Words[i + j][j + 1];
      // Skip a candidate identical to the one built from the previous window.
      if W.Equals(previousWord) then
        Continue;
      if Words.IndexOf(W) >= 0 then
      begin
        inc(count);
        writeln(count: 2, '. ', W);
      end;
      previousWord := W;
    end;
  finally
    // Release the list even if an exception escapes the loop.
    Words.Free;
  end;
  readln;
end.
- Output:
1. applicate 2. architect 3. astronomy 4. christine 5. christoph 6. committee 7. composite 8. constrict 9. construct 10. different 11. extensive 12. greenwood 13. implement 14. improvise 15. intercept 16. interpret 17. interrupt 18. philosoph 19. prescript 20. receptive 21. telephone 22. transcend 23. transport 24. transpose
F#[edit]
// Words from neighbour ones. Nigel Galloway: February 11th., 2021.
// g holds every line of unixdict.txt that is longer than 8 characters.
let g=[|use n=System.IO.File.OpenText("unixdict.txt") in while not n.EndOfStream do yield n.ReadLine()|]|>Array.filter(fun n->n.Length>8)
// Set of the same words, so each membership test is a tree lookup instead of a scan of the whole array.
let known=Set.ofArray g
// For each window of 9 consecutive words take the i-th character of the i-th word, keep the results that are known words, drop repeats and print.
g|>Array.windowed 9|>Array.map(fun n->n|>Array.mapi(fun n g->g.[n])|>System.String)|>Array.filter(fun n->Set.contains n known)|>Array.distinct|>Array.iter(printfn "%s")
- Output:
applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose
Factor[edit]
For example, { "abc" "def" "ghi" } 2 clump produces { { "abc" "def" } { "def" "ghi" } }.
<clumps> is the same idea except it doesn't actually store all that redundant information in memory; it's a generator that generates clumps on demand. Notice that clumps are matrices, so we can take their diagonal with main-diagonal.
! Words from neighbour ones: reads unixdict.txt, keeps the words of length 9
! or more, and for every 9 adjacent words takes the diagonal of the clump
! (1st letter of the 1st word, 2nd letter of the 2nd word, ...). Diagonals
! that are dictionary words are printed once each, numbered from 1.
USING: formatting grouping hash-sets io.encodings.ascii io.files
kernel literals math math.matrices sequences sequences.extras
sets strings ;
! The word list is read from the file inside a << >> parse-time block.
<< CONSTANT: words $[ "unixdict.txt" ascii file-lines ] >>
! Hash set of all the words, used for the membership test below.
CONSTANT: wordset $[ words >hash-set ]
words ! place word list on data stack
[ length 9 < ] reject ! remove small words from list
9 <clumps> ! create virtual sequence of every 9 adjacent words (overlapping)
[ main-diagonal >string ] ! map clump to its diagonal
[ wordset in? ] map-filter ! filter diagonals that are words
members ! remove duplicates
[ 1 + swap "%2d. %s\n" printf ] each-index ! print words formatted nicely
- Output:
1. applicate 2. architect 3. astronomy 4. christine 5. christoph 6. committee 7. composite 8. constrict 9. construct 10. different 11. extensive 12. greenwood 13. implement 14. improvise 15. intercept 16. interpret 17. interrupt 18. philosoph 19. prescript 20. receptive 21. telephone 22. transcend 23. transport 24. transpose
Go[edit]
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"sort"
"strings"
"unicode/utf8"
)
// main reads unixdict.txt, keeps the words of at least nine runes and, for
// every run of nine consecutive kept words, builds a candidate from byte k of
// the k-th word of the run. Each candidate that is itself one of the kept
// words is printed once with a running number.
//
// The membership test uses sort.SearchStrings, so the kept words are assumed
// to be in ascending order, as they are in the dictionary file.
func main() {
	wordList := "unixdict.txt"
	b, err := ioutil.ReadFile(wordList)
	if err != nil {
		log.Fatal("Error reading file")
	}
	bwords := bytes.Fields(b)
	var words []string
	for _, bword := range bwords {
		s := string(bword)
		if utf8.RuneCountInString(s) >= 9 {
			words = append(words, s)
		}
	}
	count := 0
	// Set of words already printed. A map is used because the words are
	// recorded in print order, which a binary search could not rely on.
	alreadyFound := make(map[string]bool)
	le := len(words)
	var sb strings.Builder
	// i+9 <= le includes the final window, which starts at index le-9.
	for i := 0; i+9 <= le; i++ {
		sb.Reset()
		for j := i; j < i+9; j++ {
			sb.WriteByte(words[j][j-i])
		}
		word := sb.String()
		ix := sort.SearchStrings(words, word)
		if ix < le && word == words[ix] && !alreadyFound[word] {
			count++
			fmt.Printf("%2d: %s\n", count, word)
			alreadyFound[word] = true
		}
	}
}
- Output:
1: applicate 2: architect 3: astronomy 4: christine 5: christoph 6: committee 7: composite 8: constrict 9: construct 10: different 11: extensive 12: greenwood 13: implement 14: improvise 15: intercept 16: interpret 17: interrupt 18: philosoph 19: prescript 20: receptive 21: telephone 22: transcend 23: transport 24: transpose
Java[edit]
import java.io.*;
import java.util.*;
/**
 * Words from neighbour ones: reads unixdict.txt, keeps the words of at least
 * nine characters, and for every run of nine consecutive words builds a
 * candidate from the j-th character of the j-th word. Candidates that are
 * themselves in the word list are printed once each with a running number.
 */
public class NeighbourWords {
    /**
     * Program entry point. Any exception (for example a missing dictionary
     * file) is reported with a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            int minLength = 9;
            List<String> words = new ArrayList<>();
            try (BufferedReader reader = new BufferedReader(new FileReader("unixdict.txt"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.length() >= minLength)
                        words.add(line);
                }
            }
            // binarySearch below requires the list to be sorted.
            Collections.sort(words);
            // Every word already printed. Equal candidates are not guaranteed
            // to come from adjacent windows, so remember all of them.
            Set<String> printed = new HashSet<>();
            int count = 0;
            for (int i = 0, n = words.size(); i + minLength <= n; ++i) {
                StringBuilder sb = new StringBuilder(minLength);
                for (int j = 0; j < minLength; ++j)
                    sb.append(words.get(i + j).charAt(j));
                String word = sb.toString();
                // Set.add returns false when the word was printed before.
                if (Collections.binarySearch(words, word) >= 0 && printed.add(word))
                    System.out.println(String.format("%2d. %s", ++count, word));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
- Output:
1. applicate 2. architect 3. astronomy 4. christine 5. christoph 6. committee 7. composite 8. constrict 9. construct 10. different 11. extensive 12. greenwood 13. implement 14. improvise 15. intercept 16. interpret 17. interrupt 18. philosoph 19. prescript 20. receptive 21. telephone 22. transcend 23. transport 24. transpose
Julia[edit]
# Prints the words that can be formed from `len` consecutive dictionary words
# by taking the k-th character of the k-th word, provided the result is itself
# a word of the (length-filtered) dictionary. Each hit is printed once, padded
# to `colwidth`, with a line break after every `numcols` hits.
function wordsfromneighbourones(wordfile::String, len = 9, colwidth = 11, numcols = 8)
    println("Word source: $wordfile\n")
    candidates = filter(w -> length(w) >= len, split(read(wordfile, String), r"\s+"))
    known = Dict(w => 1 for w in candidates)
    printed = 0
    seen = String[]
    for start in 1:(length(candidates) - len + 1)
        # diagonal of the window beginning at `start`
        diagonal = join(candidates[start + k - 1][k] for k in 1:len)
        (haskey(known, diagonal) && !(diagonal in seen)) || continue
        push!(seen, diagonal)
        printed += 1
        print(rpad(diagonal, colwidth), printed % numcols == 0 ? "\n" : "")
    end
end

wordsfromneighbourones("unixdict.txt")
- Output:
Word source: unixdict.txt applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose
Perl[edit]
#!/usr/bin/perl
use strict; # https://rosettacode.org/wiki/Words_from_neighbour_ones
use warnings;
# Preset the argument list so that the <> operator below reads the dictionary.
@ARGV = 'unixdict.txt';
# @words keeps every input line longer than 9 characters; the lines are not
# chomped, so the newline counts and this selects words of 9 or more letters.
# $skew is those lines joined into one string after each has been cut down to
# its first 9 characters (the newline survives because '.' does not match it
# here), i.e. a sequence of fixed 10-character records.
my $skew = join '', map { s/^.{9}\K.+//r } my @words = grep length() > 9, <>;
# Lookup table of the lines that are exactly 10 characters long
# (9 letters plus the newline).
my %dict = map { $_ => 1 } grep length == 10, @words;
# Candidates already printed.
my %seen;
# Eight repetitions of "skip 10 characters, then capture one word character".
# Applied right after the first character of a record, each skip lands one
# column further to the right in the following record.
my $nextch = '.{10}(\\w)' x 8;
# /m lets ^ match at the start of every record, /s lets '.' cross newlines, and
# the lookahead consumes nothing, so the next match starts at the next record.
while( $skew =~ /^(\w)(?=$nextch)/gms )
{
# Join the nine captured characters and append a newline so the candidate has
# the same form as the keys of %dict.
my $new = join '', @{^CAPTURE}, "\n";
# Print the candidate only if it is a dictionary word not printed before.
$dict{$new} and !$seen{$new}++ and print $new;
}
- Output:
applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose
Phix[edit]
-- Words from neighbour ones: finds dictionary words that can be read off
-- nine consecutive dictionary words of length 9 or more.

-- Filter predicate: true for words of at least 9 characters.
function over9(string word) return length(word)>=9 end function
-- The entries of demo/unixdict.txt that pass over9.
sequence dictionary = filter(get_text("demo/unixdict.txt",GT_LF_STRIPPED),over9)
-- Takes vslice(dictionary,n) and keeps its elements n..-10+n.
-- NOTE(review): presumably vslice yields the n-th character of every word and
-- the negative upper bound counts from the end, so that all nine slices have
-- the same length and are offset by one word each - confirm against the Phix docs.
function slicen(integer n) return vslice(dictionary,n)[n..-10+n] end function
-- Applies slicen to tagset(9), columnizes the result, keeps the entries that
-- occur in dictionary and removes duplicates.
-- NOTE(review): presumably tagset(9) is {1..9} and columnize turns the nine
-- slices into one 9-character candidate per window - confirm.
sequence neighwords = unique(filter(columnize(apply(tagset(9),slicen)),"in",dictionary))
-- Prints the number of words found followed by a shortened listing of them.
printf(1,"%d words: %s\n",{length(neighwords),join(shorten(neighwords,"",3))})
- Output:
24 words: applicate architect astronomy ... transcend transport transpose
Raku[edit]
# Dictionary words of nine or more characters, in file order.
my @long-words = 'unixdict.txt'.IO.lines.grep( { .chars >= 9 } );
# The exactly-nine-character words, used for the membership test.
my %nine-letter = @long-words.grep( { .chars == 9 } ).Set;
# One candidate per overlapping window of nine words: the k-th character of the k-th word.
my @diagonals = @long-words.rotor( 9 => -8 ).map: -> @window {
    (^9).map( { @window[$_].substr($_, 1) } ).join
};
# Keep the candidates that are dictionary words and print each once.
.say for @diagonals.grep( { %nine-letter{$_} } ).unique;
- Output:
applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose
REXX[edit]
This REXX version doesn't care what order the words in the dictionary are in, nor does it care what
case (lower/upper/mixed) the words are in, the search for the words is caseless.
It also allows the minimum length to be specified on the command line (CL) as well as the dictionary file identifier.
/*REXX pgm finds words that're composed from neighbor words (within an identified dict).*/
/* Each new word is built from the 1st character of usable word J, the 2nd character of */
/* usable word J+1,  ···  and the 9th character of usable word J+8;  it is shown (once) */
/* if it is itself a usable word of the dictionary (the comparison is caseless).        */
parse arg minL iFID . /*obtain optional arguments from the CL*/
if minL=='' | minL=="," then minL= 9 /*Not specified? Then use the default.*/
if iFID=='' | iFID=="," then iFID='unixdict.txt' /* " " " " " " */
#= 0; @.=; !.= 0 /*number of usable words in dictionary.*/
do recs=0 while lines(iFID)\==0 /*read each word in the file (word=X).*/
x= strip( linein( iFID) ) /*pick off a word from the input line. */
if length(x)<minL then iterate /*Is the word too short? Then skip it.*/
#= # + 1 /*bump the count of usable words. */
@.#= x; upper x; !.x= 1 /*original case; create findable word.*/
end /*recs*/ /* [↑] semaphore name is uppercased. */
say copies('─', 30) recs "words in the dictionary file: " iFID
say copies('─', 30) right(#, length(recs) ) "usable words in the dictionary file."
finds= 0 /*count of the neighbor words found. */
say; $=
do j=1 for #; y= left(@.j, 1) /*initialize the new word to be built. */
do k=2 to 9 until n>#; n= j + k - 1 /*use next 8 usable words in dictionary*/
y= y || substr(@.n, k, 1) /*build a new word, 1 letter at a time.*/
end /*k*/ /* [↑] Kth letter is from word J+K-1. */
uy=y; upper uy /*obtain uppercase version of the word.*/
if \!.uy then iterate /*Does the new word exist? No, skip it*/
if wordpos(uy, $)>0 then iterate /*Word is a dup? Then skip duplicate. */
finds= finds + 1 /*bump count of found neighboring words*/
$= $ uy /*add a word to the list of words found*/
say right( left(y, 30), 40) /*indent original word for readability.*/
end /*j*/
/*stick a fork in it, we're all done. */
say copies('─', 30) finds ' neighbor words found with a minimum length of ' minL
- output when using the default inputs:
────────────────────────────── 25104 words in the dictionary file: unixdict.txt ────────────────────────────── 7250 usable words in the dictionary file. applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose ────────────────────────────── 24 neighbor words found with a minimum length of 9
Ring[edit]
# Words from neighbour ones: reads unixdict.txt, keeps the words of nine or
# more characters and, for every nine consecutive words, joins the 1st
# character of the first, the 2nd of the second, ... the 9th of the ninth.
# Every joined string that is itself one of the kept words is listed once.
dictText = read("unixdict.txt")
dictWords = str2list(dictText)
matches = []

see "working..." + nl

# Remove the short words; go backwards so a deletion never shifts an
# index that is still to be visited.
for idx = len(dictWords) to 1 step -1
    if len(dictWords[idx]) < 9
        del(dictWords,idx)
    ok
next

see "New words are:" + nl

for first = 1 to len(dictWords)-8
    # diagonal of the nine words starting at position first
    candidate = ""
    for col = 1 to 9
        candidate = candidate + substr(dictWords[first+col-1],col,1)
    next
    hit = find(dictWords,candidate)
    if hit > 0
        add(matches,dictWords[hit])
    ok
next

# Sort, then drop any entry equal to the one before it.
matches = sort(matches)
for idx = len(matches) to 2 step -1
    if matches[idx] = matches[idx-1]
        del(matches,idx)
    ok
next

for idx = 1 to len(matches)
    see "" + idx + ". " + matches[idx] + nl
next

see "done..." + nl
Output:
working... New words are: 1. applicate 2. architect 3. astronomy 4. christine 5. christoph 6. committee 7. composite 8. constrict 9. construct 10. different 11. extensive 12. greenwood 13. implement 14. improvise 15. intercept 16. interpret 17. interrupt 18. philosoph 19. prescript 20. receptive 21. telephone 22. transcend 23. transport 24. transpose done...
Wren[edit]
import "io" for File
import "/sort" for Find
import "/fmt" for Fmt
// Reads the dictionary, keeps the words of nine or more characters and, for
// every run of nine consecutive words, builds a candidate from the k-th
// character of the k-th word. Candidates that are themselves in the word list
// are printed once each with a running number.
var wordList = "unixdict.txt" // local copy
var words = File.read(wordList).trimEnd().split("\n").where { |w| w.count >= 9 }.toList
var count = 0
var alreadyFound = []
// Start index of the final complete window of nine words; negative when there
// are fewer than nine words, in which case there is nothing to do.
var lastStart = words.count - 9
if (lastStart >= 0) {
    // Inclusive range, so the window starting at lastStart is examined too.
    for (i in 0..lastStart) {
        var word = ""
        for (j in i...i+9) word = word + words[j][j-i]
        // alreadyFound is kept in print order, so it is scanned with contains
        // rather than searched with the binary search used for words.
        if (Find.all(words, word)[0] && !alreadyFound.contains(word)) {
            count = count + 1
            Fmt.print("$2d: $s", count, word)
            alreadyFound.add(word)
        }
    }
}
- Output:
1: applicate 2: architect 3: astronomy 4: christine 5: christoph 6: committee 7: composite 8: constrict 9: construct 10: different 11: extensive 12: greenwood 13: implement 14: improvise 15: intercept 16: interpret 17: interrupt 18: philosoph 19: prescript 20: receptive 21: telephone 22: transcend 23: transport 24: transpose