I'm working on modernizing Rosetta Code's infrastructure. Starting with communications. Please accept this time-limited open invite to RC's Slack.. --Michael Mol (talk) 20:59, 30 May 2020 (UTC)

Words from neighbour ones

From Rosetta Code
Words from neighbour ones is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Use the dictionary   unixdict.txt

Ignore any word in the dictionary whose length is less than 9.


Let's take the words from next characters:
1 <= n < (dictionary length) - 9.
char1 = 1st character of         nth   word.
char2 = 2nd character of  (n+1)th  word.
char3 = 3rd character of  (n+2)th  word.
   
char9 = 9th character of  (n+8)th  word.


Concatenate (append) the nine characters by:

      newword = char1 + char2 + char3 + ... + char9 


If   newword   is in the dictionary, then show on this page.

Length of  newword = 9


Other tasks related to string operations:
Metrics
Counting
Remove/replace
Anagrams/Derangements/shuffling
Find/Search/Determine
Formatting
Song lyrics/poems/Mad Libs/phrases
Tokenize
Sequences



AWK[edit]

 
# syntax: GAWK -f WORDS_FROM_NEIGHBOUR_ONES.AWK unixdict.txt
{ if (length($0) < 9) { next }
arr1[++n] = $0
arr2[$0] = ""
}
END {
for (i=1; i<=n; i++) {
word = substr(arr1[i],1,1)
for (j=2; j<=9; j++) {
if (!((i+j) in arr1)) { continue }
word = word substr(arr1[i+j],j,1)
}
if (word in arr2) {
printf("%s\n",word)
delete arr2[word] # eliminate duplicates
}
}
exit(0)
}
 
Output:
applicate
architect
astronomy
christine
christoph
committee
composite
constrict
construct
different
extensive
greenwood
implement
improvise
intercept
interpret
interrupt
philosoph
prescript
receptive
telephone
transcend
transport
transpose

C[edit]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
#define MAX_WORD_SIZE 80
#define MIN_LENGTH 9
#define WORD_SIZE (MIN_LENGTH + 1)
 
void fatal(const char* message) {
fprintf(stderr, "%s\n", message);
exit(1);
}
 
void* xmalloc(size_t n) {
void* ptr = malloc(n);
if (ptr == NULL)
fatal("Out of memory");
return ptr;
}
 
void* xrealloc(void* p, size_t n) {
void* ptr = realloc(p, n);
if (ptr == NULL)
fatal("Out of memory");
return ptr;
}
 
int word_compare(const void* p1, const void* p2) {
return memcmp(p1, p2, WORD_SIZE);
}
 
int main(int argc, char** argv) {
const char* filename = argc < 2 ? "unixdict.txt" : argv[1];
FILE* in = fopen(filename, "r");
if (!in) {
perror(filename);
return EXIT_FAILURE;
}
char line[MAX_WORD_SIZE];
size_t size = 0, capacity = 1024;
char* words = xmalloc(WORD_SIZE * capacity);
while (fgets(line, sizeof(line), in)) {
size_t len = strlen(line) - 1; // last character is newline
if (len < MIN_LENGTH)
continue;
line[len] = '\0';
if (size == capacity) {
capacity *= 2;
words = xrealloc(words, WORD_SIZE * capacity);
}
memcpy(&words[size * WORD_SIZE], line, WORD_SIZE);
++size;
}
fclose(in);
qsort(words, size, WORD_SIZE, word_compare);
int count = 0;
char prev_word[WORD_SIZE] = { 0 };
for (size_t i = 0; i + MIN_LENGTH <= size; ++i) {
char word[WORD_SIZE] = { 0 };
for (size_t j = 0; j < MIN_LENGTH; ++j)
word[j] = words[(i + j) * WORD_SIZE + j];
if (word_compare(word, prev_word) == 0)
continue;
if (bsearch(word, words, size, WORD_SIZE, word_compare))
printf("%2d. %s\n", ++count, word);
memcpy(prev_word, word, WORD_SIZE);
}
free(words);
return EXIT_SUCCESS;
}
Output:
 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

C++[edit]

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
 
int main(int argc, char** argv) {
const int min_length = 9;
const char* filename(argc < 2 ? "unixdict.txt" : argv[1]);
std::ifstream in(filename);
if (!in) {
std::cerr << "Cannot open file '" << filename << "'.\n";
return EXIT_FAILURE;
}
std::string line;
std::vector<std::string> words;
while (getline(in, line)) {
if (line.size() >= min_length)
words.push_back(line);
}
std::sort(words.begin(), words.end());
std::string previous_word;
int count = 0;
for (size_t i = 0, n = words.size(); i + min_length <= n; ++i) {
std::string word;
word.reserve(min_length);
for (size_t j = 0; j < min_length; ++j)
word += words[i + j][j];
if (previous_word == word)
continue;
auto w = std::lower_bound(words.begin(), words.end(), word);
if (w != words.end() && *w == word)
std::cout << std::setw(2) << ++count << ". " << word << '\n';
previous_word = word;
}
return EXIT_SUCCESS;
}
Output:
 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

Delphi[edit]

Translation of: Java
 
program Words_from_neighbour_ones;
 
{$APPTYPE CONSOLE}
 
uses
System.SysUtils,
System.Classes;
 
function GetWords(minLength: Integer = 1): TStringList;
var
i: Integer;
begin
Result := TStringList.create;
Result.LoadFromFile('Unixdict.txt');
with Result do
for i := Count - 1 downto 0 do
if Strings[i].Length < minLength then
Delete(i);
Result.Sort;
end;
 
var
Words: TStringList;
 
const
minLength = 9;
 
begin
Words := GetWords(minLength);
var previousWord := '';
var count := 0;
var n := Words.Count;
 
for var i := 0 to n - minLength do
begin
 
var W := '';
for var j := 0 to minLength - 1 do
W := W + Words[i + j][j + 1];
if W.Equals(previousWord) then
Continue;
if Words.IndexOf(W) >= 0 then
begin
inc(count);
writeln(count: 2, '. ', W);
end;
previousWord := W;
end;
 
Words.Free;
readln;
end.
Output:
 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

F#[edit]

 
// Words from neighbour ones. Nigel Galloway: February 11th., 2021.
let g=[|use n=System.IO.File.OpenText("unixdict.txt") in while not n.EndOfStream do yield n.ReadLine()|]|>Array.filter(fun n->n.Length>8)
g|>Array.windowed 9|>Array.map(fun n->n|>Array.mapi(fun n g->g.[n])|>System.String)|>Array.filter(fun n-> Array.contains n g)|>Array.distinct|>Array.iter(printfn "%s")
 
Output:
applicate
architect
astronomy
christine
christoph
committee
composite
constrict
construct
different
extensive
greenwood
implement
improvise
intercept
interpret
interrupt
philosoph
prescript
receptive
telephone
transcend
transport
transpose

Factor[edit]

{ "abc" "def" "ghi" } 2 clump produces { { "abc" "def" } { "def" "ghi" } }. <clumps> is the same idea except it doesn't actually store all that redundant information in memory; it's a generator that generates clumps on demand. Notice that clumps are matrices, so we can take their diagonal with main-diagonal.

Works with: Factor version 0.99 2020-08-14
USING: formatting grouping hash-sets io.encodings.ascii io.files
kernel literals math math.matrices sequences sequences.extras
sets strings ;
 
<< CONSTANT: words $[ "unixdict.txt" ascii file-lines ] >>
 
CONSTANT: wordset $[ words >hash-set ]
 
words  ! place word list on data stack
[ length 9 < ] reject  ! remove small words from list
9 <clumps>  ! create virtual sequence of every 9 adjacent words (overlapping)
[ main-diagonal >string ]  ! map clump to its diagonal
[ wordset in? ] map-filter  ! filter diagonals that are words
members  ! remove duplicates
[ 1 + swap "%2d. %s\n" printf ] each-index  ! print words formatted nicely
Output:
 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

Go[edit]

package main
 
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"sort"
"strings"
"unicode/utf8"
)
 
func main() {
wordList := "unixdict.txt"
b, err := ioutil.ReadFile(wordList)
if err != nil {
log.Fatal("Error reading file")
}
bwords := bytes.Fields(b)
var words []string
for _, bword := range bwords {
s := string(bword)
if utf8.RuneCountInString(s) >= 9 {
words = append(words, s)
}
}
count := 0
var alreadyFound []string
le := len(words)
var sb strings.Builder
for i := 0; i < le-9; i++ {
sb.Reset()
for j := i; j < i+9; j++ {
sb.WriteByte(words[j][j-i])
}
word := sb.String()
ix := sort.SearchStrings(words, word)
if ix < le && word == words[ix] {
ix2 := sort.SearchStrings(alreadyFound, word)
if ix2 == len(alreadyFound) {
count++
fmt.Printf("%2d: %s\n", count, word)
alreadyFound = append(alreadyFound, word)
}
}
}
}
Output:
 1: applicate
 2: architect
 3: astronomy
 4: christine
 5: christoph
 6: committee
 7: composite
 8: constrict
 9: construct
10: different
11: extensive
12: greenwood
13: implement
14: improvise
15: intercept
16: interpret
17: interrupt
18: philosoph
19: prescript
20: receptive
21: telephone
22: transcend
23: transport
24: transpose

Java[edit]

import java.io.*;
import java.util.*;
 
public class NeighbourWords {
public static void main(String[] args) {
try {
int minLength = 9;
List<String> words = new ArrayList<>();
try (BufferedReader reader = new BufferedReader(new FileReader("unixdict.txt"))) {
String line;
while ((line = reader.readLine()) != null) {
if (line.length() >= minLength)
words.add(line);
}
}
Collections.sort(words);
String previousWord = null;
int count = 0;
for (int i = 0, n = words.size(); i + minLength <= n; ++i) {
StringBuilder sb = new StringBuilder(minLength);
for (int j = 0; j < minLength; ++j)
sb.append(words.get(i + j).charAt(j));
String word = sb.toString();
if (word.equals(previousWord))
continue;
if (Collections.binarySearch(words, word) >= 0)
System.out.println(String.format("%2d. %s", ++count, word));
previousWord = word;
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
Output:
 1. applicate
 2. architect
 3. astronomy
 4. christine
 5. christoph
 6. committee
 7. composite
 8. constrict
 9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose

Julia[edit]

function wordsfromneighbourones(wordfile::String, len = 9, colwidth = 11, numcols = 8)
println("Word source: $wordfile\n")
words = filter(w -> length(w) >= len, split(read(wordfile, String), r"\s+"))
dict, shown, found = Dict(w => 1 for w in words), 0, String[]
for position in eachindex(@view words[1:end-len+1])
new_word = prod([words[i + position - 1][i] for i in 1:len])
if haskey(dict, new_word) && !(new_word in found)
push!(found, new_word)
print(rpad(new_word, colwidth), (shown += 1) % numcols == 0 ? "\n" : "")
end
end
end
 
wordsfromneighbourones("unixdict.txt")
 
Output:
Word source: unixdict.txt

applicate  architect  astronomy  christine  christoph  committee  composite  constrict
construct  different  extensive  greenwood  implement  improvise  intercept  interpret
interrupt  philosoph  prescript  receptive  telephone  transcend  transport  transpose

Perl[edit]

#!/usr/bin/perl
 
use strict; # https://rosettacode.org/wiki/Words_from_neighbour_ones
use warnings;
 
@ARGV = 'unixdict.txt';
my $skew = join '', map { s/^.{9}\K.+//r } my @words = grep length() > 9, <>;
my %dict = map { $_ => 1 } grep length == 10, @words;
my %seen;
my $nextch = '.{10}(\\w)' x 8;
while( $skew =~ /^(\w)(?=$nextch)/gms )
{
my $new = join '', @{^CAPTURE}, "\n";
$dict{$new} and !$seen{$new}++ and print $new;
}
Output:

applicate architect astronomy christine christoph committee composite constrict construct different extensive greenwood implement improvise intercept interpret interrupt philosoph prescript receptive telephone transcend transport transpose

Phix[edit]

function over9(string word) return length(word)>=9 end function
sequence dictionary = filter(get_text("demo/unixdict.txt",GT_LF_STRIPPED),over9)
function slicen(integer n) return vslice(dictionary,n)[n..-10+n] end function
sequence neighwords = unique(filter(columnize(apply(tagset(9),slicen)),"in",dictionary))
printf(1,"%d words: %s\n",{length(neighwords),join(shorten(neighwords,"",3))})
Output:
24 words: applicate architect astronomy ... transcend transport transpose

Raku[edit]

my @words_ge_9 = 'unixdict.txt'.IO.lines.grep( *.chars >= 9 );
my %words_eq_9 = @words_ge_9 .grep( *.chars == 9 ).Set;
 
my @new_words = gather for @words_ge_9.rotor( 9 => -8 ) -> @nine_words {
my $new_word = [~] map { @nine_words[$_].substr($_, 1) }, ^9;
 
take $new_word if %words_eq_9{$new_word};
}
 
.say for unique @new_words;
Output:
applicate
architect
astronomy
christine
christoph
committee
composite
constrict
construct
different
extensive
greenwood
implement
improvise
intercept
interpret
interrupt
philosoph
prescript
receptive
telephone
transcend
transport
transpose


REXX[edit]

This REXX version doesn't care what order the words in the dictionary are in,   nor does it care what
case  (lower/upper/mixed)  the words are in,   the search for the words is   caseless.

It also allows the minimum length to be specified on the command line (CL) as well as the dictionary file identifier.

/*REXX pgm finds words that're composed from neighbor words (within an identified dict).*/
parse arg minL iFID . /*obtain optional arguments from the CL*/
if minL=='' | minL=="," then minL= 9 /*Not specified? Then use the default.*/
if iFID=='' | iFID=="," then iFID='unixdict.txt' /* " " " " " " */
#= 0; @.=;  !.= 0 /*number of usable words in dictionary.*/
do recs=0 while lines(iFID)\==0 /*read each word in the file (word=X).*/
x= strip( linein( iFID) ) /*pick off a word from the input line. */
if length(x)<minL then iterate /*Is the word too short? Then skip it.*/
#= # + 1 /*bump the count of usable words. */
@.#= x; upper x;  !.x= 1 /*original case; create findable word.*/
end /*recs*/ /* [↑] semaphore name is uppercased. */
say copies('─', 30) recs "words in the dictionary file: " iFID
say copies('─', 30) right(#, length(recs) ) "usable words in the dictionary file."
finds= 0 /*count of the changable words found.*/
say; $=
do j=1 for #; y= left(@.j, 1) /*initialize the new word to be built. */
do k=2 to 9 until n>#; n= j + k /*use next 8 usable words in dictionary*/
y= y || substr(@.n, k, 1) /*build a new word, 1 letter at a time.*/
end /*k*/
uy=y; upper uy /*obtain uppercase version of the word.*/
if \!.uy then iterate /*Does the new word exist? No, skip it*/
if wordpos(uy, $)>0 then iterate /*Word is a dup? Then skip duplicate. */
finds= finds + 1 /*bump count of found neighboring words*/
$= $ uy /*add a word to the list of words found*/
say right( left(y, 30), 40) /*indent original word for readability.*/
end /*j*/
/*stick a fork in it, we're all done. */
say copies('─', 30) finds ' neighbor words found with a minimum length of ' minL
output   when using the default inputs:
────────────────────────────── 25104 words in the dictionary file:  unixdict.txt
──────────────────────────────  7250 usable words in the dictionary file.

          applicate
          architect
          astronomy
          christine
          christoph
          committee
          composite
          constrict
          construct
          different
          extensive
          greenwood
          implement
          improvise
          intercept
          interpret
          interrupt
          philosoph
          prescript
          receptive
          telephone
          transcend
          transport
          transpose
────────────────────────────── 24  neighbor words found with a minimum length of  9

Ring[edit]

 
cStr = read("unixdict.txt")
wordList = str2list(cStr)
char = list(9)
nextwords = []
num = 0
 
see "working..." + nl
 
ln = len(wordList)
for n = ln to 1 step -1
if len(wordList[n]) < 9
del(wordList,n)
ok
next
 
see "New words are:" + nl
 
for n = 1 to len(wordList)-8
for m = 1 to 9
char[m] = substr(wordList[n+m-1],m,1)
next
str = ""
for p = 1 to 9
str = str + char[p]
next
ind = find(wordList,str)
if ind > 0
add(nextwords,wordList[ind])
ok
next
 
nextwords = sort(nextwords)
for n = len(nextwords) to 2 step -1
if nextwords[n] = nextwords[n-1]
del(nextwords,n)
ok
next
 
for n = 1 to len(nextwords)
see "" + n + ". " + nextwords[n] + nl
next
 
see "done..." + nl
 

Output:

working...
New words are:
1. applicate
2. architect
3. astronomy
4. christine
5. christoph
6. committee
7. composite
8. constrict
9. construct
10. different
11. extensive
12. greenwood
13. implement
14. improvise
15. intercept
16. interpret
17. interrupt
18. philosoph
19. prescript
20. receptive
21. telephone
22. transcend
23. transport
24. transpose
done...

Wren[edit]

Library: Wren-sort
Library: Wren-fmt
import "io" for File
import "/sort" for Find
import "/fmt" for Fmt
 
var wordList = "unixdict.txt" // local copy
var words = File.read(wordList).trimEnd().split("\n").where { |w| w.count >= 9 }.toList
var count = 0
var alreadyFound = []
for (i in 0...words.count - 9) {
var word = ""
for (j in i...i+9) word = word + words[j][j-i]
if (Find.all(words, word)[0] && !Find.all(alreadyFound, word)[0]) {
count = count + 1
Fmt.print("$2d: $s", count, word)
alreadyFound.add(word)
}
}
Output:
 1: applicate
 2: architect
 3: astronomy
 4: christine
 5: christoph
 6: committee
 7: composite
 8: constrict
 9: construct
10: different
11: extensive
12: greenwood
13: implement
14: improvise
15: intercept
16: interpret
17: interrupt
18: philosoph
19: prescript
20: receptive
21: telephone
22: transcend
23: transport
24: transpose