Determine if a string has all unique characters: Difference between revisions
Line 445: | Line 445: | ||
Or perhaps, as an alternative to grouping and sorting: |
Or perhaps, as an alternative to grouping and sorting: |
||
<lang haskell>import Data.List (find, intercalate) |
<lang haskell>import Data.List (find, intercalate) |
||
import Control.Arrow (second) |
|||
import Numeric (showHex) |
import Numeric (showHex) |
||
import Data.Char (ord) |
import Data.Char (ord) |
||
duplicatedCharIndices :: String -> Maybe (Char, [Int]) |
duplicatedCharIndices :: String -> Maybe (Char, [Int]) |
||
Line 456: | Line 458: | ||
maybe |
maybe |
||
(go cs) |
(go cs) |
||
( |
(Just . second ((i :) . return)) |
||
(find ((c ==) . fst) cs) |
(find ((c ==) . fst) cs) |
||
Revision as of 01:24, 29 December 2019
You are encouraged to solve this task according to the task description, using any language you may know.
- Task
Given a character string (which may be empty, or have a length of zero characters):
- create a function/procedure/routine to:
- determine if all the characters in the string are unique
- indicate if or which character is duplicated and where
- display each string and its length (as the strings are being examined)
- a zero─length (empty) string shall be considered as unique
- process the strings from left─to─right
- if unique, display a message saying such
- if not unique, then:
- display a message saying such
- display what character is duplicated
- only the 1st non─unique character need be displayed
- display where "both" duplicated characters are in the string
- the above messages can be part of a single message
- display the hexadecimal value of the duplicated character
Use (at least) these five test values (strings):
- a string of length 0 (an empty string)
- a string of length 1 which is a single period (.)
- a string of length 6 which contains: abcABC
- a string of length 7 which contains a blank in the middle: XYZ ZYX
- a string of length 36 which doesn't contain the letter "oh":
- 1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ
Show all output here on this page.
- Related tasks
AWK
<lang AWK>
# syntax: GAWK -f DETERMINE_IF_A_STRING_HAS_ALL_UNIQUE_CHARACTERS.AWK
#
# Prints a table with one row per test string: the string, its length,
# whether all characters are unique and, if not, the first repeated
# character, its hex value and the two positions (1-based) involved.
BEGIN {
    for (i=0; i<=255; i++) { ord_arr[sprintf("%c",i)] = i } # build array[character]=ordinal_value
    n = split(",.,abcABC,XYZ ZYX,1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ",arr,",")
    for (i in arr) {
      width = max(width,length(arr[i]))
    }
    width += 2 # room for the surrounding quotes
    fmt = "| %-*s | %-6s | %-10s | %-8s | %-3s | %-9s |\n"
    head1 = head2 = sprintf(fmt,width,"string","length","all unique","1st diff","hex","positions")
    gsub(/[^|\n]/,"-",head1) # turn a copy of the heading into a ruler line
    printf(head1 head2 head1) # column headings
    for (i=1; i<=n; i++) {
      main(arr[i])
    }
    printf(head1) # column footing
    exit(0)
}
# Examine one string and print its table row.
# Everything after 'str' is a local variable; first_diff must be listed here,
# otherwise it stays global and a unique string would show the previous
# string's duplicate in the "1st diff" column.
function main(str,  c,first_diff,hex,i,leng,msg,position1,position2,tmp_arr) {
    msg = "yes"
    leng = length(str)
    for (i=1; i<=leng; i++) {
      c = substr(str,i,1)
      if (c in tmp_arr) { # seen before: report first and current position
        msg = "no"
        first_diff = "'" c "'"
        hex = sprintf("%2X",ord_arr[c])
        position1 = index(str,c)
        position2 = i
        break
      }
      tmp_arr[c] = ""
    }
    printf(fmt,width,"'" str "'",leng,msg,first_diff,hex,position1 " " position2)
}
function max(x,y) { return((x > y) ? x : y) }
</lang>
- Output:
|----------------------------------------|--------|------------|----------|-----|-----------| | string | length | all unique | 1st diff | hex | positions | |----------------------------------------|--------|------------|----------|-----|-----------| | '' | 0 | yes | | | | | '.' | 1 | yes | | | | | 'abcABC' | 6 | yes | | | | | 'XYZ ZYX' | 7 | no | 'Z' | 5A | 3 5 | | '1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ' | 36 | no | '0' | 30 | 10 25 | |----------------------------------------|--------|------------|----------|-----|-----------|
C
In interactive mode, strings with spaces have to be enclosed in double quotes ("") <lang C>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct positionList{
int position; struct positionList *next;
}positionList;
typedef struct letterList{
char letter; int repititions; positionList* positions; struct letterList *next;
}letterList;
letterList* letterSet; bool duplicatesFound = false;
/* Allocate a position node holding pos; terminates the program if malloc fails. */
static positionList* newPositionNode(int pos){
	positionList* node = (positionList*)malloc(sizeof(positionList));

	if(node==NULL){
		perror("malloc");
		exit(EXIT_FAILURE);
	}
	node->position = pos;
	node->next = NULL;
	return node;
}

/* Allocate a letter node for c whose position list starts with pos. */
static letterList* newLetterNode(char c,int pos){
	letterList* node = (letterList*)malloc(sizeof(letterList));

	if(node==NULL){
		perror("malloc");
		exit(EXIT_FAILURE);
	}
	node->letter = c;
	node->repititions = 0;
	node->positions = newPositionNode(pos);
	node->next = NULL;
	return node;
}

/*
 * Record that character c occurs at index pos.
 *
 * If c is already in the global letterSet list, its repetition count is
 * incremented, pos is appended to its position list and duplicatesFound
 * is set. Otherwise a new entry for c is appended to the end of letterSet.
 */
void checkAndUpdateLetterList(char c,int pos){
	letterList *letterIterator,*lastLetter = NULL;
	positionList *positionIterator;

	for(letterIterator=letterSet;letterIterator!=NULL;letterIterator=letterIterator->next){
		if(letterIterator->letter==c){
			duplicatesFound = true;
			letterIterator->repititions++;

			/* append at the tail so positions stay in left-to-right order */
			positionIterator = letterIterator->positions;
			while(positionIterator->next!=NULL)
				positionIterator = positionIterator->next;
			positionIterator->next = newPositionNode(pos);

			/* each letter has a single entry, so nothing further to scan */
			return;
		}
		lastLetter = letterIterator;
	}

	/* c not seen before: start the list or append after the last entry */
	if(lastLetter==NULL)
		letterSet = newLetterNode(c,pos);
	else
		lastLetter->next = newLetterNode(c,pos);
}
/*
 * Print every letter in the global letterSet list that occurred more than
 * once, together with its hex value and all of its 1-based positions.
 */
void printLetterList(){
	positionList* positionIterator;
	letterList* letterIterator = letterSet;

	while(letterIterator!=NULL){
		if(letterIterator->repititions>0){
			/* cast so a byte >= 0x80 is not sign-extended when promoted for %x */
			printf("\n'%c' (0x%x) at positions :",letterIterator->letter,(unsigned char)letterIterator->letter);

			positionIterator = letterIterator->positions;

			while(positionIterator!=NULL){
				/* stored positions are 0-based; display them 1-based */
				printf("%3d",positionIterator->position + 1);
				positionIterator = positionIterator->next;
			}
		}

		letterIterator = letterIterator->next;
	}
	printf("\n");
}
/*
 * Entry point: takes at most one command-line argument (the test string),
 * prints the string with its length and either a uniqueness message or the
 * list of duplicated characters.
 */
int main(int argc,char** argv)
{
	int idx,length;
	const char* verdict;

	/* more than one argument: show usage and stop */
	if(argc>2){
		printf("Usage : %s <Test string>\n",argv[0]);
		return 0;
	}

	/* no argument, or a single character, is unique by definition */
	if(argc==1||strlen(argv[1])==1){
		printf("\"%s\" - Length %d - Contains only unique characters.\n",argc==1?"":argv[1],argc==1?0:1);
		return 0;
	}

	length = strlen(argv[1]);

	idx = 0;
	while(idx<length){
		checkAndUpdateLetterList(argv[1][idx],idx);
		idx++;
	}

	if(duplicatesFound==false)
		verdict = "Contains only unique characters.\n";
	else
		verdict = "Contains the following duplicate characters :";

	printf("\"%s\" - Length %d - %s",argv[1],length,verdict);

	if(duplicatesFound==true)
		printLetterList();

	return 0;
} </lang> Output, test strings from the task Determine_if_a_string_has_all_the_same_characters are also included :
abhishek_ghosh@Azure:~/doodles$ ./a.out "" - Length 0 - Contains only unique characters. abhishek_ghosh@Azure:~/doodles$ ./a.out . "." - Length 1 - Contains only unique characters. abhishek_ghosh@Azure:~/doodles$ ./a.out abcABC "abcABC" - Length 6 - Contains only unique characters. abhishek_ghosh@Azure:~/doodles$ ./a.out "XYZ YZX" "XYZ YZX" - Length 7 - Contains the following duplicate characters : 'X' (0x58) at positions : 1 7 'Y' (0x59) at positions : 2 5 'Z' (0x5a) at positions : 3 6 abhishek_ghosh@Azure:~/doodles$ ./a.out 1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" - Length 36 - Contains the following duplicate characters : '0' (0x30) at positions : 10 25 abhishek_ghosh@Azure:~/doodles$ ./a.out " " " " - Length 3 - Contains the following duplicate characters : ' ' (0x20) at positions : 1 2 3 abhishek_ghosh@Azure:~/doodles$ ./a.out 2 "2" - Length 1 - Contains only unique characters. abhishek_ghosh@Azure:~/doodles$ ./a.out 333 "333" - Length 3 - Contains the following duplicate characters : '3' (0x33) at positions : 1 2 3 abhishek_ghosh@Azure:~/doodles$ ./a.out .55 ".55" - Length 3 - Contains the following duplicate characters : '5' (0x35) at positions : 2 3 abhishek_ghosh@Azure:~/doodles$ ./a.out tttTTT "tttTTT" - Length 6 - Contains the following duplicate characters : 't' (0x74) at positions : 1 2 3 'T' (0x54) at positions : 4 5 6 abhishek_ghosh@Azure:~/doodles$ ./a.out "4444 444k" "4444 444k" - Length 9 - Contains the following duplicate characters : '4' (0x34) at positions : 1 2 3 4 6 7 8
Factor
<lang factor>USING: accessors formatting generalizations io kernel math.parser regexp sequences sets strings ;
! Given a string and one of its code points, leave on the stack the
! character as a one-element string, its hexadecimal value, and the start
! indices of its first two matches in the string.
! NOTE(review): the character is compiled with <regexp>, so a regexp
! metacharacter (e.g. ".") would presumably not match literally - confirm.
: >dup-char< ( str n -- char hex first-index second-index )
    1string tuck [ dup first >hex ] 2dip <regexp>
    all-matching-slices first2 [ from>> ] bi@ ;
! For every element returned by `duplicates` for the string, print the
! character, its hex value and two indices where it occurs, then a blank line.
: duplicate-info. ( str -- )
    dup duplicates
    [ >dup-char< "'%s' (0x%s) at indices %d and %d.\n" printf ]
    with each nl ;
! Print the string and its length, followed either by a message that all
! characters are unique or by the duplicate report from duplicate-info.
: uniqueness-report. ( str -- )
    dup dup length "%u — length %d — contains " printf
    dup all-unique?
    [ drop "all unique characters." print nl ]
    [ "duplicate characters:" print duplicate-info. ] if ;
"" "." "abcABC" "XYZ ZYX" "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" [ uniqueness-report. ] 5 napply</lang>
- Output:
"" — length 0 — contains all unique characters. "." — length 1 — contains all unique characters. "abcABC" — length 6 — contains all unique characters. "XYZ ZYX" — length 7 — contains duplicate characters: 'Z' (0x5a) at indices 2 and 4. 'Y' (0x59) at indices 1 and 5. 'X' (0x58) at indices 0 and 6. "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" — length 36 — contains duplicate characters: '0' (0x30) at indices 9 and 24.
Go
<lang go>package main
import "fmt"
// analyze prints s with its length in runes and then reports either that
// all characters are unique, or the first character (scanning left to
// right) that occurs again later, with its hex value and the 1-based
// positions of its first two occurrences.
func analyze(s string) {
	chars := []rune(s)
	le := len(chars)
	fmt.Printf("Analyzing %q which has a length of %d:\n", s, le)
	// For le < 2 the outer loop body never runs, so no separate guard is needed.
	for i := 0; i < le-1; i++ {
		for j := i + 1; j < le; j++ {
			if chars[j] == chars[i] {
				fmt.Println(" Not all characters in the string are unique.")
				fmt.Printf(" %q (%#[1]x) is duplicated at positions %d and %d.\n\n", chars[i], i+1, j+1)
				return
			}
		}
	}
	// A second Println gives the blank separator line; a trailing "\n"
	// inside Println is rejected by go vet as a redundant newline.
	fmt.Println(" All characters in the string are unique.")
	fmt.Println()
}
func main() {
strings := []string{ "", ".", "abcABC", "XYZ ZYX", "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ", "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X", "hétérogénéité", "🎆🎃🎇🎈", "😍😀🙌💃😍🙌", "🐠🐟🐡🦈🐬🐳🐋🐡", } for _, s := range strings { analyze(s) }
}</lang>
- Output:
Analyzing "" which has a length of 0: All characters in the string are unique. Analyzing "." which has a length of 1: All characters in the string are unique. Analyzing "abcABC" which has a length of 6: All characters in the string are unique. Analyzing "XYZ ZYX" which has a length of 7: Not all characters in the string are unique. 'X' (0x58) is duplicated at positions 1 and 7. Analyzing "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" which has a length of 36: Not all characters in the string are unique. '0' (0x30) is duplicated at positions 10 and 25. Analyzing "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X" which has a length of 39: Not all characters in the string are unique. '0' (0x30) is duplicated at positions 1 and 11. Analyzing "hétérogénéité" which has a length of 13: Not all characters in the string are unique. 'é' (0xe9) is duplicated at positions 2 and 4. Analyzing "🎆🎃🎇🎈" which has a length of 4: All characters in the string are unique. Analyzing "😍😀🙌💃😍🙌" which has a length of 6: Not all characters in the string are unique. '😍' (0x1f60d) is duplicated at positions 1 and 5. Analyzing "🐠🐟🐡🦈🐬🐳🐋🐡" which has a length of 8: Not all characters in the string are unique. '🐡' (0x1f421) is duplicated at positions 3 and 8.
Haskell
<lang Haskell>import Data.List import Data.Char import Numeric
hexFromChar :: Char -> String hexFromChar c = map toUpper $ showHex (ord c) ""
string :: String -> String string xs = ('\"':xs) ++ "\""
-- | Render a character wrapped in single quotes, e.g. @char 'X' == "'X'"@.
char :: Char -> String
char c = ['\'', c, '\'']
size :: String -> String size = show.length
positions :: (Int,Int) -> String positions (a,b) = (show a) ++ " " ++ (show b)
forTable::String -> [String] forTable xs = (string xs): (go $ allUnique xs)
where go Nothing = [size xs,"yes","","",""] go (Just (u,ij)) = [size xs,"no",char u,hexFromChar u, positions ij]
-- | Lay out rows of cells as a text table.
--
-- Arguments: whether the first row is a header, the vertical border
-- character, the horizontal rule character, the rule/column junction
-- character, and the rows (each a list of cell strings).
--
-- With a header, a rule is drawn above and below the first row and after
-- the last row; without one, a rule is drawn between every pair of rows.
-- An empty list of rows yields the empty string.
showTable :: Bool -> Char -> Char -> Char -> [[String]] -> String
showTable _ _ _ _ [] = []
showTable header ver hor sep contents =
  unlines $ hr : (if header then z : hr : zs else intersperse hr zss) ++ [hr]
  where
    -- width of every cell, and the widest cell per column
    vss = map (map length) contents
    ms = map maximum $ transpose vss :: [Int]
    -- horizontal rule: junction, then one rule char per column width
    hr = concatMap (\n -> sep : replicate n hor) ms ++ [sep]
    -- blank padding needed to bring each cell up to its column width
    bss = map (\ps -> map (flip replicate ' ') $ zipWith (-) ms ps) vss
    -- rendered rows: border, cell, padding ... closing border
    zss@(z:zs) =
      zipWith
        (\us bs -> concat (zipWith (\x y -> (ver : x) ++ y) us bs) ++ [ver])
        contents
        bss
table xs = showTable True '|' '-' '+' (["string","length","all unique","1st diff","hex","positions"]:map forTable xs)
allUnique:: (Ord b, Ord a, Num b, Enum b) => [a] -> Maybe (a, (b, b)) allUnique xs = go.groupBy (\(x,_) (y,_) -> x == y).sort.zip xs $ [0..]
where go [] = Nothing go ([_]:us) = go us go (((u,i):(_,j):_):_) = Just (u, (i,j))
main = putStrLn $ table ["",".","abcABC","XYZ ZYX","1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ"]
</lang>
- Output:
+--------------------------------------+------+----------+--------+---+---------+ |string |length|all unique|1st diff|hex|positions| +--------------------------------------+------+----------+--------+---+---------+ |"" |0 |yes | | | | |"." |1 |yes | | | | |"abcABC" |6 |yes | | | | |"XYZ ZYX" |7 |no |'X' |58 |0 6 | |"1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ"|36 |no |'0' |30 |9 24 | +--------------------------------------+------+----------+--------+---+---------+
Or perhaps, as an alternative to grouping and sorting:
<lang haskell>import Data.List (find, intercalate)
import Control.Arrow (second)
import Numeric (showHex)
import Data.Char (ord)
-- | Scanning left to right, find the first character that occurs again
-- later in the string. Returns that character with the zero-based indices
-- of its first two occurrences, or 'Nothing' if no character repeats.
duplicatedCharIndices :: String -> Maybe (Char, [Int])
duplicatedCharIndices = search . flip zip [0 ..]
  where
    -- at least two characters left: look for the head among the rest
    search ((ch, ix) : rest@(_ : _)) =
      case find ((ch ==) . fst) rest of
        Just (_, iy) -> Just (ch, [ix, iy])
        Nothing -> search rest
    -- zero or one character left: nothing can repeat
    search _ = Nothing
-- TEST ----------------------------
main :: IO () main =
putStrLn $ fTable "First duplicated character, if any:" (\xs -> show xs ++ " (" ++ (show . length) xs ++ ")") (\mb -> maybe "None" (\(c, ixs) -> unwords [ show c , "(0x" ++ (showHex (ord c) ") at") , intercalate ", " (show <$> ixs) ]) mb) duplicatedCharIndices ["", ".", "abcABC", "XYZ ZYX", "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ"]
-- DISPLAY --------------------------
fTable :: String -> (a -> String) -> (b -> String) -> (a -> b) -> [a] -> String fTable s xShow fxShow f xs =
let rjust n c = drop . length <*> (replicate n c ++) w = maximum (length . xShow <$> xs) in unlines $ s : fmap (((++) . rjust w ' ' . xShow) <*> ((" -> " ++) . fxShow . f)) xs</lang>
- Output:
First duplicated character, if any: "" (0) -> None "." (1) -> None "abcABC" (6) -> None "XYZ ZYX" (7) -> 'X' (0x58) at 0, 6 "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" (36) -> '0' (0x30) at 9, 24
JavaScript
<lang javascript>(() => {
'use strict';
// duplicatedCharIndices :: String -> Maybe (Char, [Int]) const duplicatedCharIndices = s => { const go = xs => 1 < xs.length ? (() => { const rest = xs.slice(1), [c, i] = Array.from(xs[0]);
return maybe(go(rest))( ci => Just(Tuple(fst(ci))([i, snd(ci)])) )(find(compose(eq(c), fst))(rest)) })() : Nothing(); return go( zip(chars(s))(enumFrom(0)) ); };
// ------------------------TEST------------------------ const main = () => console.log( fTable('First duplicated character, if any:')( s => `'${s}'` )(maybe('None')(tpl => { const [c, ixs] = Array.from(tpl); return `'${c}' (0x${showHex(ord(c))}) at ${ixs.join(', ')}` }))(duplicatedCharIndices)([ "", ".", "abcABC", "XYZ ZYX", "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" ]) );
// -----------------GENERIC FUNCTIONS------------------
// Just :: a -> Maybe a const Just = x => ({ type: 'Maybe', Nothing: false, Just: x });
// Nothing :: Maybe a const Nothing = () => ({ type: 'Maybe', Nothing: true, });
// Tuple (,) :: a -> b -> (a, b) const Tuple = a => b => ({ type: 'Tuple', '0': a, '1': b, length: 2 });
// chars :: String -> [Char]
// Splits a string into its characters. Array.from iterates by code point,
// so characters outside the BMP stay whole (matching ord's codePointAt).
// A bare s.split() would return the whole string as a single element.
const chars = s => Array.from(s);
// compose (<<<) :: (b -> c) -> (a -> b) -> a -> c const compose = (...fs) => x => fs.reduceRight((a, f) => f(a), x);
// enumFrom :: Enum a => a -> [a] function* enumFrom(x) { let v = x; while (true) { yield v; v = 1 + v; } }
// eq (==) :: Eq a => a -> a -> Bool const eq = a => b => a === b;
// find :: (a -> Bool) -> [a] -> Maybe a const find = p => xs => { const i = xs.findIndex(p); return -1 !== i ? ( Just(xs[i]) ) : Nothing(); };
// fTable :: String -> (a -> String) -> (b -> String) // -> (a -> b) -> [a] -> String const fTable = s => xShow => fxShow => f => xs => { // Heading -> x display function -> // fx display function -> // f -> values -> tabular string const ys = xs.map(xShow), w = Math.max(...ys.map(length)); return s + '\n' + zipWith( a => b => a.padStart(w, ' ') + ' -> ' + b )(ys)( xs.map(x => fxShow(f(x))) ).join('\n'); };
// fst :: (a, b) -> a const fst = tpl => tpl[0];
// length :: [a] -> Int const length = xs => // Returns Infinity over objects without finite length. // This enables zip and zipWith to choose the shorter // argument when one is non-finite, like cycle, repeat etc (Array.isArray(xs) || 'string' === typeof xs) ? ( xs.length ) : Infinity;
// maybe :: b -> (a -> b) -> Maybe a -> b const maybe = v => // Default value (v) if m is Nothing, or f(m.Just) f => m => m.Nothing ? v : f(m.Just);
// ord :: Char -> Int const ord = c => c.codePointAt(0);
// showHex :: Int -> String const showHex = n => n.toString(16);
// snd :: (a, b) -> b const snd = tpl => tpl[1];
// take :: Int -> [a] -> [a] // take :: Int -> String -> String const take = n => xs => 'GeneratorFunction' !== xs.constructor.constructor.name ? ( xs.slice(0, n) ) : [].concat.apply([], Array.from({ length: n }, () => { const x = xs.next(); return x.done ? [] : [x.value]; }));
// zip :: [a] -> [b] -> [(a, b)]
// Pairs up elements of xs and ys as Tuples, stopping at the shorter input.
// length() reports Infinity for non-array, non-string values such as
// generators, so the finite argument decides the result size.
const zip = xs => ys => {
    const
        lng = Math.min(length(xs), length(ys)),
        vs = take(lng)(ys);
    return take(lng)(xs)
        .map((x, i) => Tuple(x)(vs[i]));
};
// zipWith :: (a -> b -> c) -> [a] -> [b] -> [c] const zipWith = f => xs => ys => { const lng = Math.min(length(xs), length(ys)), vs = take(lng)(ys); return take(lng)(xs) .map((x, i) => f(x)(vs[i])); };
// MAIN --- return main();
})();</lang>
- Output:
First duplicated character, if any: '' -> None '.' -> None 'abcABC' -> None 'XYZ ZYX' -> 'X' (0x58) at 0, 6 '1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ' -> '0' (0x30) at 9, 24
Julia
<lang julia>arr(s) = [c for c in s] alldup(a) = filter(x -> length(x) > 1, [findall(x -> x == a[i], a) for i in 1:length(a)]) firstduplicate(s) = (a = arr(s); d = alldup(a); isempty(d) ? nothing : first(d))
# Print a table with one row per string: the string, its length, whether all
# characters are unique and, if not, the first duplicated character with its
# hexadecimal code point and the 1-based positions of its first two occurrences.
function testfunction(strings)
    println(rpad("String", 38), rpad("Length", 11), rpad("All Unique", 15),
            rpad("First Duplicate (Hex)", 24), "Positions")
    println("-"^97)
    for s in strings
        n = firstduplicate(s)   # indices of the first repeated character, or nothing
        a = arr(s)
        if n === nothing
            println(rpad(s, 38), rpad(length(s), 11), "yes")
        else
            c = a[n[1]]
            hex = string(UInt32(c), base=16)
            println(rpad(s, 38), rpad(length(s), 11), rpad("no", 15),
                    rpad("$c ($hex)", 24), rpad(n[1], 4), n[2])
        end
    end
end
testfunction([ "", ".", "abcABC", "XYZ ZYX", "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ",
"hétérogénéité",
"🎆🎃🎇🎈", "😍😀🙌💃😍🙌", "🐠🐟🐡🦈🐬🐳🐋🐡", ])
</lang>
- Output:
String | Length | All Unique | First Duplicate (Hex) | Positions ------------------------------------------------------------------------------------------- 0 yes . 1 yes abcABC 6 yes XYZ ZYX 7 no X (58) 1 7 1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ 36 no 0 (30) 10 25 hétérogénéité 13 no é (e9) 2 4 🎆🎃🎇🎈 4 yes 😍😀🙌💃😍🙌 6 no 😍 (1f60d) 1 5 🐠🐟🐡🦈🐬🐳🐋🐡 8 no 🐡 (1f421) 3 8
Perl
<lang perl>use strict; use warnings; use feature 'say'; use utf8; binmode(STDOUT, ':utf8'); use List::AllUtils qw(uniq); use Unicode::UCD 'charinfo';
# For each test string, report its length in grapheme clusters and every
# cluster that occurs more than once, with its Unicode name, code point
# and 1-based positions.
for my $str (
    '',
    '.',
    'abcABC',
    'XYZ ZYX',
    '1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ',
    '01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X',
    'Δ👍👨👍Δ',
    'ΔδΔ̂ΔΛ',
) {
    # \X matches an extended grapheme cluster, so combining marks stay attached
    my @S;
    push @S, $1 while $str =~ /(\X)/g;
    # pass $str as an argument so a '%' in the string is not read as a directive
    printf qq{\n"%s" (length: %d) has }, $str, scalar @S;
    if (@S != uniq @S ) {
        say "duplicated characters:";
        # map each cluster to the list of its 1-based positions
        my %P;
        push @{ $P{$S[$_]} }, 1+$_ for 0..$#S;
        for my $k (sort keys %P) {
            next unless @{$P{$k}} > 1;
            printf "'%s' %s (0x%x) in positions: %s\n", $k, charinfo(ord $k)->{'name'}, ord($k), join ', ', @{$P{$k}};
        }
    } else {
        say "no duplicated characters."
    }
}</lang>
- Output:
"" (length: 0) has no duplicated characters. "." (length: 1) has no duplicated characters. "abcABC" (length: 6) has no duplicated characters. "XYZ ZYX" (length: 7) has duplicated characters: 'X' LATIN CAPITAL LETTER X (0x58) in positions: 1, 7 'Y' LATIN CAPITAL LETTER Y (0x59) in positions: 2, 6 'Z' LATIN CAPITAL LETTER Z (0x5a) in positions: 3, 5 "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" (length: 36) has duplicated characters: '0' DIGIT ZERO (0x30) in positions: 10, 25 "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X" (length: 39) has duplicated characters: '0' DIGIT ZERO (0x30) in positions: 1, 11, 26, 38 'X' LATIN CAPITAL LETTER X (0x58) in positions: 35, 39 "Δ👍👨👍Δ" (length: 5) has duplicated characters: 'Δ' GREEK CAPITAL LETTER DELTA (0x394) in positions: 1, 5 '👍' THUMBS UP SIGN (0x1f44d) in positions: 2, 4 "ΔδΔ̂ΔΛ" (length: 5) has duplicated characters: 'Δ' GREEK CAPITAL LETTER DELTA (0x394) in positions: 1, 4
Perl 6
Perl 6 works with unicode natively and handles combining characters and multi-byte emoji correctly. In the last string, notice that the length is correctly shown as 11 characters and that the delta with a combining circumflex in position 6 is not the same as the deltas without in positions 5 & 9.
<lang perl6> -> $str {
    # Report the string, its length in characters, and every character that
    # occurs more than once together with its names, ordinals and positions.
    my $i = 0;
    print "\n{$str.perl} (length: {$str.chars}), has ";
    # map each character to the list of its 1-based positions
    my %m;
    %m{$_}.push: ++$i for $str.comb;
    if any(%m.values) > 1 {
        say "duplicated characters:";
        say "'{.key}' ({.key.uninames}; hex ordinal: {(.key.ords).fmt: "0x%X"})" ~
        " in positions: {.value.join: ', '}" for %m.grep( *.value > 1 ).sort( *.value[0] );
    } else {
        say "no duplicated characters."
    }
} for
    '',
    '.',
    'abcABC',
    'XYZ ZYX',
    '1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ',
    '01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X',
    '🦋🙂👨‍👩‍👧‍👦🙄ΔΔ̂ 🦋Δ👍👨‍👩‍👧‍👦'</lang>
- Output:
"" (length: 0), has no duplicated characters. "." (length: 1), has no duplicated characters. "abcABC" (length: 6), has no duplicated characters. "XYZ ZYX" (length: 7), has duplicated characters: 'X' (LATIN CAPITAL LETTER X; hex ordinal: 0x58) in positions: 1, 7 'Y' (LATIN CAPITAL LETTER Y; hex ordinal: 0x59) in positions: 2, 6 'Z' (LATIN CAPITAL LETTER Z; hex ordinal: 0x5A) in positions: 3, 5 "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" (length: 36), has duplicated characters: '0' (DIGIT ZERO; hex ordinal: 0x30) in positions: 10, 25 "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X" (length: 39), has duplicated characters: '0' (DIGIT ZERO; hex ordinal: 0x30) in positions: 1, 11, 26, 38 'X' (LATIN CAPITAL LETTER X; hex ordinal: 0x58) in positions: 35, 39 "🦋🙂👨👩👧👦🙄ΔΔ̂ 🦋Δ👍👨👩👧👦" (length: 11), has duplicated characters: '🦋' (BUTTERFLY; hex ordinal: 0x1F98B) in positions: 1, 8 '👨👩👧👦' (MAN ZERO WIDTH JOINER WOMAN ZERO WIDTH JOINER GIRL ZERO WIDTH JOINER BOY; hex ordinal: 0x1F468 0x200D 0x1F469 0x200D 0x1F467 0x200D 0x1F466) in positions: 3, 11 'Δ' (GREEK CAPITAL LETTER DELTA; hex ordinal: 0x394) in positions: 5, 9
Phix
As with Determine_if_a_string_has_all_the_same_characters#Phix, you can use utf8_to_utf32() when needed. <lang Phix>procedure all_uniq(sequence s)
string msg = "all characters are unique" for i=1 to length(s) do integer si = s[i], r = find(si,s,i+1) -- (or maybe rfind(si,s,i-1)) if r then msg = sprintf(`first duplicate character "%c"(#%02x) at positions %d and %d`,{si,si,i,r}) exit end if end for printf(1,"\"%s\" (length %d): %s\n",{s,length(s),msg})
end procedure
constant tests = {"",".","abcABC","XYZ ZYX","1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ"} for i=1 to length(tests) do all_uniq(tests[i]) end for</lang>
- Output:
"" (length 0): all characters are unique "." (length 1): all characters are unique "abcABC" (length 6): all characters are unique "XYZ ZYX" (length 7): first duplicate character "X"(#58) at positions 1 and 7 "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" (length 36): first duplicate character "0"(#30) at positions 10 and 25
REXX
<lang rexx>/*REXX pgm determines if a string is comprised of all unique characters (no duplicates).*/
@.=                                              /*assign a default for the  @.  array. */
parse arg @.1                                    /*obtain optional argument from the CL.*/
if @.1=''  then do;   @.1=                       /*Not specified?  Then assume defaults.*/
                      @.2= .
                      @.3= 'abcABC'
                      @.4= 'XYZ ZYX'
                      @.5= '1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ'
                end

     do j=1;    if j\==1  &  @.j==''  then leave /*String is null & not j=1?  We're done*/
     say copies('─', 79)                         /*display a separator line  (a fence). */
     say 'Testing for the string (length' length(@.j)"): " @.j
     say
     dup= isUnique(@.j)                          /*0 = unique, else index of 1st dup.   */
     say 'The characters in the string' word("are aren't", 1 + (dup>0) ) 'all unique.'
     if dup==0  then iterate
     ?= substr(@.j, dup, 1)                      /*the character that is duplicated.    */
     say 'The character ' ? " ('"c2x(?)"'x) at position " dup ,
         ' is repeated at position ' pos(?, @.j, dup+1)
     end   /*j*/
exit                                             /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
isUnique: procedure; parse arg x                 /*obtain the character string.*/
            do k=1  to length(x) - 1             /*examine all but the last.   */
            p= pos( substr(x, k, 1), x, k + 1)   /*see if the Kth char is a dup*/
            if p\==0  then return k              /*Find a dup? Return location.*/
            end   /*k*/
          return 0                               /*indicate all chars unique.  */</lang>
- output when using the internal defaults
─────────────────────────────────────────────────────────────────────────────── Testing for the string (length 0): The characters in the string are all unique. ─────────────────────────────────────────────────────────────────────────────── Testing for the string (length 1): . The characters in the string are all unique. ─────────────────────────────────────────────────────────────────────────────── Testing for the string (length 6): abcABC The characters in the string are all unique. ─────────────────────────────────────────────────────────────────────────────── Testing for the string (length 7): XYZ ZYX The characters in the string aren't all unique. The character X ('58'x) at position 1 is repeated at position 7 ─────────────────────────────────────────────────────────────────────────────── Testing for the string (length 36): 1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ The characters in the string aren't all unique. The character 0 ('30'x) at position 10 is repeated at position 25
Ruby
<lang ruby>strings = ["",
".", "abcABC", "XYZ ZYX", "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ", "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X", "hétérogénéité", "🎆🎃🎇🎈", "😍😀🙌💃😍🙌", "🐠🐟🐡🦈🐬🐳🐋🐡",]
strings.each do |str|
seen = {} print "#{str.inspect} (size #{str.size}) " res = "has no duplicates." #may change str.chars.each_with_index do |c,i| if seen[c].nil? seen[c] = i else res = "has duplicate char #{c} (#{'%#x' % c.ord}) on #{seen[c]} and #{i}." break end end puts res
end </lang>
- Output:
"" (size 0) has no duplicates. "." (size 1) has no duplicates. "abcABC" (size 6) has no duplicates. "XYZ ZYX" (size 7) has duplicate char Z (0x5a) on 2 and 4. "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" (size 36) has duplicate char 0 (0x30) on 9 and 24. "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X" (size 39) has duplicate char 0 (0x30) on 0 and 10. "hétérogénéité" (size 13) has duplicate char é (0xe9) on 1 and 3. "🎆🎃🎇🎈" (size 4) has no duplicates. "😍😀🙌💃😍🙌" (size 6) has duplicate char 😍 (0x1f60d) on 0 and 4. "🐠🐟🐡🦈🐬🐳🐋🐡" (size 8) has duplicate char 🐡 (0x1f421) on 2 and 7.
Tcl
<lang tcl>package require Tcl 8.6 ; # For binary encode
array set yesno {1 Yes 2 No}
set test {
{} {.} {abcABC} {XYZ ZYX} {1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ} {hétérogénéité}
}
# Loop through test strings
foreach str $test {
    set chars [dict create] ; # init dictionary: char -> list of indexes seen
    set num_chars 1 ; # In case of empty string

    # Loop through characters in string
    for {set i 0} {$i < [string length $str]} {incr i} {
        set c [string index $str $i] ; # get char at index
        dict lappend chars $c $i ; # add index to a running list for key=char
        set indexes [dict get $chars $c] ; # get the whole running list
        set num_chars [llength $indexes] ; # count the # of indexes
        if {$num_chars > 1} {
            break ; # Found a duplicate, break out of the loop
        }
    }

    # Handle Output
    puts [format "Tested: %38s (len: %2d). All unique? %3s. " \
        "'$str'" [string length $str] $yesno($num_chars)]
    if {$num_chars > 1} {
        # [scan $c %c] yields the character's full code point; hex-encoding
        # the character as binary data would keep only its low byte.
        puts [format " --> Character '%s' (hex: 0x%x) reappears at indexes: %s." \
            $c [scan $c %c] $indexes]
    }
} </lang>
- Output:
Tested: '' (len: 0). All unique? Yes. Tested: '.' (len: 1). All unique? Yes. Tested: 'abcABC' (len: 6). All unique? Yes. Tested: 'XYZ ZYX' (len: 7). All unique? No. --> Character 'Z' (hex: 0x5a) reappears at indexes: 2 4. Tested: '1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ' (len: 36). All unique? No. --> Character '0' (hex: 0x30) reappears at indexes: 9 24. Tested: 'hétérogénéité' (len: 13). All unique? No. --> Character 'é' (hex: 0xe9) reappears at indexes: 1 3.
XPL0
<lang XPL0>include xpllib; \contains StrLen function
proc StrUnique(S); \Show if string has unique chars char S; int L, I, J, K; [L:= StrLen(S); IntOut(0, L); Text(0, ": ^""); Text(0, S); ChOut(0, ^"); CrLf(0); for I:= 0 to L-1 do
for J:= I+1 to L-1 do [if S(I) = S(J) then [ChOut(0, \tab\ 9); for K:= 0 to I do ChOut(0, ^ ); ChOut(0, ^^); for K:= 0 to J-I-2 do ChOut(0, ^ ); ChOut(0, ^^); Text(0, " Duplicate character: "); ChOut(0, S(I)); Text(0, ", hex "); SetHexDigits(2); HexOut(0, S(I)); CrLf(0); return; ]; ];
Text(0, " Unique, no duplicates"); CrLf(0); ];
[Text(0, "Length"); CrLf(0); StrUnique(""); StrUnique("."); StrUnique("abcABC"); StrUnique("XYZ ZYX"); StrUnique("1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ"); StrUnique("thequickbrownfoxjumps"); ]</lang>
- Output:
Length 0: "" Unique, no duplicates 1: "." Unique, no duplicates 6: "abcABC" Unique, no duplicates 7: "XYZ ZYX" ^ ^ Duplicate character: X, hex 58 36: "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" ^ ^ Duplicate character: 0, hex 30 21: "thequickbrownfoxjumps" ^ ^ Duplicate character: u, hex 75
zkl
<lang zkl>fcn stringUniqueness(str){  // Does not handle Unicode
   // Print the string with its length, then either that all characters are
   // unique or each repeated character with its hex value and all indexes.
   sz,unique,uz,counts := str.len(), str.unique(), unique.len(), str.counts();
   println("Length %d: \"%s\"".fmt(sz,str));
   // All unique exactly when no character was dropped by unique().
   // (Also testing uz==1 would wrongly accept strings such as "aaa".)
   if(sz==uz) println("\tAll characters are unique");
   else  // counts is (char,count, char,count, ...)
      println("\tDuplicate: ",
         counts.pump(List,Void.Read,fcn(str,c,n){
            if(n>1){
               is,z:=List(),-1; do(n){ is.append(z=str.find(c,z+1)) }
               "'%s' (0x%x)[%s]".fmt(c,c.toAsc(),is.concat(","))
            }
            else Void.Skip
         }.fp(str)).concat(", "));
}</lang>
<lang zkl>testStrings:=T("", ".", "abcABC", "XYZ ZYX",
   "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ", "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X");
foreach s in (testStrings){ stringUniqueness(s) }</lang>
- Output:
Length 0: "" All characters are unique Length 1: "." All characters are unique Length 6: "abcABC" All characters are unique Length 7: "XYZ ZYX" Duplicate: 'X' (0x58)[0,6], 'Y' (0x59)[1,5], 'Z' (0x5a)[2,4] Length 36: "1234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ" Duplicate: '0' (0x30)[9,24] Length 39: "01234567890ABCDEFGHIJKLMN0PQRSTUVWXYZ0X" Duplicate: '0' (0x30)[0,10,25,37], 'X' (0x58)[34,38]