Word break problem: Difference between revisions

Content added Content deleted
m (→‎{{header|Perl}}: oops, missing)
(→‎Functional Python: Pylinted for Python 3, added a {Works with} tag.)
Line 753: Line 753:
The '''tokenTrees''' function recursively builds a tree of possible token sequences, using a list monad ('''concatMap''' with a function which returns its result wrapped in a list – an empty list where a parse has failed) to discard all branches which lead to dead ends. This allows us to return more than one possible word-break parse for a given lexicon and input string. (Searches for 'monadic parsing in Python' will yield references to more sophisticated uses of this general approach).
The '''tokenTrees''' function recursively builds a tree of possible token sequences, using a list monad ('''concatMap''' with a function which returns its result wrapped in a list – an empty list where a parse has failed) to discard all branches which lead to dead ends. This allows us to return more than one possible word-break parse for a given lexicon and input string. (Searches for 'monadic parsing in Python' will yield references to more sophisticated uses of this general approach).


{{Works with|Python|3.7}}
<lang python>from itertools import (chain)
<lang python>'''Parsing a string for word breaks'''


from itertools import (chain)

# main :: IO ()
def main():
lexicon = 'a bc abc cd b'.split()
testSamples = 'abcd abbc abcbcd acdbc abcdd'.split()
print (
'\n'.join(
_map(showParse)(
_map(stringParse(lexicon))(
testSamples
)
)
)
)




# stringParse :: [String] -> String -> Tree String
# stringParse :: [String] -> String -> Tree String
def stringParse(lexicon):
def stringParse(lexicon):
'''A tree of strings representing a parse of s
in terms of the tokens in lexicon.
'''
return lambda s: Node(s)(
return lambda s: Node(s)(
tokenTrees(lexicon)(s)
tokenTrees(lexicon)(s)
Line 780: Line 771:
# tokenTrees :: [String] -> String -> [Tree String]
# tokenTrees :: [String] -> String -> [Tree String]
def tokenTrees(wds):
def tokenTrees(wds):
'''A list of possible parse trees for s,
based on the lexicon supplied in wds.
'''
def go(s):
def go(s):
return [Node(s)([])] if s in wds else (
return [Node(s)([])] if s in wds else (
concatMap(next(s))(wds)
concatMap(nxt(s))(wds)
)
)


def next(s):
def nxt(s):
return lambda w: parse(
return lambda w: parse(
w, go(s[len(w):])
w, go(s[len(w):])
Line 798: Line 792:
# showParse :: Tree String -> String
# showParse :: Tree String -> String
def showParse(tree):
def showParse(tree):
'''Multi line display of a string followed by any
possible parses of it, or an explanatory
message, if no parse was possible.
'''
def showTokens(x):
def showTokens(x):
xs = x['nest']
xs = x['nest']
Line 804: Line 802:
return tree['root'] + ':\n' + (
return tree['root'] + ':\n' + (
'\n'.join(
'\n'.join(
_map(showTokens)(parses)
map(showTokens, parses)
) if parses else ' ( Not parseable in terms of these words )'
) if parses else ' ( Not parseable in terms of these words )'
)
)




# GENERIC FUNCTIONS ---------------------------------------
# TEST -------------------------------------------------
# main :: IO ()
def main():
'''Parse test and display of results.'''


lexicon = 'a bc abc cd b'.split()
testSamples = 'abcd abbc abcbcd acdbc abcdd'.split()

print(unlines(
map(
showParse,
map(
stringParse(lexicon),
testSamples
)
)
))


# GENERIC FUNCTIONS ---------------------------------------


# Node :: a -> [Tree a] -> Tree a
# Node :: a -> [Tree a] -> Tree a
def Node(v):
def Node(v):
'''Contructor for a Tree node which connects a
value of some kind to a list of zero or
more child trees.'''
return lambda xs: {'type': 'Node', 'root': v, 'nest': xs}
return lambda xs: {'type': 'Node', 'root': v, 'nest': xs}


Line 819: Line 838:
# concatMap :: (a -> [b]) -> [a] -> [b]
# concatMap :: (a -> [b]) -> [a] -> [b]
def concatMap(f):
def concatMap(f):
'''A concatenated list over which a function has been mapped.
return lambda xs: list(chain.from_iterable(map(f, xs)))
The list monad can be derived by using a function f which
wraps its output in a list,
(using an empty list to represent computational failure).'''
return lambda xs: list(
chain.from_iterable(map(f, xs))
)




# map :: (a -> b) -> [a] -> [b]
# unlines :: [String] -> String
def _map(f):
def unlines(xs):
'''A single string derived by the intercalation
return lambda xs: list(map(f, xs))
of a list of strings with the newline character.'''
return '\n'.join(xs)




# MAIN ---
if __name__ == '__main__':
if __name__ == '__main__':
main()</lang>
main()</lang>