Names to numbers: Difference between revisions
(→{{header|Perl}}: Add python skeleton.) |
(→{{header|Python}}: Add implementation.) |
||
Line 195: | Line 195: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
This example assumes that the module from [[Number_names#Python]] is stored as spell_integer.py. |
|||
<lang python></lang> |
|||
The example understands the textual format generated from number-to-names module. |
|||
<small>Note: This example and [[Number_names#Python]] need to be kept in sync</small> |
|||
<lang python>from spell_integer import spell_integer |
|||
def int_from_words(num): |
|||
words = num.replace(',','').replace(' and ', ' ').replace('-', ' ').split() |
|||
if words[0] == 'minus': |
|||
negmult = -1 |
|||
words.pop(0) |
|||
else: |
|||
negmult = 1 |
|||
small, total = 0, 0 |
|||
for word in words: |
|||
if word in SMALL: |
|||
small += SMALL.index(word) |
|||
elif word in TENS: |
|||
small += TENS.index(word) * 10 |
|||
elif word == 'hundred': |
|||
small *= 100 |
|||
elif word == 'thousand': |
|||
total += small * 1000 |
|||
small = 0 |
|||
elif word in HUGE: |
|||
total += small * 10 ** (HUGE.index(word) * 3) |
|||
small = 0 |
|||
else: |
|||
raise ValueError("Don't understand %r part of %r" % (word, num)) |
|||
return negmult * (total + small) |
|||
if __name__ == '__main__': |
|||
# examples |
|||
for n in range(-10000, 10000, 17): |
|||
assert n == int_from_words(spell_integer(n)) |
|||
for n in range(20): |
|||
assert 13**n == int_from_words(spell_integer(13**n)) |
|||
print('\n##\n## These tests show <==> for a successful round trip, otherwise <??>\n##\n') |
|||
for n in (0, -3, 5, -7, 11, -13, 17, -19, 23, -29): |
|||
txt = spell_integer(n) |
|||
num = int_from_words(txt) |
|||
print('%+4i <%s> %s' % (n, '==' if n == num else '??', txt)) |
|||
print('') |
|||
n = 201021002001 |
|||
while n: |
|||
txt = spell_integer(n) |
|||
num = int_from_words(txt) |
|||
print('%12i <%s> %s' % (n, '==' if n == num else '??', txt)) |
|||
n //= -10 |
|||
txt = spell_integer(n) |
|||
num = int_from_words(txt) |
|||
print('%12i <%s> %s' % (n, '==' if n == num else '??', txt)) |
|||
print('')</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre>## |
||
## These tests show <==> for a successful round trip, otherwise <??> |
|||
## |
|||
+0 <==> zero |
|||
-3 <==> minus three |
|||
+5 <==> five |
|||
-7 <==> minus seven |
|||
+11 <==> eleven |
|||
-13 <==> minus thirteen |
|||
+17 <==> seventeen |
|||
-19 <==> minus nineteen |
|||
+23 <==> twenty-three |
|||
-29 <==> minus twenty-nine |
|||
201021002001 <==> two hundred and one billion, twenty-one million, two thousand, and one |
|||
-20102100201 <==> minus twenty billion, one hundred and two million, one hundred thousand, two hundred and one |
|||
2010210020 <==> two billion, ten million, two hundred and ten thousand, and twenty |
|||
-201021002 <==> minus two hundred and one million, twenty-one thousand, and two |
|||
20102100 <==> twenty million, one hundred and two thousand, and one hundred |
|||
-2010210 <==> minus two million, ten thousand, two hundred and ten |
|||
201021 <==> two hundred and one thousand, and twenty-one |
|||
-20103 <==> minus twenty thousand, one hundred and three |
|||
2010 <==> two thousand, and ten |
|||
-201 <==> minus two hundred and one |
|||
20 <==> twenty |
|||
-2 <==> minus two |
|||
0 <==> zero</pre> |
Revision as of 06:43, 16 May 2013
Translate the spelled-out English name of a number to a number. You can use a preexisting implementation or roll your own, but you should support inputs up to at least one million (or the maximum value of your language's default bounded integer type, if that's less).
Support for inputs other than positive integers (like zero, negative integers, fractions and floating-point numbers) is optional.
The reverse operation is Number names.
Perl
The following code reads a file line-by-line. It echos comment lines starting with a hashmark and blank lines. Remaining lines are output followed by an arrow "=>" and any non-negative integer number names translated into numbers, e.g., a line with "ninety-nine" is output like this: "ninety-nine => 99". <lang perl> use strict; use List::Util qw(sum);
our %nums = (
zero => 0, one => 1, two => 2, three => 3, four => 4, five => 5, six => 6, seven => 7, eight => 8, nine => 9, ten => 10, eleven => 11, twelve => 12, thirteen => 13, fourteen => 14, fifteen => 15, sixteen => 16, seventeen => 17, eighteen => 18, nineteen => 19, twenty => 20, thirty => 30, forty => 40, fifty => 50, sixty => 60, seventy => 70, eighty => 80, ninety => 90, hundred => 100, thousand => 1_000, million => 1_000_000, billion => 1_000_000_000, trillion => 1_000_000_000_000, # My ActiveState Win32 Perl uses e-notation after 999_999_999_999_999 quadrillion => 1e+015, quintillion => 1e+018);
- Groupings for thousands, millions, ..., quintillions
our $groups = qr/\d{4}|\d{7}|\d{10}|\d{13}|1e\+015|1e\+018/;
- Numeral or e-notation
our $num = qr/\d+|\d+e\+\d+/;
while (<>) {
# skip blank lines if(/^\s*$/) { print; next; } # echo comment lines if( /^\s*#.*$/ ) { print; next; }
chomp; my $orig = $_; s/-/ /g; # convert hyphens to spaces s/\s\s+/ /g; # remove duplicate whitespace, convert ws to space s/ $//g; # remove trailing blank s/^ //g; # remove leading blank $_ = lc($_); # convert to lower case # tokenize sentence boundaries s/([\.\?\!]) / $1\n/g; s/([\.\?\!])$/ $1\n/g; # tokenize other punctuation and symbols s/\$(.)/\$ $1/g; # prefix s/(.)([\;\:\%',])/$1 $2/g; # suffix
foreach my $key (keys %nums) { s/\b$key\b/$nums{$key}/eg; }
s/(\d) , (\d)/$1 $2/g; s/(\d) and (\d)/$1 $2/g;
s/\b(\d) 100 (\d\d) (\d) (${groups})\b/($1 * 100 + $2 + $3) * $4/eg;
s/\b(\d) 100 (\d\d) (${groups})\b/($1 * 100 + $2) * $3/eg; s/\b(\d) 100 (\d) (${groups})\b/($1 * 100 + $2) * $3/eg; s/\b(\d) 100 (${groups})\b/$1 * $2 * 100/eg;
s/\b100 (\d\d) (\d) (${groups})\b/(100 + $1 + $2) * $3/eg; s/\b100 (\d\d) (${groups})\b/(100 + $1) * $2/eg; s/\b100 (\d) (${groups})\b/(100 + $1) * $2/eg; s/\b100 (${groups})\b/$1 * 100/eg;
s/\b(\d\d) (\d) (${groups})\b/($1 + $2) * $3/eg; s/\b(\d{1,2}) (${groups})\b/$1 * $2/eg;
s/\b(\d\d) (\d) 100\b/($1 + $2) * 100/eg; s/\b(\d{1,2}) 100\b/$1 * 100/eg;
# Date anomolies: nineteen eighty-four and twenty thirteen
s/\b(\d{2}) (\d{2})\b/$1 * 100 + $2/eg;
s/((?:${num} )*${num})/sum(split(" ",$1))/eg;
print $orig, " => ", $_, "\n"; } </lang> Here is a sample input file:
# For numbers between 21 and 99 inclusive, we're supposed to use a hyphen, # but the bank still cashes our check without the hyphen: Seventy-two dollars Seventy two dollars # For numbers bigger than 100, we're not supposed to use "and," # except we still use "and" anyway, e.g., One Hundred and One Dalmatians A Hundred and One Dalmatians One Hundred One Dalmatians Hundred and One Dalmatians One Thousand and One Nights Two Thousand and One: A Space Odyssey # Date anomolies Twenty Thirteen Nineteen Eighty-Four # Maximum value an "unsigned long int" can hold on a 32-bit machine = 2^32 - 1 # define ULONG_MAX 4294967295 four billion, two hundred ninety-four million, nine hundred sixty-seven thousand, two hundred ninety five # Max positive integer on 32-bit Perl is 9_007_199_254_740_992 = 2^53 # Use Math::BigInt if you need more. # Note Perl usually stringifies to 15 digits of precision and this has 16 Nine quadrillion, seven trillion, one hundred ninety-nine billion, two hundred fifty-four million, seven hundred forty thousand, nine hundred ninety two Nine Hundred Ninety-Nine One Thousand One Hundred Eleven Eleven Hundred Eleven Eight Thousand Eight Hundred Eighty-Eight Eighty-Eight Hundred Eighty-Eight Seven Million Seven Hundred Seventy-Seven Thousand Seven Hundred Seventy-Seven Ninety-Nine Trillion Nine Hundred Ninety-Nine Billion Nine Hundred Ninety-Nine Million Nine Hundred Ninety-Nine Thousand Nine Hundred Ninety-Nine ninety-nine three hundred three hundred and ten one thousand, five hundred and one twelve thousand, six hundred and nine five hundred and twelve thousand, six hundred and nine forty-three million, one hundred and twelve thousand, six hundred and nine two billion, one hundred zero eight one hundred one hundred twenty three one thousand one ninety nine thousand nine hundred ninety nine one hundred thousand nine billion one hundred twenty three million four hundred fifty six thousand seven hundred eighty nine one hundred eleven billion one hundred eleven
And here is the resulting output file:
# For numbers between 21 and 99 inclusive, we're supposed to use a hyphen, # but the bank still cashes our check without the hyphen: Seventy-two dollars => 72 dollars Seventy two dollars => 72 dollars # For numbers bigger than 100, we're not supposed to use "and," # except we still use "and" anyway, e.g., One Hundred and One Dalmatians => 101 dalmatians A Hundred and One Dalmatians => a 101 dalmatians One Hundred One Dalmatians => 101 dalmatians Hundred and One Dalmatians => 101 dalmatians One Thousand and One Nights => 1001 nights Two Thousand and One: A Space Odyssey => 2001 : a space odyssey # Date anomolies Twenty Thirteen => 2013 Nineteen Eighty-Four => 1984 # Maximum value an "unsigned long int" can hold on a 32-bit machine = 2^32 - 1 # define ULONG_MAX 4294967295 four billion, two hundred ninety-four million, nine hundred sixty-seven thousand, two hundred ninety five => 4294967295 # Max positive integer on 32-bit Perl is 9_007_199_254_740_992 = 2^53 # Use Math::BigInt if you need more. # Note Perl usually stringifies to 15 digits of precision and this has 16 Nine quadrillion, seven trillion, one hundred ninety-nine billion, two hundred fifty-four million, seven hundred forty thousand, nine hundred ninety two => 9.00719925474099e+015 Nine Hundred Ninety-Nine => 999 One Thousand One Hundred Eleven => 1111 Eleven Hundred Eleven => 1111 Eight Thousand Eight Hundred Eighty-Eight => 8888 Eighty-Eight Hundred Eighty-Eight => 8888 Seven Million Seven Hundred Seventy-Seven Thousand Seven Hundred Seventy-Seven => 7777777 Ninety-Nine Trillion Nine Hundred Ninety-Nine Billion Nine Hundred Ninety-Nine Million Nine Hundred Ninety-Nine Thousand Nine Hundred Ninety-Nine => 99999999999999 ninety-nine => 99 three hundred => 300 three hundred and ten => 310 one thousand, five hundred and one => 1501 twelve thousand, six hundred and nine => 12609 five hundred and twelve thousand, six hundred and nine => 512609 forty-three million, one hundred and twelve thousand, six hundred and nine => 43112609 two billion, one hundred => 2000000100 zero => 0 eight => 8 one hundred => 100 one hundred twenty three => 123 one thousand one => 1001 ninety nine thousand nine hundred ninety nine => 99999 one hundred thousand => 100000 nine billion one hundred twenty three million four hundred fifty six thousand seven hundred eighty nine => 9123456789 one hundred eleven billion one hundred eleven => 111000000111
Python
This example assumes that the module from Number_names#Python is stored as spell_integer.py.
The example understands the textual format generated from number-to-names module.
Note: This example and Number_names#Python need to be kept in sync <lang python>from spell_integer import spell_integer
def int_from_words(num):
words = num.replace(',',).replace(' and ', ' ').replace('-', ' ').split() if words[0] == 'minus': negmult = -1 words.pop(0) else: negmult = 1 small, total = 0, 0 for word in words: if word in SMALL: small += SMALL.index(word) elif word in TENS: small += TENS.index(word) * 10 elif word == 'hundred': small *= 100 elif word == 'thousand': total += small * 1000 small = 0 elif word in HUGE: total += small * 10 ** (HUGE.index(word) * 3) small = 0 else: raise ValueError("Don't understand %r part of %r" % (word, num)) return negmult * (total + small)
if __name__ == '__main__':
# examples for n in range(-10000, 10000, 17): assert n == int_from_words(spell_integer(n))
for n in range(20): assert 13**n == int_from_words(spell_integer(13**n)) print('\n##\n## These tests show <==> for a successful round trip, otherwise <??>\n##\n') for n in (0, -3, 5, -7, 11, -13, 17, -19, 23, -29): txt = spell_integer(n) num = int_from_words(txt) print('%+4i <%s> %s' % (n, '==' if n == num else '??', txt)) print() n = 201021002001 while n: txt = spell_integer(n) num = int_from_words(txt) print('%12i <%s> %s' % (n, '==' if n == num else '??', txt)) n //= -10 txt = spell_integer(n) num = int_from_words(txt) print('%12i <%s> %s' % (n, '==' if n == num else '??', txt)) print()</lang>
- Output:
## ## These tests show <==> for a successful round trip, otherwise <??> ## +0 <==> zero -3 <==> minus three +5 <==> five -7 <==> minus seven +11 <==> eleven -13 <==> minus thirteen +17 <==> seventeen -19 <==> minus nineteen +23 <==> twenty-three -29 <==> minus twenty-nine 201021002001 <==> two hundred and one billion, twenty-one million, two thousand, and one -20102100201 <==> minus twenty billion, one hundred and two million, one hundred thousand, two hundred and one 2010210020 <==> two billion, ten million, two hundred and ten thousand, and twenty -201021002 <==> minus two hundred and one million, twenty-one thousand, and two 20102100 <==> twenty million, one hundred and two thousand, and one hundred -2010210 <==> minus two million, ten thousand, two hundred and ten 201021 <==> two hundred and one thousand, and twenty-one -20103 <==> minus twenty thousand, one hundred and three 2010 <==> two thousand, and ten -201 <==> minus two hundred and one 20 <==> twenty -2 <==> minus two 0 <==> zero