|
| 1 | +# ***************************************************************************** |
| 2 | +# Copyright (c) 2017 Keith Ito |
| 3 | +# |
| 4 | +# Permission is hereby granted, free of charge, to any person obtaining a copy |
| 5 | +# of this software and associated documentation files (the "Software"), to deal |
| 6 | +# in the Software without restriction, including without limitation the rights |
| 7 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 8 | +# copies of the Software, and to permit persons to whom the Software is |
| 9 | +# furnished to do so, subject to the following conditions: |
| 10 | +# |
| 11 | +# The above copyright notice and this permission notice shall be included in |
| 12 | +# all copies or substantial portions of the Software. |
| 13 | +# |
| 14 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 17 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 18 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 19 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 20 | +# THE SOFTWARE. |
| 21 | +# |
| 22 | +# ***************************************************************************** |
| 23 | +""" |
| 24 | +Modified from https://github.com/keithito/tacotron |
| 25 | +""" |
| 26 | + |
| 27 | +import inflect |
| 28 | +import re |
| 29 | + |
| 30 | + |
# Shared inflect engine used by the helpers below to spell numbers as words.
_inflect = inflect.engine()
# A run of digits and commas that starts and ends with a digit
# (three characters minimum), e.g. "1,000"; group 1 is the whole run.
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
# Digits, a dot, then more digits, e.g. "3.14"; group 1 is the whole number.
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
# "£" followed by digits (commas allowed); group 1 is the amount without "£".
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
# "$" followed by digits (commas and dots allowed); group 1 is the amount
# without "$". Note the class admits several dots and stray commas.
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
# Digits immediately followed by an ordinal suffix, e.g. "1st", "22nd".
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
# Any run of one or more digits.
_number_re = re.compile(r'[0-9]+')
| 38 | + |
| 39 | + |
def _remove_commas(m: re.Match) -> str:
    """Return the text captured in group 1 of *m* with every comma removed."""
    captured = m.group(1)
    # Splitting on ',' and re-joining drops each separator.
    return ''.join(captured.split(','))
| 42 | + |
| 43 | + |
def _expand_decimal_point(m: re.Match) -> str:
    """Return group 1 of *m* with each ``.`` replaced by ``' point '``."""
    number_text = m.group(1)
    # Split on the dot and re-join with the spoken form of the separator.
    return ' point '.join(number_text.split('.'))
| 46 | + |
| 47 | + |
def _expand_dollars(m: re.Match) -> str:
    """Rewrite a matched dollar amount as "<n> dollar(s)" and/or "<n> cent(s)".

    Args:
        m: Match whose group 1 holds the amount without the leading ``$``:
            digits that may contain ``,`` separators and ``.`` characters.

    Returns:
        ``"<d> dollars, <c> cents"`` when both parts are non-zero, only the
        dollar or only the cent phrase when the other part is zero, and
        ``"zero dollars"`` when both are zero. Units are singular when the
        value is exactly 1. The numbers stay as digits. If the amount
        contains more than one ``.``, the captured text is returned
        unchanged with ``" dollars"`` appended.

    Note:
        The text after the ``.`` is read as an integer number of cents, so
        ``"1.5"`` yields 5 cents, not 50.
    """
    match = m.group(1)
    # The dollars pattern allows ',' anywhere in the amount (e.g. ",50" or
    # "1,.50"); strip them so int() below cannot raise ValueError.
    amount = match.replace(',', '')
    parts = amount.split('.')
    if len(parts) > 2:
        return match + ' dollars'  # Unexpected format: more than one '.'
    # An empty piece (e.g. the dollars part of ".05") counts as zero.
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    dollar_unit = 'dollar' if dollars == 1 else 'dollars'
    cent_unit = 'cent' if cents == 1 else 'cents'
    if dollars and cents:
        return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        return '%s %s' % (dollars, dollar_unit)
    if cents:
        return '%s %s' % (cents, cent_unit)
    return 'zero dollars'
| 67 | + |
| 68 | + |
def _expand_ordinal(m: re.Match) -> str:
    """Spell out a matched ordinal token (digits plus st/nd/rd/th).

    The full matched text, suffix included, is passed to the shared inflect
    engine's ``number_to_words``.
    """
    ordinal_token = m.group(0)
    return _inflect.number_to_words(ordinal_token)
| 71 | + |
| 72 | + |
def _expand_number(m: re.Match) -> str:
    """Spell out a matched run of digits as words.

    Values strictly between 1000 and 3000 get special treatment:

    * 2000 becomes ``'two thousand'``;
    * 2001-2009 become ``'two thousand '`` plus the words for the last
      two digits;
    * exact multiples of 100 become ``'<words> hundred'``;
    * everything else is rendered by inflect in two-digit groups, with
      ``'oh'`` for zero and the ``', '`` group separator replaced by a space.

    Any other value is rendered by inflect with the "and" word suppressed.
    """
    value = int(m.group(0))

    # Guard: outside the open interval (1000, 3000) use the plain reading.
    if not 1000 < value < 3000:
        return _inflect.number_to_words(value, andword='')

    if value == 2000:
        return 'two thousand'
    if 2000 < value < 2010:
        return 'two thousand ' + _inflect.number_to_words(value % 100)

    hundreds, remainder = divmod(value, 100)
    if remainder == 0:
        return _inflect.number_to_words(hundreds) + ' hundred'

    grouped = _inflect.number_to_words(value, andword='', zero='oh', group=2)
    return grouped.replace(', ', ' ')
| 86 | + |
| 87 | + |
def normalize_numbers(text: str) -> str:
    """Return *text* with numeric expressions rewritten for reading aloud.

    The substitutions run in a fixed sequence: commas inside digit runs are
    removed, pound and dollar amounts are expanded, decimal points become
    " point ", ordinals are spelled out, and finally every remaining run of
    digits is spelled out.
    """
    # Sequence is significant: the catch-all digit pattern must run last so
    # that it only sees digits the more specific passes left behind.
    passes = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)
    return text
0 commit comments