mirror of
https://github.com/ArthurDanjou/handson-ml3.git
synced 2026-01-14 12:14:36 +01:00
Better regex to match numbers in 03_classification
The previous regex would not match a decimal number as a single token unless it used "E" notation (e.g. "3.14" was matched as two separate numbers, "3" and "14"). Also added an optional "+/-" sign in the "E" part.
This commit is contained in:
@@ -2342,7 +2342,7 @@
|
||||
" for url in urls:\n",
|
||||
" text = text.replace(url, \" URL \")\n",
|
||||
" if self.replace_numbers:\n",
|
||||
" text = re.sub(r'\\d+(?:\\.\\d*(?:[eE]\\d+))?', 'NUMBER', text)\n",
|
||||
" text = re.sub(r'\\d+(?:\\.\\d*)?(?:[eE][+-]?\\d+)?', 'NUMBER', text)\n",
|
||||
" if self.remove_punctuation:\n",
|
||||
" text = re.sub(r'\\W+', ' ', text, flags=re.M)\n",
|
||||
" word_counts = Counter(text.split())\n",
|
||||
|
||||
Reference in New Issue
Block a user