75 lines
2.4 KiB
Python
75 lines
2.4 KiB
Python
import re
|
|
from unidecode import unidecode
|
|
from pathlib import Path
|
|
from warnings import warn
|
|
|
|
# TODO Tests
|
|
|
|
|
|
class WordList:
    """Scrabble wordlist maker.

    Reads text files, keeps the whitespace-separated tokens that can be
    spelled with the tiles of the chosen language and stores them, sorted,
    in ``self.words``.
    """

    # Letters allowed in a word, keyed by language code.
    TILES = {
        "en": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "es": "ABCDEFGHIJLMNÑOPQRSTUVXYZ",
    }

    def __init__(self, paths=None, lang="en", max_items=0, word_size=(2, 15)):
        """Inits

        :param paths: Text files sources; no sources (None) by default
        :type paths: list[Path] | None
        :param lang: Wordlist language; "en" by default
        :type lang: str
        :param max_items: Max wordlist length; no limit (0) by default
        :type max_items: int
        :param word_size: Inclusive (min, max) word length; (2, 15) by default
        :type word_size: tuple[int, int]
        """
        # A fresh list per instance: a ``[]`` default would be shared by
        # every WordList built without an explicit ``paths``.
        self.paths = [] if paths is None else paths
        self.lang = lang
        self.max, self.size = max_items, word_size
        self.tiles, self.texts, self.words = "", [], []

    def _is_full(self):
        """Tells whether the wordlist already holds ``max_items`` words"""
        return self.max > 0 and len(self.words) >= self.max

    def check(self):
        """Checks self attributes

        Compiles the tiles pattern for ``self.lang`` into ``self.tiles`` and
        loads every readable source into ``self.texts`` (identical texts are
        stored once).

        :raises ValueError: If ``self.lang`` is not a key of ``TILES``
        :raises IOError: If a source file cannot be read
        """
        if self.lang not in self.TILES:
            tiles = ", ".join(self.TILES)
            raise ValueError(f"{self.lang} language is not in {tiles}")
        self.tiles = re.compile("[" + self.TILES[self.lang] + "]+")
        for path in self.paths:
            # Only the read itself is guarded; the original cause is chained
            # so the real reason (missing file, bad encoding...) stays visible.
            try:
                text = Path(path).read_text()
            except (OSError, ValueError, TypeError) as err:
                raise IOError(f"{path} couldn't open") from err
            if text not in self.texts:
                self.texts.append(text)

    def make(self):
        """Makes wordlist

        Runs :meth:`check`, then collects unique upper-cased words made only
        of tiles and within ``word_size``, stopping as soon as ``max_items``
        words are collected. The result is sorted; a warning is emitted when
        fewer than ``max_items`` words were found.
        """
        self.check()
        shortest, longest = self.size[0], self.size[1]
        # Set lookup avoids rescanning the whole list for every candidate.
        seen = set(self.words)
        for text in self.texts:
            # The limit applies to the whole wordlist, not to each text.
            if self._is_full():
                break
            for token in re.split(r"\s+", text):
                # NOTE(review): unidecode transliterates to ASCII, so "Ñ"
                # presumably becomes "N" and the "es" Ñ tile never shows up
                # in the output; confirm whether that is intended.
                word = unidecode(token).upper()
                # Length is checked on the final word because transliteration
                # may change the number of characters.
                if not shortest <= len(word) <= longest:
                    continue
                # fullmatch: every character must be a tile, not just a prefix.
                if word in seen or not self.tiles.fullmatch(word):
                    continue
                seen.add(word)
                self.words.append(word)
                if self._is_full():
                    break
        self.words = sorted(self.words)
        if self.max > 0 and len(self.words) < self.max:
            warn(f"wordlist ({len(self.words)}) < max_items ({self.max})")

    def show(self):
        """Prints wordlist, one word per line"""
        print("\n".join(self.words))

    def write(self, raw_path):
        """Writes wordlist, one word per line

        Warns before replacing an existing file.

        :param raw_path: Output path file
        :type raw_path: str | Path
        """
        path = Path(raw_path)
        if path.exists():
            warn(f"overriding {path}")
        path.write_text("\n".join(self.words))
|