# scrabbleable/scrabbleable/wordlist.py
#
# 75 lines
# 2.4 KiB
# Python

import re
import unicodedata
from pathlib import Path
from warnings import warn

from unidecode import unidecode
# TODO Tests
class WordList:
    """Scrabble wordlist maker

    Reads whitespace-separated tokens from text files, folds each one to
    upper-case tile letters and keeps the tokens made only of letters that
    exist as tiles in the selected language.
    """

    # Letters available as tiles, keyed by language code.
    TILES = {
        "en": "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "es": "ABCDEFGHIJLMNÑOPQRSTUVXYZ",
    }

    def __init__(self, paths=None, lang="en", max_items=0, word_size=(2, 15),
                 encoding="utf-8"):
        """Inits

        :param paths: Text files sources; no sources by default
        :type paths: list[Path] | None
        :param lang: Wordlist language; "en" by default
        :type lang: str
        :param max_items: Max wordlist length; no limit (0) by default
        :type max_items: int
        :param word_size: Min and max word length, inclusive; (2, 15) by
            default
        :type word_size: tuple[int, int]
        :param encoding: Encoding used to read sources and write the
            wordlist; "utf-8" by default, None for the platform default
        :type encoding: str | None
        """
        # A fresh list per instance: a literal [] default would be shared
        # by every WordList created without explicit paths.
        self.paths = [] if paths is None else paths
        self.lang = lang
        self.max, self.size = max_items, word_size
        self.encoding = encoding
        self.tiles, self.texts, self.words = "", [], []

    def check(self):
        """Checks self attributes

        Compiles the tile pattern for the language and loads every source
        text that has not been loaded yet.

        :raises ValueError: If the language has no entry in TILES
        :raises IOError: If a source file can't be read
        """
        if self.lang not in self.TILES:
            tiles = ", ".join(self.TILES)
            raise ValueError(f"{self.lang} language is not in {tiles}")
        self.tiles = re.compile("[" + self.TILES[self.lang] + "]+")
        for path in self.paths:
            try:
                text = Path(path).read_text(encoding=self.encoding)
            except (OSError, ValueError, TypeError) as err:
                # Filesystem, decoding and bad-path errors are all reported
                # as IOError, with the original cause chained.
                raise IOError(f"{path} couldn't open") from err
            if text not in self.texts:
                self.texts.append(text)

    def make(self):
        """Makes wordlist

        Collects unique playable words in reading order until max_items is
        reached (when it is positive), then sorts them. Warns when fewer
        words than max_items were found.
        """
        self.check()
        limit = self.max
        seen = set(self.words)  # O(1) duplicate test
        for word in self._candidates():
            # One flat loop, so the limit also stops the remaining texts.
            if limit > 0 and len(self.words) >= limit:
                break
            if word not in seen:
                seen.add(word)
                self.words.append(word)
        self.words = sorted(self.words)
        if limit > 0 and len(self.words) < limit:
            warn(f"wordlist ({len(self.words)}) < max_items ({self.max})")

    def _candidates(self):
        """Yields, in reading order, every token that is a playable word"""
        shortest, longest = self.size[0], self.size[1]
        # Tile letters outside ASCII (e.g. "Ñ") must survive the folding.
        native = frozenset(
            ch for ch in self.TILES[self.lang] if not ch.isascii()
        )
        for text in self.texts:
            for token in text.split():
                word = self._normalize(token, native)
                # Length is measured on the folded word; fullmatch rejects
                # tokens with any non-tile character, not just a bad prefix.
                if (shortest <= len(word) <= longest
                        and self.tiles.fullmatch(word)):
                    yield word

    @staticmethod
    def _normalize(word, native=frozenset()):
        """Folds a token to upper case, transliterating non-tile letters

        :param word: Raw token
        :type word: str
        :param native: Non-ASCII letters to keep untouched
        :type native: frozenset[str]
        :return: Upper-cased token
        :rtype: str
        """
        if word.isascii():
            return word.upper()  # nothing to transliterate
        if not native:
            return unidecode(word).upper()
        # Compose "N" + combining tilde into "Ñ" before testing letters.
        word = unicodedata.normalize("NFC", word)
        return "".join(
            ch.upper() if ch.upper() in native else unidecode(ch).upper()
            for ch in word
        )

    def show(self):
        """Prints wordlist"""
        print("\n".join(self.words))

    def write(self, raw_path):
        """Writes wordlist

        Warns before replacing an existing file.

        :param raw_path: Output path file
        :type raw_path: str | Path
        """
        path = Path(raw_path)
        if path.exists():
            warn(f"overriding {path}")
        path.write_text("\n".join(self.words), encoding=self.encoding)