diff --git a/Makefile b/Makefile index fd9fcf4..4f19c76 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,14 @@ -.PHONY: pytest lint test +.PHONY: pytest lint test tests package -test: pytest lint +tests: test -pytest: - pytest -xvv +test: + pytest lint: + pandoc --atx-headers --reference-links --reference-location=document --columns=79 -o README.md README.md black --line-length 79 --quiet colligere/ tests/ scripts/ flake8 --max-line-length 79 colligere/ tests/ scripts/ - pandoc --reference-links --reference-location=document --columns=79 -o README.md README.md package: python3 -m pip install --upgrade build diff --git a/README.md b/README.md index 51220e9..e44abd1 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,85 @@ # πŸ”₯ Book Metadata Tool βš’οΈ -Herramienta para metadatos en [JSON Lines](https://jsonlines.org/) +Herramienta para metadatos en [JSON Lines] -Tool for metadata in [JSON Lines](https://jsonlines.org/) +Tool for metadata in [JSON Lines] # Uso / Usage ## Analizer -``` -$ python colligere/analizer.py metadata.jsonl + $ python colligere/analizer.py metadata.jsonl -[0:00:00][0%][6i][57k] Fragmina verborum Titivillus colligit horum -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ β–„β–„β–„β–„β–„β–„β–„β–„β–„β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β”‚ -β”‚ β–—β–žβ–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–˜ β”‚ -β”‚ β–—β–˜ β”‚ 50k -β”‚ β–žβ–˜ β”‚ -β”‚ β–—β–ž β”‚ -β”‚ β–—β–˜ β”‚ 40k -β”‚ β–žβ–˜ β”‚ -β”‚ β–—β–ž β”‚ -β”‚ β–—β–˜ β”‚ 30k -β”‚β–žβ–˜ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -1i 2i 3i 4i 5i 6i + [0:00:00][0%][6i][57k] Fragmina verborum Titivillus colligit horum + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ β–„β–„β–„β–„β–„β–„β–„β–„β–„β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β”‚ + β”‚ β–—β–žβ–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–€β–˜ β”‚ + β”‚ β–—β–˜ β”‚ 50k + β”‚ β–žβ–˜ β”‚ + β”‚ β–—β–ž β”‚ + β”‚ β–—β–˜ β”‚ 40k + β”‚ β–žβ–˜ β”‚ + β”‚ β–—β–ž β”‚ + β”‚ β–—β–˜ β”‚ 30k + β”‚β–žβ–˜ β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + 1i 2i 3i 4i 5i 6i -$ python colligere/analizer.py -h + $ python colligere/analizer.py -h -usage: Analizer [-h] [-i SAMPLE_LEN] [-k KEY] [-ky KEY] [-d DIR] [--debug] [-q] jsonl + usage: Analizer [-h] [-i SAMPLE_LEN] [-k KEY] [-ky KEY] [-d DIR] [--debug] [-q] jsonl -Analize keys and values from JSON lines. + Analize keys and values from JSON lines. -positional arguments: - jsonl JSON lines file + positional arguments: + jsonl JSON lines file -options: - -h, --help show this help message and exit - -i SAMPLE_LEN, --items SAMPLE_LEN - random sample items size; 1000 by default - -k KEY, --key KEY key value to analize - -ky KEY, --key-axis KEY - like -k, but its values are used for y-axis - -d DIR, --directory DIR - CSV output directory; CWD by default - --debug stop on exception for debugging - -q, --quiet avoid prints; ignored by --debug + options: + -h, --help show this help message and exit + -i SAMPLE_LEN, --items SAMPLE_LEN + random sample items size; 1000 by default + -k KEY, --key KEY key value to analize + -ky KEY, --key-axis KEY + like -k, but its values are used for y-axis + -d DIR, --directory DIR + CSV output directory; CWD by default + --debug stop on exception for debugging + -q, --quiet avoid prints; ignored by --debug -$ python colligere/analizer.py -k metadata.record.catalogingLanguage metadata.jsonl + $ python colligere/analizer.py -k metadata.record.catalogingLanguage metadata.jsonl -β³πŸΊπŸ’ƒπŸ•ΊπŸŽ‰β³ + β³πŸΊπŸ’ƒπŸ•ΊπŸŽ‰β³ -$ cat res.metadata.record.catalogingLanguage.csv | head -key,len,freq -eng,664,0.35622 -und,99,0.05311 -fre,90,0.04828 -spa,65,0.03487 -ger,133,0.07135 -ara,10,0.00536 -zxx,23,0.01234 -heb,6,0.00322 -jpn,20,0.01073 -``` + $ cat res.metadata.record.catalogingLanguage.csv | head + key,len,freq + eng,664,0.35622 + und,99,0.05311 + fre,90,0.04828 + spa,65,0.03487 + ger,133,0.07135 + ara,10,0.00536 + zxx,23,0.01234 + heb,6,0.00322 + jpn,20,0.01073 # Desarrollo / Development ConfiguraciΓ³n inicial / Initial setup: -``` -virtualenv .venv -source .venv/bin/activate -pip install -r requirements.txt -``` + virtualenv .venv + source .venv/bin/activate + pip install -r requirements.txt Pruebas / Tests: -``` -make test -``` + make test AnΓ‘lisis y formateador / Lint: -``` -make lint -``` + make lint Empaquetamiento / Packaging: -``` -make package -``` - + make package + [JSON Lines]: https://jsonlines.org/ diff --git a/colligere/analizer.py b/colligere/analizer.py index a6d00ed..1ad48bf 100644 --- a/colligere/analizer.py +++ b/colligere/analizer.py @@ -148,63 +148,3 @@ class Analizer: def __elapsed_time(self): curr_time = timedelta(seconds=time.process_time() - self.start_time) return str(curr_time).split(".")[0] - - -if __name__ == "__main__": - - def get_args(): - parser = argparse.ArgumentParser( - prog="Analizer", - description="Analize keys and values from JSON lines.", - ) - parser.add_argument("jsonl", type=Path, help="JSON lines file") - parser.add_argument( - "-i", - "--items", - type=int, - default=1000, - dest="sample_len", - help="random sample items size; 1000 by default", - ) - parser.add_argument( - "-k", - "--key", - default=[], - dest="keys", - action="append", - metavar="KEY", - help="key value to analize", - ) - parser.add_argument( - "-ky", - "--key-axis", - default=None, - dest="ky", - metavar="KEY", - help="like -k, but its values are used for y-axis", - ) - parser.add_argument( - "-d", - "--directory", - type=Path, - default=Path.cwd(), - dest="out_dir", - metavar="DIR", - help="CSV output directory; CWD by default", - ) - parser.add_argument( - "--debug", - default=False, - action="store_true", - help="stop on exception for debugging", - ) - parser.add_argument( - "-q", - "--quiet", - default=False, - action="store_true", - help="avoid prints; ignored by --debug", - ) - return parser.parse_args() - - Analizer(**vars(get_args())) diff --git a/colligere/cli.py b/colligere/cli.py index b85ec36..03c409c 100644 --- a/colligere/cli.py +++ b/colligere/cli.py @@ -1,2 +1,85 @@ +import argparse +from pathlib import Path +from colligere.analizer import Analizer + + +def parse(args=None): + parser = argparse.ArgumentParser( + prog="colligere", + description="Tool for metadata in JSON Lines.", + epilog="Under GPLv3.", + ) + parser.add_argument( + "-q", + "--quiet", + default=False, + action="store_true", + help="avoid prints; ignored by --debug", + ) + parser.add_argument( + "--debug", + default=False, + action="store_true", + help="stop on exception for debugging", + ) + subparsers = parser.add_subparsers( + required=True, + dest="subcmd", + title="subcommands", + help="Available tasks to perfom; run 'SUBCMD --help' for more info", + ) + parse_analize(subparsers) + parser.add_argument("jsonl", type=Path, help="JSON lines file") + return parser.parse_args(args) + + +def parse_analize(subparsers): + analizer = subparsers.add_parser( + "analize", + aliases=["a"], + description="Analize keys and values from JSON lines.", + help="analize keys and values", + ) + analizer.add_argument( + "-i", + "--items", + type=int, + default=1000, + dest="sample_len", + help="random sample items size; 1000 by default", + ) + analizer.add_argument( + "-k", + "--key", + default=[], + dest="keys", + action="append", + metavar="KEY", + help="key value to analize", + ) + analizer.add_argument( + "-ky", + "--key-axis", + default=None, + dest="ky", + metavar="KEY", + help="like -k, but its values are used for y-axis", + ) + analizer.add_argument( + "-d", + "--directory", + type=Path, + default=Path.cwd(), + dest="out_dir", + metavar="DIR", + help="CSV output directory; CWD by default", + ) + + def main(): - print("TODO") + args = vars(parse()) + subcmd = args["subcmd"] + del args["subcmd"] + match subcmd: + case "analize": + Analizer(**args) diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..a793944 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,15 @@ +from colligere.cli import parse + + +def assert_out(capsys, msg, *args): + try: + parse(args) + except SystemExit: + pass + stdout = capsys.readouterr().out + assert msg in stdout + + +def test_helps(capsys): + for args in [["-h"], ["analize", "-h"]]: + assert_out(capsys, "usage:", *args)