Analizer is now a subcommand; fixed make lint; added first test
This commit is contained in:
parent
cc30b235d0
commit
cecc6b4fd2
10
Makefile
10
Makefile
|
@ -1,14 +1,14 @@
|
|||
.PHONY: pytest lint test
|
||||
.PHONY: pytest lint test tests package
|
||||
|
||||
test: pytest lint
|
||||
tests: test
|
||||
|
||||
pytest:
|
||||
pytest -xvv
|
||||
test:
|
||||
pytest
|
||||
|
||||
lint:
|
||||
pandoc --atx-headers --reference-links --reference-location=document --columns=79 -o README.md README.md
|
||||
black --line-length 79 --quiet colligere/ tests/ scripts/
|
||||
flake8 --max-line-length 79 colligere/ tests/ scripts/
|
||||
pandoc --reference-links --reference-location=document --columns=79 -o README.md README.md
|
||||
|
||||
package:
|
||||
python3 -m pip install --upgrade build
|
||||
|
|
116
README.md
116
README.md
|
@ -1,95 +1,85 @@
|
|||
# π₯ Book Metadata Tool βοΈ
|
||||
|
||||
Herramienta para metadatos en [JSON Lines](https://jsonlines.org/)
|
||||
Herramienta para metadatos en [JSON Lines]
|
||||
|
||||
Tool for metadata in [JSON Lines](https://jsonlines.org/)
|
||||
Tool for metadata in [JSON Lines]
|
||||
|
||||
# Uso / Usage
|
||||
|
||||
## Analizer
|
||||
|
||||
```
|
||||
$ python colligere/analizer.py metadata.jsonl
|
||||
$ python colligere/analizer.py metadata.jsonl
|
||||
|
||||
[0:00:00][0%][6i][57k] Fragmina verborum Titivillus colligit horum
|
||||
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
||||
β βββββββββββββββββββββββββββββββββββββ
|
||||
β βββββββββββββββ β
|
||||
β ββ β 50k
|
||||
β ββ β
|
||||
β ββ β
|
||||
β ββ β 40k
|
||||
β ββ β
|
||||
β ββ β
|
||||
β ββ β 30k
|
||||
βββ β
|
||||
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
||||
1i 2i 3i 4i 5i 6i
|
||||
[0:00:00][0%][6i][57k] Fragmina verborum Titivillus colligit horum
|
||||
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
||||
β βββββββββββββββββββββββββββββββββββββ
|
||||
β βββββββββββββββ β
|
||||
β ββ β 50k
|
||||
β ββ β
|
||||
β ββ β
|
||||
β ββ β 40k
|
||||
β ββ β
|
||||
β ββ β
|
||||
β ββ β 30k
|
||||
βββ β
|
||||
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
||||
1i 2i 3i 4i 5i 6i
|
||||
|
||||
$ python colligere/analizer.py -h
|
||||
$ python colligere/analizer.py -h
|
||||
|
||||
usage: Analizer [-h] [-i SAMPLE_LEN] [-k KEY] [-ky KEY] [-d DIR] [--debug] [-q] jsonl
|
||||
usage: Analizer [-h] [-i SAMPLE_LEN] [-k KEY] [-ky KEY] [-d DIR] [--debug] [-q] jsonl
|
||||
|
||||
Analize keys and values from JSON lines.
|
||||
Analize keys and values from JSON lines.
|
||||
|
||||
positional arguments:
|
||||
jsonl JSON lines file
|
||||
positional arguments:
|
||||
jsonl JSON lines file
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-i SAMPLE_LEN, --items SAMPLE_LEN
|
||||
random sample items size; 1000 by default
|
||||
-k KEY, --key KEY key value to analize
|
||||
-ky KEY, --key-axis KEY
|
||||
like -k, but its values are used for y-axis
|
||||
-d DIR, --directory DIR
|
||||
CSV output directory; CWD by default
|
||||
--debug stop on exception for debugging
|
||||
-q, --quiet avoid prints; ignored by --debug
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-i SAMPLE_LEN, --items SAMPLE_LEN
|
||||
random sample items size; 1000 by default
|
||||
-k KEY, --key KEY key value to analize
|
||||
-ky KEY, --key-axis KEY
|
||||
like -k, but its values are used for y-axis
|
||||
-d DIR, --directory DIR
|
||||
CSV output directory; CWD by default
|
||||
--debug stop on exception for debugging
|
||||
-q, --quiet avoid prints; ignored by --debug
|
||||
|
||||
$ python colligere/analizer.py -k metadata.record.catalogingLanguage metadata.jsonl
|
||||
$ python colligere/analizer.py -k metadata.record.catalogingLanguage metadata.jsonl
|
||||
|
||||
β³πΊππΊπβ³
|
||||
β³πΊππΊπβ³
|
||||
|
||||
$ cat res.metadata.record.catalogingLanguage.csv | head
|
||||
key,len,freq
|
||||
eng,664,0.35622
|
||||
und,99,0.05311
|
||||
fre,90,0.04828
|
||||
spa,65,0.03487
|
||||
ger,133,0.07135
|
||||
ara,10,0.00536
|
||||
zxx,23,0.01234
|
||||
heb,6,0.00322
|
||||
jpn,20,0.01073
|
||||
```
|
||||
$ cat res.metadata.record.catalogingLanguage.csv | head
|
||||
key,len,freq
|
||||
eng,664,0.35622
|
||||
und,99,0.05311
|
||||
fre,90,0.04828
|
||||
spa,65,0.03487
|
||||
ger,133,0.07135
|
||||
ara,10,0.00536
|
||||
zxx,23,0.01234
|
||||
heb,6,0.00322
|
||||
jpn,20,0.01073
|
||||
|
||||
# Desarrollo / Development
|
||||
|
||||
Configuración inicial / Initial setup:
|
||||
|
||||
```
|
||||
virtualenv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
virtualenv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
Pruebas / Tests:
|
||||
|
||||
```
|
||||
make test
|
||||
```
|
||||
make test
|
||||
|
||||
Análisis y formateador / Lint:
|
||||
|
||||
```
|
||||
make lint
|
||||
```
|
||||
make lint
|
||||
|
||||
Empaquetamiento / Packaging:
|
||||
|
||||
```
|
||||
make package
|
||||
```
|
||||
|
||||
make package
|
||||
|
||||
[JSON Lines]: https://jsonlines.org/
|
||||
|
|
|
@ -148,63 +148,3 @@ class Analizer:
|
|||
def __elapsed_time(self):
|
||||
curr_time = timedelta(seconds=time.process_time() - self.start_time)
|
||||
return str(curr_time).split(".")[0]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="Analizer",
|
||||
description="Analize keys and values from JSON lines.",
|
||||
)
|
||||
parser.add_argument("jsonl", type=Path, help="JSON lines file")
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--items",
|
||||
type=int,
|
||||
default=1000,
|
||||
dest="sample_len",
|
||||
help="random sample items size; 1000 by default",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-k",
|
||||
"--key",
|
||||
default=[],
|
||||
dest="keys",
|
||||
action="append",
|
||||
metavar="KEY",
|
||||
help="key value to analize",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-ky",
|
||||
"--key-axis",
|
||||
default=None,
|
||||
dest="ky",
|
||||
metavar="KEY",
|
||||
help="like -k, but its values are used for y-axis",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--directory",
|
||||
type=Path,
|
||||
default=Path.cwd(),
|
||||
dest="out_dir",
|
||||
metavar="DIR",
|
||||
help="CSV output directory; CWD by default",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="stop on exception for debugging",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-q",
|
||||
"--quiet",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="avoid prints; ignored by --debug",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
Analizer(**vars(get_args()))
|
||||
|
|
|
@ -1,2 +1,85 @@
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
from colligere.analizer import Analizer
|
||||
|
||||
|
||||
def parse(args=None):
    """Build the top-level ``colligere`` parser and parse ``args``.

    Args:
        args: Sequence of argument strings. ``None`` (the default) makes
            argparse read the process command line.

    Returns:
        The ``argparse.Namespace`` returned by ``parse_args``. It holds
        ``quiet``, ``debug``, ``subcmd`` and ``jsonl`` plus the options
        registered by the chosen subcommand.

    Raises:
        SystemExit: Raised by argparse on ``-h`` or on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        prog="colligere",
        description="Tool for metadata in JSON Lines.",
        epilog="Under GPLv3.",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        default=False,
        action="store_true",
        help="avoid prints; ignored by --debug",
    )
    parser.add_argument(
        "--debug",
        default=False,
        action="store_true",
        help="stop on exception for debugging",
    )
    subparsers = parser.add_subparsers(
        required=True,
        dest="subcmd",
        title="subcommands",
        help="Available tasks to perform; run 'SUBCMD --help' for more info",
    )
    parse_analize(subparsers)
    # NOTE(review): ``jsonl`` is registered on the top-level parser after
    # the subcommands; confirm argparse hands the trailing argument to it
    # for every supported invocation.
    parser.add_argument("jsonl", type=Path, help="JSON lines file")
    return parser.parse_args(args)
|
||||
|
||||
|
||||
def parse_analize(subparsers):
    """Register the ``analize`` subcommand (alias ``a``) and its options.

    Args:
        subparsers: Object returned by ``ArgumentParser.add_subparsers``;
            the new sub-parser is attached to it. Nothing is returned.
    """
    sub = subparsers.add_parser(
        "analize",
        help="analize keys and values",
        description="Analize keys and values from JSON lines.",
        aliases=["a"],
    )
    # One (flags, settings) entry per option, kept in registration order.
    options = (
        (
            ("-i", "--items"),
            dict(
                dest="sample_len",
                type=int,
                default=1000,
                help="random sample items size; 1000 by default",
            ),
        ),
        (
            ("-k", "--key"),
            dict(
                dest="keys",
                action="append",
                default=[],
                metavar="KEY",
                help="key value to analize",
            ),
        ),
        (
            ("-ky", "--key-axis"),
            dict(
                dest="ky",
                default=None,
                metavar="KEY",
                help="like -k, but its values are used for y-axis",
            ),
        ),
        (
            ("-d", "--directory"),
            dict(
                dest="out_dir",
                type=Path,
                default=Path.cwd(),
                metavar="DIR",
                help="CSV output directory; CWD by default",
            ),
        ),
    )
    for flags, settings in options:
        sub.add_argument(*flags, **settings)
|
||||
|
||||
|
||||
def main():
|
||||
print("TODO")
|
||||
args = vars(parse())
|
||||
subcmd = args["subcmd"]
|
||||
del args["subcmd"]
|
||||
match subcmd:
|
||||
case "analize":
|
||||
Analizer(**args)
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
from colligere.cli import parse
|
||||
|
||||
|
||||
def assert_out(capsys, msg, *args):
    """Run ``parse`` on ``args`` and assert ``msg`` appears on stdout.

    Args:
        capsys: pytest capture fixture used to read what was printed.
        msg: Text expected somewhere in the captured stdout.
        *args: Command-line arguments handed to ``parse`` as a tuple.
    """
    try:
        parse(args)
    except SystemExit:
        # Expected for -h: argparse prints the help text and then exits.
        pass
    captured = capsys.readouterr()
    assert msg in captured.out
|
||||
|
||||
|
||||
def test_helps(capsys):
    """Top-level and ``analize`` help must each print a usage line."""
    help_invocations = (("-h",), ("analize", "-h"))
    for argv in help_invocations:
        assert_out(capsys, "usage:", *argv)
|
Reference in New Issue