Titivillus 2023-10-21 15:36:56 +00:00
parent 073c5e28fa
commit e19d3bd632
2 changed files with 70 additions and 9 deletions

README.md Normal file

@@ -0,0 +1,49 @@
# 🔥 Book Metadata Tool ⚒️
A tool for analyzing book metadata stored as [JSON Lines](https://jsonlines.org/).
# Usage
## Analizer
```
$ python colligere/analizer.py metadata.jsonl
[0:00:00][0%][6i][57k] Fragmina verborum Titivillus colligit horum
[uniplot chart: curve rising above 50k as more items are sampled; y-axis ticks 30k / 40k / 50k, x-axis 1i–6i]
$ python colligere/analizer.py -h
usage: Analizer [-h] [-i SAMPLE_LEN] [-k KEY] [-ky KEY] [-d DIR] [--debug] [-q] jsonl
Analyze keys and values from JSON lines.
positional arguments:
jsonl JSON lines file
options:
-h, --help show this help message and exit
-i SAMPLE_LEN, --items SAMPLE_LEN
random sample items size; 1000 by default
-k KEY, --key KEY key value to analyze
-ky KEY, --key-axis KEY
like -k, but its values are used for y-axis
-d DIR, --directory DIR
CSV output directory; CWD by default
--debug stop on exception for debugging
-q, --quiet avoid prints; ignored by --debug
```
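The input file holds one JSON object per line; the analyzer works on whichever keys you pass with `-k`/`-ky`. A minimal, purely illustrative sample (the keys `title`, `author`, and `year` are hypothetical, not required by the tool):
```
{"title": "Liber Primus", "author": "Titivillus", "year": 1977}
{"title": "Liber Secundus", "author": null, "year": 2001}
{"title": "Liber Tertius", "author": "Anonymus", "year": 2014}
```
Running e.g. `python colligere/analizer.py -k title -d out metadata.jsonl` should produce one CSV summary per analyzed key, such as `out/res.title.csv`, in the chosen output directory.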

colligere/analizer.py

@@ -10,12 +10,13 @@ from uniplot import plot_to_string
class Analizer:
def __init__(self, jsonl, sample_len, keys, ky, debug=False, quiet=True):
def __init__(self, jsonl, sample_len, keys, ky, out_dir, debug, quiet):
self.start_time = time.process_time()
self.jsonl = jsonl
self.sample_len = sample_len
self.ky = ky
self.keys = keys + [self.ky]
self.out_dir = out_dir
self.debug = debug
self.quiet = quiet
self.term_size = shutil.get_terminal_size()
@@ -104,7 +105,7 @@ class Analizer:
def __write(self):
for key, val in self.all_vals.items():
file = Path("res." + key + ".csv")
file = self.out_dir / ("res." + key + ".csv")
df = pd.DataFrame(val).sort_index(ascending=False).T
if "len" in df:
df["len"] = df["len"].astype(int)
@@ -153,8 +154,8 @@ if __name__ == "__main__":
def get_args():
parser = argparse.ArgumentParser(
prog="Extractor",
description="Extracts keys and values from JSON lines.",
prog="Analizer",
description="Analize keys and values from JSON lines.",
)
parser.add_argument("jsonl", type=Path, help="JSON lines file")
parser.add_argument(
@@ -163,15 +164,16 @@ if __name__ == "__main__":
type=int,
default=1000,
dest="sample_len",
help="sample items size; 1000 by default",
help="random sample items size; 1000 by default",
)
parser.add_argument(
"-k",
"--key",
action="append",
default=[],
dest="keys",
action="append",
metavar="KEY",
help="key value to extract",
help="key value to analize",
)
parser.add_argument(
"-ky",
@@ -181,16 +183,26 @@ if __name__ == "__main__":
metavar="KEY",
help="like -k, but its values are used for y-axis",
)
parser.add_argument(
"-d",
"--directory",
type=Path,
default=Path.cwd(),
dest="out_dir",
metavar="DIR",
help="CSV output directory; CWD by default",
)
parser.add_argument(
"--debug",
action="store_true",
default=False,
action="store_true",
help="stop on exception for debugging",
)
parser.add_argument(
"-q",
"--quiet",
action="store_true",
default=False,
action="store_true",
help="avoid prints; ignored by --debug",
)
return parser.parse_args()
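For orientation only, a hedged sketch of how the parsed arguments might be threaded into the updated constructor; the script's actual entry-point code is not shown in this diff, and `dest="ky"` for `-ky`/`--key-axis` is an assumption based on the `__init__` signature above.
```
# Sketch, not the file's real main block. Assumes get_args() and
# Analizer as defined above, and that -ky/--key-axis uses dest="ky".
args = get_args()
analizer = Analizer(
    args.jsonl,       # positional JSON Lines file (Path)
    args.sample_len,  # -i / --items, 1000 by default
    args.keys,        # repeated -k / --key values (list of str)
    args.ky,          # -ky / --key-axis (assumed dest)
    args.out_dir,     # new: -d / --directory, Path.cwd() by default
    args.debug,       # --debug
    args.quiet,       # -q / --quiet
)
```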