2023-12-08 15:41:43 -06:00
|
|
|
import argparse
|
|
|
|
from pathlib import Path
|
|
|
|
from colligere.analizer import Analizer
|
|
|
|
|
|
|
|
|
|
|
|
def parse(args: list[str] | None = None) -> argparse.Namespace:
    """Build the ``colligere`` command-line parser and parse ``args``.

    Args:
        args: Argument strings to parse. When ``None``, argparse reads
            ``sys.argv[1:]`` instead.

    Returns:
        The parsed namespace: the global flags ``quiet`` and ``debug``, the
        selected subcommand under ``subcmd``, whatever options that
        subcommand registered, and the positional ``jsonl`` path.
    """
    parser = argparse.ArgumentParser(
        prog="colligere",
        description="Tool for metadata in JSON Lines.",
        epilog="Under GPLv3.",
    )

    # Global switches, declared before the subcommand on the command line.
    parser.add_argument(
        "-q",
        "--quiet",
        default=False,
        action="store_true",
        help="avoid prints; ignored by --debug",
    )
    parser.add_argument(
        "--debug",
        default=False,
        action="store_true",
        help="stop on exception for debugging",
    )

    # A subcommand is mandatory; its name is stored under "subcmd".
    subparsers = parser.add_subparsers(
        required=True,
        dest="subcmd",
        title="subcommands",
        help="Available tasks to perform; run 'SUBCMD --help' for more info",
    )
    parse_analize(subparsers)

    # Registered after the subparsers so the file path is the trailing
    # positional argument.
    parser.add_argument("jsonl", type=Path, help="JSON lines file")

    return parser.parse_args(args)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_analize(subparsers):
    """Register the ``analize`` subcommand (alias ``a``) on ``subparsers``.

    Args:
        subparsers: Object returned by ``ArgumentParser.add_subparsers``;
            the new subcommand parser is attached to it.

    The subcommand stores its options under ``sample_len``, ``keys``, ``ky``
    and ``out_dir``.
    """
    command = subparsers.add_parser(
        "analize",
        aliases=["a"],
        description="Analize keys and values from JSON lines.",
        help="analize keys and values",
    )

    # Each entry is (option strings, add_argument keyword settings); the
    # order here is the order in which the options are registered.
    option_table = (
        (
            ("-i", "--items"),
            {
                "type": int,
                "default": 1000,
                "dest": "sample_len",
                "help": "random sample items size; 1000 by default",
            },
        ),
        (
            ("-k", "--key"),
            {
                "default": [],
                "dest": "keys",
                "action": "append",
                "metavar": "KEY",
                "help": "key value to analize",
            },
        ),
        (
            ("-ky", "--key-axis"),
            {
                "default": None,
                "dest": "ky",
                "metavar": "KEY",
                "help": "like -k, but its values are used for y-axis",
            },
        ),
        (
            ("-d", "--directory"),
            {
                "type": Path,
                # Resolved once, when the parser is built.
                "default": Path.cwd(),
                "dest": "out_dir",
                "metavar": "DIR",
                "help": "CSV output directory; CWD by default",
            },
        ),
    )
    for flags, settings in option_table:
        command.add_argument(*flags, **settings)
|
|
|
|
|
|
|
|
|
2023-12-15 17:11:25 -06:00
|
|
|
def main(args=None):
|
|
|
|
args = vars(parse(args))
|
2023-12-08 15:41:43 -06:00
|
|
|
subcmd = args["subcmd"]
|
|
|
|
del args["subcmd"]
|
|
|
|
match subcmd:
|
|
|
|
case "analize":
|
|
|
|
Analizer(**args)
|