More CLI tests for analize

This commit is contained in:
Titivillus 2023-12-15 23:11:25 +00:00
parent 805d4cd417
commit 8f1d12ed17
3 changed files with 38 additions and 11 deletions

View File

@ -2,9 +2,7 @@ import time
import json import json
import shutil import shutil
import random import random
import argparse
import pandas as pd import pandas as pd
from pathlib import Path
from datetime import timedelta from datetime import timedelta
from uniplot import plot_to_string from uniplot import plot_to_string
@ -27,13 +25,14 @@ class Analizer:
self.curr_item = 0 self.curr_item = 0
self.sample(analize=True) self.sample(analize=True)
if not self.debug and not self.quiet: if not self.debug and not self.quiet:
print("Extractor finished\nTotal time: " + self.__elapsed_time()) print("\nExtractor finished\nTotal time: " + self.__elapsed_time())
def sample(self, analize=False): def sample(self, analize=False):
""" """
NOTE: this is a refactor version of https://gitlab.com/aapjeisbaas/shuf NOTE: this is a refactor version of https://gitlab.com/aapjeisbaas/shuf
""" """
line, lines, count = None, [], self.__count() line, lines, count = {}, [], self.__count()
history = [0]
bytes_per_line, total_lines_est = count[0], count[1] bytes_per_line, total_lines_est = count[0], count[1]
fh = open(self.jsonl) fh = open(self.jsonl)
while self.curr_item < self.sample_len: while self.curr_item < self.sample_len:
@ -50,6 +49,12 @@ class Analizer:
lines.append(line) lines.append(line)
self.curr_item += 1 self.curr_item += 1
self.__analize(line) if analize else self.__prompt(line) self.__analize(line) if analize else self.__prompt(line)
if history[-1] == self.curr_item:
history.append(self.curr_item)
else:
history = [self.curr_item]
if len(history) > 99:
break
except UnicodeError: except UnicodeError:
pass pass
except Exception: except Exception:

View File

@ -76,8 +76,8 @@ def parse_analize(subparsers):
) )
def main(): def main(args=None):
args = vars(parse()) args = vars(parse(args))
subcmd = args["subcmd"] subcmd = args["subcmd"]
del args["subcmd"] del args["subcmd"]
match subcmd: match subcmd:

View File

@ -1,15 +1,37 @@
from colligere.cli import parse from colligere.cli import main
from pathlib import Path
ASSETS = Path(__file__).parent.parent / "assets"
JSONL = ASSETS / "test.jsonl"
def assert_out(capsys, msg, *args): def assert_out(capsys, msg, *args):
args = tuple(map(lambda a: str(a), args))
try: try:
parse(args) main(args)
except SystemExit: except SystemExit:
pass pass
stdout = capsys.readouterr().out assert msg in capsys.readouterr().out
assert msg in stdout
def test_helps(capsys): def test_helps(capsys):
for args in [["-h"], ["analize", "-h"]]: cmds = [
["-h"],
["analize", "-h"],
]
for args in cmds:
assert_out(capsys, "usage:", *args) assert_out(capsys, "usage:", *args)
def test_analize(capsys):
    """Run the ``analize`` subcommand with several option combinations.

    Each invocation is passed through ``assert_out`` and must print
    "Total time:". The exception is the invocation containing ``-q``: its
    expected message is the empty string, which is a substring of any
    captured output, so that run asserts nothing about what was printed.
    """
    # Every invocation ends with the same directory option and input file.
    common_tail = ["-d", ASSETS, JSONL]
    option_sets = (
        ["-q"],
        [],
        ["-i", 5],
        ["-k", "foo"],
        ["-k", "foo", "-k", "bar"],
        ["-k", "foo", "-ky", "bar"],
    )
    for options in option_sets:
        argv = ["analize", *options, *common_tail]
        expected = "" if "-q" in argv else "Total time:"
        assert_out(capsys, expected, *argv)