From 8f1d12ed171db977ddb05e151e5dea5b9f7aaa02 Mon Sep 17 00:00:00 2001 From: Titivillus Date: Fri, 15 Dec 2023 23:11:25 +0000 Subject: [PATCH] More cli test to analize --- colligere/analizer.py | 13 +++++++++---- colligere/cli.py | 4 ++-- tests/test_cli.py | 32 +++++++++++++++++++++++++++----- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/colligere/analizer.py b/colligere/analizer.py index 1ad48bf..0298fe4 100644 --- a/colligere/analizer.py +++ b/colligere/analizer.py @@ -2,9 +2,7 @@ import time import json import shutil import random -import argparse import pandas as pd -from pathlib import Path from datetime import timedelta from uniplot import plot_to_string @@ -27,13 +25,14 @@ class Analizer: self.curr_item = 0 self.sample(analize=True) if not self.debug and not self.quiet: - print("Extractor finished\nTotal time: " + self.__elapsed_time()) + print("\nExtractor finished\nTotal time: " + self.__elapsed_time()) def sample(self, analize=False): """ NOTE: this is a refactor version of https://gitlab.com/aapjeisbaas/shuf """ - line, lines, count = None, [], self.__count() + line, lines, count = {}, [], self.__count() + history = [0] bytes_per_line, total_lines_est = count[0], count[1] fh = open(self.jsonl) while self.curr_item < self.sample_len: @@ -50,6 +49,12 @@ class Analizer: lines.append(line) self.curr_item += 1 self.__analize(line) if analize else self.__prompt(line) + if history[-1] == self.curr_item: + history.append(self.curr_item) + else: + history = [self.curr_item] + if len(history) > 99: + break except UnicodeError: pass except Exception: diff --git a/colligere/cli.py b/colligere/cli.py index 03c409c..62e3425 100644 --- a/colligere/cli.py +++ b/colligere/cli.py @@ -76,8 +76,8 @@ def parse_analize(subparsers): ) -def main(): - args = vars(parse()) +def main(args=None): + args = vars(parse(args)) subcmd = args["subcmd"] del args["subcmd"] match subcmd: diff --git a/tests/test_cli.py b/tests/test_cli.py index a793944..1b98f74 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,15 +1,37 @@ -from colligere.cli import parse +from colligere.cli import main +from pathlib import Path + +ASSETS = Path(__file__).parent.parent / "assets" +JSONL = ASSETS / "test.jsonl" def assert_out(capsys, msg, *args): + args = tuple(map(lambda a: str(a), args)) try: - parse(args) + main(args) except SystemExit: pass - stdout = capsys.readouterr().out - assert msg in stdout + assert msg in capsys.readouterr().out def test_helps(capsys): - for args in [["-h"], ["analize", "-h"]]: + cmds = [ + ["-h"], + ["analize", "-h"], + ] + for args in cmds: assert_out(capsys, "usage:", *args) + + +def test_analize(capsys): + cmds = [ + ["analize", "-q", "-d", ASSETS, JSONL], + ["analize", "-d", ASSETS, JSONL], + ["analize", "-i", 5, "-d", ASSETS, JSONL], + ["analize", "-k", "foo", "-d", ASSETS, JSONL], + ["analize", "-k", "foo", "-k", "bar", "-d", ASSETS, JSONL], + ["analize", "-k", "foo", "-ky", "bar", "-d", ASSETS, JSONL], + ] + for args in cmds: + msg = "Total time:" if "-q" not in args else "" + assert_out(capsys, msg, *args)