More cli test to analize

2023-12-15 23:11:25 +00:00 · 2023-12-15 23:11:25 +00:00 · 8f1d12ed17
parent 805d4cd417
commit 8f1d12ed17
3 changed files with 38 additions and 11 deletions
--- a/colligere/analizer.py
+++ b/colligere/analizer.py
@ -2,9 +2,7 @@ import time
 import json
 import shutil
 import random
 import argparse
 import pandas as pd
 from pathlib import Path
 from datetime import timedelta
 from uniplot import plot_to_string
@ -27,13 +25,14 @@ class Analizer:
        self.curr_item = 0
        self.sample(analize=True)
        if not self.debug and not self.quiet:
-            print("Extractor finished\nTotal time: " + self.__elapsed_time())
+            print("\nExtractor finished\nTotal time: " + self.__elapsed_time())
    def sample(self, analize=False):
        """
        NOTE: this is a refactor version of https://gitlab.com/aapjeisbaas/shuf
        """
-        line, lines, count = None, [], self.__count()
+        line, lines, count = {}, [], self.__count()
        history = [0]
        bytes_per_line, total_lines_est = count[0], count[1]
        fh = open(self.jsonl)
        while self.curr_item < self.sample_len:
@ -50,6 +49,12 @@ class Analizer:
                    lines.append(line)
                    self.curr_item += 1
                    self.__analize(line) if analize else self.__prompt(line)
                if history[-1] == self.curr_item:
                    history.append(self.curr_item)
                else:
                    history = [self.curr_item]
                if len(history) > 99:
                    break
            except UnicodeError:
                pass
            except Exception:
--- a/colligere/cli.py
+++ b/colligere/cli.py
@ -76,8 +76,8 @@ def parse_analize(subparsers):
    )
-def main():
+def main(args=None):
-    args = vars(parse())
+    args = vars(parse(args))
    subcmd = args["subcmd"]
    del args["subcmd"]
    match subcmd:
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -1,15 +1,37 @@
-from colligere.cli import parse
+from colligere.cli import main
 from pathlib import Path
 ASSETS = Path(__file__).parent.parent / "assets"
 JSONL = ASSETS / "test.jsonl"
 def assert_out(capsys, msg, *args):
    args = tuple(map(lambda a: str(a), args))
    try:
-        parse(args)
+        main(args)
    except SystemExit:
        pass
-    stdout = capsys.readouterr().out
+    assert msg in capsys.readouterr().out
    assert msg in stdout
 def test_helps(capsys):
-    for args in [["-h"], ["analize", "-h"]]:
+    cmds = [
        ["-h"],
        ["analize", "-h"],
    ]
    for args in cmds:
        assert_out(capsys, "usage:", *args)
 def test_analize(capsys):
    cmds = [
        ["analize", "-q", "-d", ASSETS, JSONL],
        ["analize", "-d", ASSETS, JSONL],
        ["analize", "-i", 5, "-d", ASSETS, JSONL],
        ["analize", "-k", "foo", "-d", ASSETS, JSONL],
        ["analize", "-k", "foo", "-k", "bar", "-d", ASSETS, JSONL],
        ["analize", "-k", "foo", "-ky", "bar", "-d", ASSETS, JSONL],
    ]
    for args in cmds:
        msg = "Total time:" if "-q" not in args else ""
        assert_out(capsys, msg, *args)