From 933a90d43ce9e360bb139dda2040b4360b9b12ce Mon Sep 17 00:00:00 2001
From: Martin Bielik
Date: Fri, 20 Dec 2024 17:34:20 +0100
Subject: parse chat messages tests

---
NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. Blank lines were placed so that every hunk
matches its header counts (9 context lines in py/utils.py, 140 added lines in
tests/chat_test.py). Indentation is reconstructed, not original: confirm the
py/utils.py context lines and the nesting level of the added
`message['content'] = message['content'].strip()` line against the target
file, or apply with `git apply --ignore-whitespace`.

 py/utils.py                       |   4 +-
 tests/chat_test.py                | 140 ++++++++++++++++++++++++++++++++++++++
 tests/resources/test1.include.txt |   1 +
 tests/resources/test2.include.txt |   1 +
 4 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 tests/chat_test.py
 create mode 100644 tests/resources/test1.include.txt
 create mode 100644 tests/resources/test2.include.txt

diff --git a/py/utils.py b/py/utils.py
index 931eeab..f5553ee 100644
--- a/py/utils.py
+++ b/py/utils.py
@@ -160,10 +160,12 @@ def parse_chat_messages(chat_content):
 
                 try:
                     with open(path, "r") as file:
-                        message["content"] += f"\n\n==> {path} <==\n" + file.read()
+                        file_content = file.read().strip()
+                        message["content"] += f"\n\n==> {path} <==\n" + file_content
                 except UnicodeDecodeError:
                     message["content"] += "\n\n" + f"==> {path} <=="
                     message["content"] += "\n" + "Binary file, cannot display"
+            message['content'] = message['content'].strip()
 
     return messages
 
diff --git a/tests/chat_test.py b/tests/chat_test.py
new file mode 100644
index 0000000..9acfecb
--- /dev/null
+++ b/tests/chat_test.py
@@ -0,0 +1,140 @@
+from utils import parse_chat_messages
+import os
+
+curr_dir = os.path.dirname(__file__)
+
+def strip_text(txt):
+    txt = txt.strip()
+    lines = txt.splitlines()
+    return "\n".join([line.lstrip() for line in lines])
+
+def test_parse_user_message():
+    chat_content = strip_text(
+    """
+    >>> user
+
+    generate lorem ipsum
+    """)
+    messages = parse_chat_messages(chat_content)
+    assert 1 == len(messages)
+    assert 'user' == messages[0]['role']
+    assert 'generate lorem ipsum' == messages[0]['content']
+
+
+def test_parse_system_message():
+    chat_content = strip_text("""
+    >>> system
+
+    you are general assystant
+
+    >>> user
+
+    generate lorem ipsum
+    """)
+    messages = parse_chat_messages(chat_content)
+    assert 2 == len(messages)
+    assert 'system' == messages[0]['role']
+    assert 'you are general assystant' == messages[0]['content']
+    assert 'user' == messages[1]['role']
+    assert 'generate lorem ipsum' == messages[1]['content']
+
+def test_parse_assistant_message():
+    chat_content = strip_text("""
+    >>> user
+
+    generate lorem ipsum
+
+    <<< assistant
+
+    bla bla bla
+
+    >>> user
+
+    again
+    """)
+    messages = parse_chat_messages(chat_content)
+    assert 3 == len(messages)
+    assert 'user' == messages[0]['role']
+    assert 'generate lorem ipsum' == messages[0]['content']
+    assert 'assistant' == messages[1]['role']
+    assert 'bla bla bla' == messages[1]['content']
+    assert 'user' == messages[2]['role']
+    assert 'again' == messages[2]['content']
+
+def test_parse_include_single_file_message():
+    chat_content = strip_text(f"""
+    >>> user
+
+    translate to human language
+
+    >>> include
+
+    {curr_dir}/resources/test1.include.txt
+    """)
+    messages = parse_chat_messages(chat_content)
+    assert 2 == len(messages)
+    assert 'user' == messages[0]['role']
+    assert 'translate to human language' == messages[0]['content']
+    assert 'user' == messages[1]['role']
+    expected_content = strip_text(f"""
+    ==> {curr_dir}/resources/test1.include.txt <==
+    hello world
+    """)
+    assert expected_content == messages[1]['content']
+
+def test_parse_include_multiple_files_message():
+    chat_content = strip_text(f"""
+    >>> user
+
+    translate to human language
+
+    >>> include
+
+    {curr_dir}/resources/test1.include.txt
+    {curr_dir}/resources/test2.include.txt
+    """)
+    messages = parse_chat_messages(chat_content)
+    assert 2 == len(messages)
+    assert 'user' == messages[0]['role']
+    assert 'translate to human language' == messages[0]['content']
+    assert 'user' == messages[1]['role']
+    expected_content = strip_text(f"""
+    ==> {curr_dir}/resources/test1.include.txt <==
+    hello world
+
+    ==> {curr_dir}/resources/test2.include.txt <==
+    vim is awesome
+    """)
+    assert expected_content == messages[1]['content']
+
+def test_parse_include_glob_files_message():
+    chat_content = strip_text(f"""
+    >>> user
+
+    translate to human language
+
+    >>> include
+
+    {curr_dir}/**/*.include.txt
+    """)
+    messages = parse_chat_messages(chat_content)
+    assert 2 == len(messages)
+    assert 'user' == messages[0]['role']
+    assert 'translate to human language' == messages[0]['content']
+    assert 'user' == messages[1]['role']
+    expected_content = strip_text(f"""
+    ==> {curr_dir}/resources/test1.include.txt <==
+    hello world
+
+    ==> {curr_dir}/resources/test2.include.txt <==
+    vim is awesome
+    """)
+    assert expected_content == messages[1]['content']
+
+def test_parse_include_image_message():
+    # TODO
+    pass
+
+def test_parse_include_image_with_files_message():
+    # TODO
+    pass
diff --git a/tests/resources/test1.include.txt b/tests/resources/test1.include.txt
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/tests/resources/test1.include.txt
@@ -0,0 +1 @@
+hello world
diff --git a/tests/resources/test2.include.txt b/tests/resources/test2.include.txt
new file mode 100644
index 0000000..e915587
--- /dev/null
+++ b/tests/resources/test2.include.txt
@@ -0,0 +1 @@
+vim is awesome
-- 
cgit v1.2.3