# Mirror of https://github.com/karl0ss/comfy_fm24_newgens.git
# Synced 2025-10-25 04:33:59 +01:00
#!/usr/bin/env python3
"""
Test script for the CLIP text chunking functionality.
"""

import sys
import os

# Add the lib directory to the path so we can import our modules
sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))

from text_chunker import CLIPTextChunker, chunk_prompt_for_clip
def test_basic_chunking():
    """Exercise CLIPTextChunker on short and long inputs.

    Checks that a short prompt survives as a single chunk, that a long
    prompt is split into several chunks, and that every chunk respects
    the configured token budget.
    """
    print("=== Testing Basic Text Chunking ===")

    chunker = CLIPTextChunker(max_tokens=60)  # conservative token budget

    # A short prompt must pass through untouched as one chunk.
    short_text = "A simple prompt"
    pieces = chunker.chunk_text(short_text)
    print(f"Short text: '{short_text}' -> {len(pieces)} chunks")
    assert len(pieces) == 1, f"Expected 1 chunk, got {len(pieces)}"

    # A long prompt must be split into multiple chunks.
    repeated = "This is a very long text that should definitely exceed the token limit when processed by CLIP. " * 10
    pieces = chunker.chunk_text(repeated)
    print(f"Long text -> {len(pieces)} chunks")
    assert len(pieces) > 1, f"Expected multiple chunks, got {len(pieces)}"

    # Every chunk must fit inside the token budget.
    for idx, piece in enumerate(pieces):
        tokens = chunker.estimate_token_count(piece)
        print(f"Chunk {idx+1}: {tokens} tokens (max: {chunker.max_tokens})")
        assert tokens <= chunker.max_tokens, f"Chunk {idx+1} exceeds token limit: {tokens} > {chunker.max_tokens}"

    print("✓ Basic chunking test passed\n")
def test_prompt_chunking():
    """Run the convenience chunker over a realistic app-style prompt."""
    print("=== Testing Prompt Chunking ===")

    # Mirrors the kind of long prompt shipped in app_config.json.
    sample_prompt = "Ultra-realistic close-up headshot of a Fair skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"

    pieces = chunk_prompt_for_clip(sample_prompt)
    print(f"Long prompt -> {len(pieces)} chunks")

    # Show a preview of each resulting chunk.
    for idx, piece in enumerate(pieces):
        print(f"Chunk {idx+1}: {piece[:100]}...")

    print("✓ Prompt chunking test passed\n")
def test_priority_chunking():
    """Exercise create_priority_chunks with a list of essential phrases."""
    print("=== Testing Priority Chunking ===")

    chunker = CLIPTextChunker(max_tokens=50)  # tighter budget for this test

    source_text = "This is a long text with important information about soccer players and their characteristics. The most important part is that they are professional athletes."
    must_keep = ["soccer players", "professional athletes", "important information"]

    priority_chunks = chunker.create_priority_chunks(source_text, must_keep)
    print(f"Priority chunks -> {len(priority_chunks)} chunks")

    # Print each chunk in full so the prioritisation is visible.
    for idx, piece in enumerate(priority_chunks):
        print(f"Priority chunk {idx+1}: {piece}")

    print("✓ Priority chunking test passed\n")
def test_edge_cases():
    """Test edge cases: empty input, text within the limit, and text over it."""
    print("=== Testing Edge Cases ===")

    chunker = CLIPTextChunker(max_tokens=60)

    # Empty text should produce no chunks at all.
    chunks = chunker.chunk_text("")
    assert len(chunks) == 0, "Empty text should return no chunks"

    # Text that fits the token budget should come back as a single,
    # unchanged chunk.
    exact_text = "A" * 60  # short enough to fit the 60-token budget
    chunks = chunker.chunk_text(exact_text)
    assert len(chunks) == 1, f"Expected 1 chunk for text at limit, got {len(chunks)}"
    assert chunks[0] == exact_text, "Text at limit should be returned unchanged"

    # Text that exceeds the limit (with spaces so it can be split on words).
    long_text = "This is a very long text that should definitely exceed the character limit when processed. " * 3
    chunks = chunker.chunk_text(long_text)
    assert len(chunks) > 1, f"Expected multiple chunks for long text, got {len(chunks)}"
    for chunk in chunks:
        token_count = chunker.estimate_token_count(chunk)
        # Fix: the failure message previously reported len(chunk) (a
        # character count) although the comparison is on token counts.
        assert token_count <= chunker.max_tokens, f"Chunk exceeds limit: {token_count} > {chunker.max_tokens}"

    print("✓ Edge cases test passed\n")
if __name__ == "__main__":
    try:
        test_basic_chunking()
        test_prompt_chunking()
        test_priority_chunking()
        test_edge_cases()

        print("🎉 All tests passed! Text chunking functionality is working correctly.")
    except Exception as e:
        # Print the full traceback so a failing assertion can actually be
        # located, then keep the original summary line and non-zero exit
        # for CI consumers.
        import traceback
        traceback.print_exc()
        print(f"❌ Test failed: {e}")
        sys.exit(1)