comfy_fm24_newgens/test_text_chunker.py
2025-09-23 16:00:27 +01:00

83 lines
3.0 KiB
Python

#!/usr/bin/env python3
"""
Test script to verify that the text chunker fixes the token sequence length issues.
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from lib.text_chunker import chunk_prompt_for_clip, CLIPTextChunker
def test_long_prompt_chunking():
    """Chunk a representative long prompt and report each chunk's token count.

    A per-chunk report is printed to stdout. The check stops at the first
    chunk whose token count is above 77 (the CLIP limit named in the
    messages below).

    Returns:
        False as soon as one chunk exceeds 77 tokens, True when every chunk
        has been inspected without hitting that limit.
    """
    # Sample prompt modelled on what the application produces.
    test_prompt = "Ultra-realistic close-up headshot of a Medium Brown skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and Medium Length Brown curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"
    separator = "-" * 80

    print(f"Original prompt length: {len(test_prompt)} characters")
    print(f"Original prompt: {test_prompt}")
    print(separator)

    # The chunker instance is used here only to count tokens; the actual
    # splitting is done by the module-level convenience function.
    token_counter = CLIPTextChunker(max_tokens=70)
    pieces = chunk_prompt_for_clip(test_prompt)
    print(f"Number of chunks: {len(pieces)}")

    for number, piece in enumerate(pieces, start=1):
        tokens = token_counter.get_token_count(piece)

        print(f"\nChunk {number}:")
        print(f" Text: {piece}")
        print(f" Token count: {tokens}")
        print(f" Character count: {len(piece)}")

        # Hard failure: bail out immediately on the first oversized chunk.
        if tokens > 77:
            print(f" ❌ ERROR: Chunk {number} exceeds CLIP's 77 token limit!")
            return False

        # Between the 70-token threshold and the hard limit we only warn.
        if tokens > 70:
            print(f" ⚠️ WARNING: Chunk {number} is close to the 77 token limit")
        else:
            print(f" ✅ Chunk {number} is within safe limits")

    print(separator)
    print("✅ All chunks are within CLIP's token limits!")
    return True
def test_edge_cases():
    """Exercise ``CLIPTextChunker.chunk_text`` on boundary inputs.

    Three inputs are checked with a chunker built with ``max_tokens=70``:
    an empty string, a short two-word string, and a single 200-character
    word.

    Returns:
        True when every assertion holds.

    Raises:
        AssertionError: if any of the expectations below is not met.
    """
    chunker = CLIPTextChunker(max_tokens=70)

    # Empty input must yield no chunks at all.
    empty_result = chunker.chunk_text("")
    assert empty_result == [], "Empty string should return empty list"

    # Short input must come back as a single, unmodified chunk.
    short_text = "Hello world"
    short_result = chunker.chunk_text(short_text)
    returned_unchanged = len(short_result) == 1 and short_result[0] == short_text
    assert returned_unchanged, "Short text should not be chunked"

    # A single very long word has no natural split point; whatever chunks
    # come back must each stay within the 70-token budget.
    long_word = "a" * 200
    long_result = chunker.chunk_text(long_word)
    every_chunk_fits = all(
        chunker.get_token_count(piece) <= 70 for piece in long_result
    )
    assert every_chunk_fits, "Long word chunks should respect token limit"

    print("✅ Edge case tests passed!")
    return True
if __name__ == "__main__":
    # Script entry point: run every check, then report the overall verdict
    # on stdout and through the process exit code (0 = success, 1 = failure).
    print("Testing text chunker fixes...")
    print("=" * 80)

    # Both checks are executed before the verdict is evaluated, so the
    # second one still runs when the first one reports failure.
    outcomes = [test_long_prompt_chunking(), test_edge_cases()]

    if not all(outcomes):
        print("\n❌ Some tests failed. The issue may not be fully resolved.")
        sys.exit(1)

    print("\n🎉 All tests passed! The token sequence length issue should be fixed.")
    sys.exit(0)