#!/usr/bin/env python3
"""
Test script to verify that the text chunker fixes the token sequence length issues.
"""

import sys
import os

sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from lib.text_chunker import chunk_prompt_for_clip, CLIPTextChunker

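# Background for these tests: CLIP's text encoder has a hard context limit of 77
# tokens per input (including its start/end tokens), so any chunk longer than that
# is truncated by the encoder. The 60-token ceiling used below is a conservative
# margin chosen by this script; it presumably leaves the chunker headroom for
# special tokens rather than reflecting a limit of CLIP itself.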

def test_long_prompt_chunking():
    """Test that long prompts are properly chunked within CLIP token limits."""

    # Create a sample long prompt similar to what the app generates
    test_prompt = "Ultra-realistic close-up headshot of a Medium Brown skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and Medium Length Brown curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"

    print(f"Original prompt length: {len(test_prompt)} characters")
    print(f"Original prompt: {test_prompt}")
    print("-" * 80)

    # Chunk the prompt; the chunker instance is only used to count tokens per chunk
    chunker = CLIPTextChunker(max_tokens=60)
    chunks = chunk_prompt_for_clip(test_prompt)

    print(f"Number of chunks: {len(chunks)}")

    for i, chunk in enumerate(chunks):
        token_count = chunker.get_token_count(chunk)
        print(f"\nChunk {i+1}:")
        print(f" Text: {chunk}")
        print(f" Token count: {token_count}")
        print(f" Character count: {len(chunk)}")

        if token_count > 77:
            print(f" ❌ ERROR: Chunk {i+1} exceeds CLIP's 77 token limit!")
            return False
        elif token_count > 60:
            print(f" ⚠️ WARNING: Chunk {i+1} is close to the 60 token limit")
        else:
            print(f" ✅ Chunk {i+1} is within safe limits")

    print("-" * 80)
    print("✅ All chunks are within CLIP's token limits!")
    return True

def test_edge_cases():
    """Test edge cases for the chunking functionality."""

    chunker = CLIPTextChunker(max_tokens=60)

    # Test empty string
    chunks = chunker.chunk_text("")
    assert chunks == [], "Empty string should return empty list"

    # Test short string
    short_text = "Hello world"
    chunks = chunker.chunk_text(short_text)
    assert len(chunks) == 1 and chunks[0] == short_text, "Short text should not be chunked"

    # Test very long single word (edge case)
    long_word = "a" * 200
    chunks = chunker.chunk_text(long_word)
    # Should handle this gracefully
    for chunk in chunks:
        assert chunker.get_token_count(chunk) <= 60, "Long word chunks should respect token limit"

    print("✅ Edge case tests passed!")
    return True

if __name__ == "__main__":
    print("Testing text chunker fixes...")
    print("=" * 80)

    success1 = test_long_prompt_chunking()
    success2 = test_edge_cases()

    if success1 and success2:
        print("\n🎉 All tests passed! The token sequence length issue should be fixed.")
        sys.exit(0)
    else:
        print("\n❌ Some tests failed. The issue may not be fully resolved.")
        sys.exit(1)