comfy_fm24_newgens/test_text_chunker.py

#!/usr/bin/env python3
"""
Test script to verify that the text chunker fixes the token sequence length issues.
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from lib.text_chunker import chunk_prompt_for_clip, CLIPTextChunker

def test_long_prompt_chunking():
    """Check that a long, app-style prompt is split into CLIP-sized chunks.

    The sample prompt is split with ``chunk_prompt_for_clip`` and every
    resulting chunk is measured with ``CLIPTextChunker.get_token_count``.
    A per-chunk report is printed along the way.

    Returns:
        bool: ``True`` when at least one chunk was produced and no chunk is
        longer than the 77-token hard limit; ``False`` otherwise.
    """
    hard_limit = 77  # the value this test treats as CLIP's token limit
    soft_limit = 60  # same value as the max_tokens given to the chunker below

    # Create a sample long prompt similar to what the app generates
    test_prompt = "Ultra-realistic close-up headshot of a Medium Brown skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and Medium Length Brown curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"

    print(f"Original prompt length: {len(test_prompt)} characters")
    print(f"Original prompt: {test_prompt}")
    print("-" * 80)

    # This chunker instance is only used to count tokens; the split itself
    # comes from the module-level convenience function.
    chunker = CLIPTextChunker(max_tokens=soft_limit)
    chunks = chunk_prompt_for_clip(test_prompt)

    print(f"Number of chunks: {len(chunks)}")

    # Without this guard the loop below would be skipped and the test would
    # report success even though the whole prompt had been dropped.
    if not chunks:
        print("  ❌ ERROR: No chunks were produced for a non-empty prompt!")
        return False

    for number, chunk in enumerate(chunks, start=1):
        token_count = chunker.get_token_count(chunk)
        print(f"\nChunk {number}:")
        print(f"  Text: {chunk}")
        print(f"  Token count: {token_count}")
        print(f"  Character count: {len(chunk)}")

        if token_count > hard_limit:
            print(f"  ❌ ERROR: Chunk {number} exceeds CLIP's {hard_limit} token limit!")
            return False
        elif token_count > soft_limit:
            # Over the soft target but still under the hard limit: warn only.
            print(
                f"  ⚠️  WARNING: Chunk {number} is above the {soft_limit} token "
                f"target but within CLIP's {hard_limit} token limit"
            )
        else:
            print(f"  ✅ Chunk {number} is within safe limits")

    print("-" * 80)
    print("✅ All chunks are within CLIP's token limits!")
    return True

def test_edge_cases():
    """Exercise boundary inputs of ``CLIPTextChunker.chunk_text``.

    Covers three inputs: the empty string, a short two-word string, and a
    single 200-character word. Each expectation is enforced with ``assert``.

    Returns:
        bool: ``True`` when every assertion holds.

    Raises:
        AssertionError: if any expectation is violated.
    """
    splitter = CLIPTextChunker(max_tokens=60)

    # Empty input: expected to yield no chunks at all.
    empty_result = splitter.chunk_text("")
    assert empty_result == [], "Empty string should return empty list"

    # Short input: expected to come back as exactly one unchanged chunk.
    short_text = "Hello world"
    short_result = splitter.chunk_text(short_text)
    assert len(short_result) == 1 and short_result[0] == short_text, "Short text should not be chunked"

    # One very long word with no whitespace to split on: whatever chunks are
    # returned must each stay within the configured token budget.
    long_word = "a" * 200
    long_result = splitter.chunk_text(long_word)
    for piece in long_result:
        assert splitter.get_token_count(piece) <= 60, "Long word chunks should respect token limit"

    print("✅ Edge case tests passed!")
    return True

if __name__ == "__main__":
    # Script entry point: run every test, then exit 0 only if all succeeded.
    print("Testing text chunker fixes...")
    print("=" * 80)

    outcomes = []
    for test_func in (test_long_prompt_chunking, test_edge_cases):
        try:
            outcomes.append(bool(test_func()))
        except AssertionError as failure:
            # test_edge_cases reports failure by raising rather than by
            # returning False; record it so the summary below still runs
            # instead of the script dying with a bare traceback.
            print(f"\n❌ {test_func.__name__} failed: {failure}")
            outcomes.append(False)

    if all(outcomes):
        print("\n🎉 All tests passed! The token sequence length issue should be fixed.")
        sys.exit(0)
    else:
        print("\n❌ Some tests failed. The issue may not be fully resolved.")
        sys.exit(1)
fixed clip warning 2025-09-23 15:30:50 +01:00			`#!/usr/bin/env python3`
			`"""`
text chunker 2025-09-23 16:00:27 +01:00			`Test script to verify that the text chunker fixes the token sequence length issues.`
fixed clip warning 2025-09-23 15:30:50 +01:00			`"""`

			`import sys`
			`import os`
text chunker 2025-09-23 16:00:27 +01:00			`sys.path.append(os.path.dirname(os.path.abspath(__file__)))`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`from lib.text_chunker import chunk_prompt_for_clip, CLIPTextChunker`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`def test_long_prompt_chunking():`
			`"""Test that long prompts are properly chunked within CLIP token limits."""`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`# Create a sample long prompt similar to what the app generates`
			`test_prompt = "Ultra-realistic close-up headshot of a Medium Brown skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and Medium Length Brown curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`print(f"Original prompt length: {len(test_prompt)} characters")`
			`print(f"Original prompt: {test_prompt}")`
			`print("-" * 80)`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`# Test the chunking`
text chunking 2025-09-23 17:08:52 +01:00			`chunker = CLIPTextChunker(max_tokens=60)`
text chunker 2025-09-23 16:00:27 +01:00			`chunks = chunk_prompt_for_clip(test_prompt)`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`print(f"Number of chunks: {len(chunks)}")`
fixed clip warning 2025-09-23 15:30:50 +01:00
			`for i, chunk in enumerate(chunks):`
text chunker 2025-09-23 16:00:27 +01:00			`token_count = chunker.get_token_count(chunk)`
			`print(f"\nChunk {i+1}:")`
			`print(f" Text: {chunk}")`
			`print(f" Token count: {token_count}")`
			`print(f" Character count: {len(chunk)}")`

			`if token_count > 77:`
			`print(f" ❌ ERROR: Chunk {i+1} exceeds CLIP's 77 token limit!")`
			`return False`
text chunking 2025-09-23 17:08:52 +01:00			`elif token_count > 60:`
			`print(f" ⚠️ WARNING: Chunk {i+1} is close to the 60 token limit")`
text chunker 2025-09-23 16:00:27 +01:00			`else:`
			`print(f" ✅ Chunk {i+1} is within safe limits")`

			`print("-" * 80)`
			`print("✅ All chunks are within CLIP's token limits!")`
			`return True`
fixed clip warning 2025-09-23 15:30:50 +01:00
			`def test_edge_cases():`
text chunker 2025-09-23 16:00:27 +01:00			`"""Test edge cases for the chunking functionality."""`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunking 2025-09-23 17:08:52 +01:00			`chunker = CLIPTextChunker(max_tokens=60)`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`# Test empty string`
fixed clip warning 2025-09-23 15:30:50 +01:00			`chunks = chunker.chunk_text("")`
text chunker 2025-09-23 16:00:27 +01:00			`assert chunks == [], "Empty string should return empty list"`

			`# Test short string`
			`short_text = "Hello world"`
			`chunks = chunker.chunk_text(short_text)`
			`assert len(chunks) == 1 and chunks[0] == short_text, "Short text should not be chunked"`

			`# Test very long single word (edge case)`
			`long_word = "a" * 200`
			`chunks = chunker.chunk_text(long_word)`
			`# Should handle this gracefully`
fixed clip warning 2025-09-23 15:30:50 +01:00			`for chunk in chunks:`
text chunking 2025-09-23 17:08:52 +01:00			`assert chunker.get_token_count(chunk) <= 60, "Long word chunks should respect token limit"`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`print("✅ Edge case tests passed!")`
			`return True`
fixed clip warning 2025-09-23 15:30:50 +01:00
			`if __name__ == "__main__":`
text chunker 2025-09-23 16:00:27 +01:00			`print("Testing text chunker fixes...")`
			`print("=" * 80)`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`success1 = test_long_prompt_chunking()`
			`success2 = test_edge_cases()`
fixed clip warning 2025-09-23 15:30:50 +01:00
text chunker 2025-09-23 16:00:27 +01:00			`if success1 and success2:`
			`print("\n🎉 All tests passed! The token sequence length issue should be fixed.")`
			`sys.exit(0)`
			`else:`
			`print("\n❌ Some tests failed. The issue may not be fully resolved.")`
fixed clip warning 2025-09-23 15:30:50 +01:00			`sys.exit(1)`