comfy_fm24_newgens/test_text_chunker.py

#!/usr/bin/env python3
"""
Test script to verify that the text chunker fixes the token sequence length issues.
"""
import sys
import os

# Make the local lib package importable when this script is run directly.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from lib.text_chunker import chunk_prompt_for_clip, CLIPTextChunker
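
# NOTE: the checks below rely on the interface exposed by lib.text_chunker:
#   CLIPTextChunker(max_tokens=N).chunk_text(text) -> list[str]
#   CLIPTextChunker(...).get_token_count(text) -> int
#   chunk_prompt_for_clip(prompt) -> list[str]
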
def test_long_prompt_chunking():
    """Test that long prompts are properly chunked within CLIP token limits."""
    # Sample long prompt similar to what the app generates.
    test_prompt = (
        "Ultra-realistic close-up headshot of a Medium Brown skinned male soccer "
        "player with a plain background looking at the camera with his whole head "
        "in shot. The player is twenty-five years old, from United Kingdom, with "
        "clean-shaven and Medium Length Brown curly hair. He is facing the camera "
        "with a confident expression, wearing a soccer jersey. The lighting is "
        "natural and soft, emphasizing facial features and skin texture"
    )

    print(f"Original prompt length: {len(test_prompt)} characters")
    print(f"Original prompt: {test_prompt}")
    print("-" * 80)

    # Chunk the prompt and verify every chunk stays within the token limits.
    chunker = CLIPTextChunker(max_tokens=60)
    chunks = chunk_prompt_for_clip(test_prompt)
    print(f"Number of chunks: {len(chunks)}")

    for i, chunk in enumerate(chunks):
        token_count = chunker.get_token_count(chunk)
        print(f"\nChunk {i+1}:")
        print(f"  Text: {chunk}")
        print(f"  Token count: {token_count}")
        print(f"  Character count: {len(chunk)}")
        if token_count > 77:
            print(f"  ❌ ERROR: Chunk {i+1} exceeds CLIP's 77 token limit!")
            return False
        elif token_count > 60:
            print(f"  ⚠️ WARNING: Chunk {i+1} exceeds the 60 token target but is within CLIP's 77 token limit")
        else:
            print(f"  ✅ Chunk {i+1} is within safe limits")

    print("-" * 80)
    print("✅ All chunks are within CLIP's token limits!")
    return True

def test_edge_cases():
    """Test edge cases for the chunking functionality."""
    chunker = CLIPTextChunker(max_tokens=60)

    # Empty string: nothing to chunk.
    chunks = chunker.chunk_text("")
    assert chunks == [], "Empty string should return empty list"

    # Short string: should come back as a single, unmodified chunk.
    short_text = "Hello world"
    chunks = chunker.chunk_text(short_text)
    assert len(chunks) == 1 and chunks[0] == short_text, "Short text should not be chunked"

    # Very long single word: should be handled gracefully without exceeding the limit.
    long_word = "a" * 200
    chunks = chunker.chunk_text(long_word)
    for chunk in chunks:
        assert chunker.get_token_count(chunk) <= 60, "Long word chunks should respect token limit"

    print("✅ Edge case tests passed!")
    return True

if __name__ == "__main__":
    print("Testing text chunker fixes...")
    print("=" * 80)

    success1 = test_long_prompt_chunking()
    success2 = test_edge_cases()

    if success1 and success2:
        print("\n🎉 All tests passed! The token sequence length issue should be fixed.")
        sys.exit(0)
    else:
        print("\n❌ Some tests failed. The issue may not be fully resolved.")
        sys.exit(1)