# Mirror of https://github.com/karl0ss/comfy_fm24_newgens.git
# Synced 2025-10-25 04:33:59 +01:00
#!/usr/bin/env python3
"""
Test script for the CLIP text chunking functionality.
"""

import sys
import os

# Add the lib directory to the path so we can import our modules
sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))

from text_chunker import CLIPTextChunker, chunk_prompt_for_clip
def test_basic_chunking():
    """Exercise CLIPTextChunker on short and long inputs.

    Checks that a short prompt survives as a single chunk, that a long
    prompt is split into several chunks, and that every chunk respects
    the configured token budget.
    """
    print("=== Testing Basic Text Chunking ===")

    chunker = CLIPTextChunker(max_tokens=60)  # conservative token budget

    # A short prompt must pass through untouched as one chunk.
    short_text = "A simple prompt"
    pieces = chunker.chunk_text(short_text)
    print(f"Short text: '{short_text}' -> {len(pieces)} chunks")
    assert len(pieces) == 1, f"Expected 1 chunk, got {len(pieces)}"

    # A long prompt must be split into multiple chunks.
    repeated = "This is a very long text that should definitely exceed the token limit when processed by CLIP. " * 10
    pieces = chunker.chunk_text(repeated)
    print(f"Long text -> {len(pieces)} chunks")
    assert len(pieces) > 1, f"Expected multiple chunks, got {len(pieces)}"

    # Every chunk must fit inside the token budget.
    for idx, piece in enumerate(pieces):
        tokens = chunker.estimate_token_count(piece)
        print(f"Chunk {idx+1}: {tokens} tokens (max: {chunker.max_tokens})")
        assert tokens <= chunker.max_tokens, f"Chunk {idx+1} exceeds token limit: {tokens} > {chunker.max_tokens}"

    print("✓ Basic chunking test passed\n")
def test_prompt_chunking():
    """Run the convenience chunker over a realistic app-style prompt."""
    print("=== Testing Prompt Chunking ===")

    # Mirrors the kind of long prompt shipped in app_config.json.
    sample_prompt = "Ultra-realistic close-up headshot of a Fair skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"

    pieces = chunk_prompt_for_clip(sample_prompt)
    print(f"Long prompt -> {len(pieces)} chunks")

    # Show a preview of each resulting chunk.
    for idx, piece in enumerate(pieces):
        print(f"Chunk {idx+1}: {piece[:100]}...")

    print("✓ Prompt chunking test passed\n")
def test_priority_chunking():
    """Exercise create_priority_chunks with a list of essential phrases."""
    print("=== Testing Priority Chunking ===")

    chunker = CLIPTextChunker(max_tokens=50)  # tighter budget for this test

    source_text = "This is a long text with important information about soccer players and their characteristics. The most important part is that they are professional athletes."
    must_keep = ["soccer players", "professional athletes", "important information"]

    priority_chunks = chunker.create_priority_chunks(source_text, must_keep)
    print(f"Priority chunks -> {len(priority_chunks)} chunks")

    # Print each chunk in full so the prioritisation is visible.
    for idx, piece in enumerate(priority_chunks):
        print(f"Priority chunk {idx+1}: {piece}")

    print("✓ Priority chunking test passed\n")
def test_edge_cases():
    """Test edge cases: empty input, text within the limit, and text over it."""
    print("=== Testing Edge Cases ===")

    chunker = CLIPTextChunker(max_tokens=60)

    # Empty text should produce no chunks at all.
    chunks = chunker.chunk_text("")
    assert len(chunks) == 0, "Empty text should return no chunks"

    # Text that fits the token budget should come back as a single,
    # unchanged chunk.
    exact_text = "A" * 60  # short enough to fit the 60-token budget
    chunks = chunker.chunk_text(exact_text)
    assert len(chunks) == 1, f"Expected 1 chunk for text at limit, got {len(chunks)}"
    assert chunks[0] == exact_text, "Text at limit should be returned unchanged"

    # Text that exceeds the limit (with spaces so it can be split on words).
    long_text = "This is a very long text that should definitely exceed the character limit when processed. " * 3
    chunks = chunker.chunk_text(long_text)
    assert len(chunks) > 1, f"Expected multiple chunks for long text, got {len(chunks)}"
    for chunk in chunks:
        token_count = chunker.estimate_token_count(chunk)
        # Fix: the failure message previously reported len(chunk) (a
        # character count) although the comparison is on token counts.
        assert token_count <= chunker.max_tokens, f"Chunk exceeds limit: {token_count} > {chunker.max_tokens}"

    print("✓ Edge cases test passed\n")
if __name__ == "__main__":
    try:
        test_basic_chunking()
        test_prompt_chunking()
        test_priority_chunking()
        test_edge_cases()

        print("🎉 All tests passed! Text chunking functionality is working correctly.")
    except Exception as e:
        # Print the full traceback so a failing assertion can actually be
        # located, then keep the original summary line and non-zero exit
        # for CI consumers.
        import traceback
        traceback.print_exc()
        print(f"❌ Test failed: {e}")
        sys.exit(1)