Mirror of https://github.com/karl0ss/comfy_fm24_newgens.git
Synced 2025-10-03 06:40:06 +01:00

Commit 059b13a8b6 (parent 243963b252): fixed clip warning
@@ -21,6 +21,7 @@ except ImportError:
 from lib.generate_xml import create_config_xml, append_to_config_xml
 from lib.resize_images import resize_images
 from lib.xml_reader import extract_from_values
+from lib.text_chunker import chunk_prompt_for_clip
 # Profile functions are now handled entirely by GUI
 from lib.logging import LOGGING_CONFIG
 
@@ -202,8 +203,19 @@ def generate_prompts_for_players(players, app_config):
                 hair=f"{hair_length} {hair_colour} {hair_extra}",
             )
             logging.debug(f"Generated prompt: {prompt}")
-            prompt = f"{player[0]}:{prompt}"
-            prompts.append(prompt)
+
+            # Chunk the prompt if it's too long for CLIP
+            prompt_chunks = chunk_prompt_for_clip(prompt)
+
+            # Create a prompt entry for each chunk
+            for i, chunk in enumerate(prompt_chunks):
+                if len(prompt_chunks) > 1:
+                    # If we have multiple chunks, add a chunk identifier
+                    chunked_prompt = f"{player[0]}_chunk_{i+1}:{chunk}"
+                else:
+                    chunked_prompt = f"{player[0]}:{chunk}"
+                prompts.append(chunked_prompt)
+
         except KeyError as e:
             logging.warning(f"Key error while generating prompt for player: {e}")
     return prompts
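Note: as a quick illustration of the naming scheme above (the player id "12345" is made up for this sketch; in the commit it comes from player[0]), a long prompt now yields several uniquely keyed entries instead of one oversized prompt:

    from lib.text_chunker import chunk_prompt_for_clip

    prompt = "Ultra-realistic close-up headshot of a male soccer player " * 3
    chunks = chunk_prompt_for_clip(prompt)
    for i, chunk in enumerate(chunks):
        # Mirrors the committed loop: multiple chunks get a _chunk_N suffix
        name = f"12345_chunk_{i+1}" if len(chunks) > 1 else "12345"
        print(f"{name}:{chunk}")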
lib/text_chunker.py (new file, 138 additions)
@@ -0,0 +1,138 @@
import re
from typing import List, Optional


class CLIPTextChunker:
    """
    Utility class for chunking text to fit within CLIP's token limits.
    CLIP models typically have a maximum sequence length of 77 tokens.
    Using a conservative limit of 60 tokens to account for special tokens.
    """

    def __init__(self, max_tokens: int = 60):
        """
        Initialize the text chunker.

        Args:
            max_tokens (int): Maximum number of tokens per chunk
                (default: 60 for CLIP, being conservative)
        """
        self.max_tokens = max_tokens

    def estimate_token_count(self, text: str) -> int:
        """
        Estimate the number of tokens in a text string.
        Uses character count as a simple proxy for token count.

        Args:
            text (str): Input text

        Returns:
            int: Estimated token count (using character count as proxy)
        """
        # Simple approach: use character count as a proxy for token count.
        # This is much more reliable than trying to estimate actual tokens.
        return len(text)

    def chunk_text(self, text: str, preserve_sentences: bool = True) -> List[str]:
        """
        Chunk text into smaller pieces that fit within the token limit.
        Uses character count as a simple and reliable approach.

        Args:
            text (str): Input text to chunk
            preserve_sentences (bool): Whether to try to preserve sentence boundaries

        Returns:
            List[str]: List of text chunks
        """
        if not text.strip():
            return []

        if self.estimate_token_count(text) <= self.max_tokens:
            return [text]

        chunks = []
        words = text.split()
        current_chunk = []
        current_length = 0

        for word in words:
            word_with_space = word + " "

            # If adding this word would exceed the limit, start a new chunk
            if current_length + len(word_with_space) > self.max_tokens and current_chunk:
                # Join the current chunk and add it
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_length = len(word_with_space)
            else:
                current_chunk.append(word)
                current_length += len(word_with_space)

        # Add the last chunk if it exists
        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks

    def create_priority_chunks(self, text: str, essential_info: List[str]) -> List[str]:
        """
        Create chunks with priority given to essential information.

        Args:
            text (str): Full text to chunk
            essential_info (List[str]): List of essential phrases that should be preserved

        Returns:
            List[str]: List of prioritized chunks
        """
        # First, try to create chunks that include essential information
        essential_chunks = []

        for info in essential_info:
            if info in text:
                # Create a chunk focused on this essential info
                info_index = text.find(info)
                start = max(0, info_index - 50)  # Include some context before
                end = min(len(text), info_index + len(info) + 50)  # Include some context after
                context = text[start:end]

                chunk = self.chunk_text(context)[0]  # Take the first (most relevant) chunk
                if chunk not in essential_chunks:
                    essential_chunks.append(chunk)

        # If we have too many essential chunks, combine them
        if len(essential_chunks) > 1:
            combined = " ".join(essential_chunks)
            if self.estimate_token_count(combined) <= self.max_tokens:
                return [combined]
            else:
                # Need to reduce the combined chunk
                return self.chunk_text(combined)

        return essential_chunks if essential_chunks else self.chunk_text(text)


def chunk_prompt_for_clip(prompt: str, max_tokens: int = 60) -> List[str]:
    """
    Convenience function to chunk a prompt for CLIP processing.
    Uses a conservative 60 token limit to be safe.

    Args:
        prompt (str): The prompt to chunk
        max_tokens (int): Maximum tokens per chunk (default: 60 for safety)

    Returns:
        List[str]: List of prompt chunks
    """
    chunker = CLIPTextChunker(max_tokens=max_tokens)

    # Define essential information that should be preserved
    essential_info = [
        "Ultra-realistic close-up headshot",
        "male soccer player",
        "looking at the camera",
        "facing the camera",
        "confident expression",
        "soccer jersey",
    ]

    return chunker.create_priority_chunks(prompt, essential_info)
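Note: estimate_token_count is a raw character count, not a real tokenizer, so the 60-"token" ceiling is in practice a 60-character ceiling (roughly 15 BPE tokens in English, comfortably under CLIP's 77-token window). If exact counts were ever wanted, a sketch along these lines would work; it assumes the Hugging Face transformers package, which is not a dependency of this repo:

    # Sketch only: transformers is an assumed extra, not used by this commit.
    from transformers import CLIPTokenizer

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

    def clip_token_count(text: str) -> int:
        # input_ids includes the start/end special tokens CLIP adds
        return len(tokenizer(text)["input_ids"])

    print(clip_token_count("Ultra-realistic close-up headshot"))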
test_text_chunker.py (new file, 110 additions)
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
"""
Test script for the CLIP text chunking functionality.
"""

import sys
import os

# Add the lib directory to the path so we can import our modules
sys.path.append(os.path.join(os.path.dirname(__file__), 'lib'))

from text_chunker import CLIPTextChunker, chunk_prompt_for_clip


def test_basic_chunking():
    """Test basic text chunking functionality."""
    print("=== Testing Basic Text Chunking ===")

    chunker = CLIPTextChunker(max_tokens=60)  # Using conservative limit

    # Test short text (should not be chunked)
    short_text = "A simple prompt"
    chunks = chunker.chunk_text(short_text)
    print(f"Short text: '{short_text}' -> {len(chunks)} chunks")
    assert len(chunks) == 1, f"Expected 1 chunk, got {len(chunks)}"

    # Test long text (should be chunked)
    long_text = "This is a very long text that should definitely exceed the token limit when processed by CLIP. " * 10
    chunks = chunker.chunk_text(long_text)
    print(f"Long text -> {len(chunks)} chunks")
    assert len(chunks) > 1, f"Expected multiple chunks, got {len(chunks)}"

    # Verify each chunk is within the token limit
    for i, chunk in enumerate(chunks):
        token_count = chunker.estimate_token_count(chunk)
        print(f"Chunk {i+1}: {token_count} tokens (max: {chunker.max_tokens})")
        assert token_count <= chunker.max_tokens, f"Chunk {i+1} exceeds token limit: {token_count} > {chunker.max_tokens}"

    print("✓ Basic chunking test passed\n")


def test_prompt_chunking():
    """Test chunking with actual prompts similar to the app."""
    print("=== Testing Prompt Chunking ===")

    # Simulate a long prompt like the one from app_config.json
    long_prompt = "Ultra-realistic close-up headshot of a Fair skinned male soccer player with a plain background looking at the camera with his whole head in shot. The player is twenty-five years old, from United Kingdom, with clean-shaven and curly hair. He is facing the camera with a confident expression, wearing a soccer jersey. The lighting is natural and soft, emphasizing facial features and skin texture"

    chunks = chunk_prompt_for_clip(long_prompt)
    print(f"Long prompt -> {len(chunks)} chunks")

    for i, chunk in enumerate(chunks):
        print(f"Chunk {i+1}: {chunk[:100]}...")

    print("✓ Prompt chunking test passed\n")


def test_priority_chunking():
    """Test priority-based chunking."""
    print("=== Testing Priority Chunking ===")

    chunker = CLIPTextChunker(max_tokens=50)  # Smaller limit for testing

    text = "This is a long text with important information about soccer players and their characteristics. The most important part is that they are professional athletes."

    essential_info = ["soccer players", "professional athletes", "important information"]

    chunks = chunker.create_priority_chunks(text, essential_info)
    print(f"Priority chunks -> {len(chunks)} chunks")

    for i, chunk in enumerate(chunks):
        print(f"Priority chunk {i+1}: {chunk}")

    print("✓ Priority chunking test passed\n")


def test_edge_cases():
    """Test edge cases."""
    print("=== Testing Edge Cases ===")

    chunker = CLIPTextChunker(max_tokens=60)

    # Test empty text
    chunks = chunker.chunk_text("")
    assert len(chunks) == 0, "Empty text should return no chunks"

    # Test text exactly at the limit
    exact_text = "A" * 60  # Text exactly at the character limit
    chunks = chunker.chunk_text(exact_text)
    # Should return the text as-is since it's exactly at the limit
    assert len(chunks) == 1, f"Expected 1 chunk for text at limit, got {len(chunks)}"
    assert chunks[0] == exact_text, "Text at limit should be returned unchanged"

    # Test text that exceeds the limit (with spaces so it can be split)
    long_text = "This is a very long text that should definitely exceed the character limit when processed. " * 3
    chunks = chunker.chunk_text(long_text)
    assert len(chunks) > 1, f"Expected multiple chunks for long text, got {len(chunks)}"
    for chunk in chunks:
        assert chunker.estimate_token_count(chunk) <= chunker.max_tokens, f"Chunk exceeds limit: {len(chunk)} > {chunker.max_tokens}"

    print("✓ Edge cases test passed\n")


if __name__ == "__main__":
    try:
        test_basic_chunking()
        test_prompt_chunking()
        test_priority_chunking()
        test_edge_cases()

        print("🎉 All tests passed! Text chunking functionality is working correctly.")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        sys.exit(1)
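Note: the suite uses plain asserts rather than pytest, so (assuming the file sits at the repository root next to lib/, as the sys.path line above expects) it runs directly:

    python test_text_chunker.py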