# DeepSeek-R1-0528-AWQ / test_tokenizer_direct.py
# Hugging Face file-view metadata (not part of the script):
#   uploaded by ehartford — "Add files using upload-large-folder tool"
#   commit f74b683 (verified) — raw / history blame — 2.44 kB
#!/usr/bin/env python3
"""
Direct test of the tokenizer to verify enable_thinking parameter works.
"""
from transformers import AutoTokenizer
# Path to a local checkout of DeepSeek-R1-0528; its chat template is what
# the enable_thinking flag is supposed to control.
MODEL_PATH = "/home/hotaisle/workspace/models/DeepSeek-R1-0528"
print("Testing tokenizer directly with enable_thinking parameter\n")
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Single-turn conversation rendered by every template test below.
messages = [{"role": "user", "content": "What is 2+2?"}]
# Test 1: render the template without passing enable_thinking at all,
# to establish the default behavior.
print("Test 1: Default (no enable_thinking parameter)")
prompt1 = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(f"Prompt ends with: {prompt1[-100:]!r}")
print(f"Contains <think>: {'<think>' in prompt1}")

# Test 2: explicitly enabling thinking — expected to match the default.
print("\n\nTest 2: enable_thinking=True")
prompt2 = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=True
)
print(f"Prompt ends with: {prompt2[-100:]!r}")
print(f"Contains <think>: {'<think>' in prompt2}")
# Test 3: enable_thinking=False — the template should append an empty
# think block so the model skips its reasoning phase.
print("\n\nTest 3: enable_thinking=False")
prompt3 = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)
print(f"Prompt ends with: {repr(prompt3[-130:])}")
# BUG FIX: the original searched for '<think>\\n\\n</think>\\n\\n', i.e. the
# literal two characters backslash+n, which the rendered prompt never
# contains (the template emits real newlines) — so the check always printed
# False. Build the marker with real newlines, and outside the f-string
# (backslashes in f-string expressions are a SyntaxError before Python 3.12).
empty_think = "<think>\n\n</think>\n\n"
print(f"Contains empty think block: {empty_think in prompt3}")
# Show the difference between the default prompt and the
# enable_thinking=False prompt.
print("\n\nDifference between prompts:")
print("-" * 60)
if prompt1 == prompt2:
    print("Default and enable_thinking=True are identical ✓")
if prompt1 != prompt3:
    print("enable_thinking=False is different ✓")
    # Find where they differ
    for i, (c1, c3) in enumerate(zip(prompt1, prompt3)):
        if c1 != c3:
            # Clamp the window start to 0: for i < 20 a negative slice
            # start would wrap around to the end of the string.
            lo = max(0, i - 20)
            print(f"First difference at position {i}:")
            print(f" Default: ...{repr(prompt1[lo:i+50])}")
            print(f" False: ...{repr(prompt3[lo:i+50])}")
            break
    else:
        # zip() stops at the shorter prompt, so if one prompt is a prefix
        # of the other the loop finds no differing character — report that
        # instead of silently printing nothing.
        pos = min(len(prompt1), len(prompt3))
        print(f"First difference at position {pos}: "
              "one prompt is a prefix of the other")
else:
    print("ERROR: enable_thinking=False produces same output as default!")
# Test the actual template string for enable_thinking support.
print("\n\nChecking template directly:")
template = tokenizer.chat_template
# Single scan: find() returns -1 when absent, replacing the original's
# separate "in" membership test followed by a second find().
idx = template.find("enable_thinking")
if idx != -1:
    print("✓ Template contains 'enable_thinking' logic")
    print(f"Found at position {idx}")
    # Clamp to 0: a negative start would wrap around to the end of the
    # template when the match sits within the first 50 characters.
    print(f"Context: ...{template[max(0, idx - 50):idx + 100]}...")
else:
    print("✗ Template does NOT contain 'enable_thinking' logic!")