#!/bin/bash

# Input model file (BF16 GGUF produced by the conversion step below)
INPUT_MODEL="osmosis-mcp-4B-BF16.gguf"

# Convert the HF checkpoint to a BF16 GGUF. Without --outfile,
# convert_hf_to_gguf.py writes the GGUF into the source model directory,
# so pin the output to the path this script expects.
cd llama.cpp || exit 1
python3 convert_hf_to_gguf.py ../../osmosis-mcp-4b --outtype bf16 --outfile "../$INPUT_MODEL"
cd ..

# Define quantization formats to generate
QUANT_FORMATS=(
    "Q4_K_S"
    "Q5_K_M"
    "Q5_K_S"
    "Q6_K"
    "IQ4_XS"
    "Q8_0"
    "Q2_K"
    "Q3_K_L"
    "Q3_K_M"
    "Q3_K_S"
    "Q4_K_M"
)

# Check if input model exists
if [ ! -f "$INPUT_MODEL" ]; then
    echo "Error: Input model file $INPUT_MODEL not found."
    exit 1
fi

# Path to llama-quantize tool
QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"

# Check if quantize tool exists
if [ ! -f "$QUANTIZE_TOOL" ]; then
    echo "Error: Quantize tool not found at $QUANTIZE_TOOL"
    exit 1
fi

# Process each quantization format
for format in "${QUANT_FORMATS[@]}"; do
    echo "------------------------------------------------------"
    echo "Starting quantization: $format"
    echo "------------------------------------------------------"

    # Define output filename with the exact format requested
    OUTPUT_MODEL="osmosis-mcp-4b.${format}.gguf"

    # Skip formats that have already been generated
    if [ -f "$OUTPUT_MODEL" ]; then
        echo "Model $OUTPUT_MODEL already exists. Skipping..."
        continue
    fi

    # Run quantization and report the result
    echo "Quantizing to $format..."
    if "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"; then
        echo "Successfully created $OUTPUT_MODEL"
    else
        echo "Failed to create $OUTPUT_MODEL"
    fi

    echo ""
done

echo "All quantizations completed!"
echo "Generated models:"
ls -lah osmosis-mcp-4b.*.gguf
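
# ------------------------------------------------------------------
# One-time setup this script assumes (a minimal sketch following the
# upstream llama.cpp README; paths and flags may need adjusting to
# your environment):
#
#   git clone https://github.com/ggml-org/llama.cpp
#   pip install -r llama.cpp/requirements.txt        # converter's Python deps
#   cmake -S llama.cpp -B llama.cpp/build
#   cmake --build llama.cpp/build --config Release   # produces build/bin/llama-quantize
#
# Optional smoke test of a finished quant with llama-cli (the prompt
# here is a placeholder; any of the generated files works):
#
#   llama.cpp/build/bin/llama-cli -m osmosis-mcp-4b.Q4_K_M.gguf -p "Hello" -n 32
# ------------------------------------------------------------------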