#!/bin/bash
#
# Convert the osmosis-mcp-4b HF model to a BF16 GGUF, then quantize it
# into a set of GGUF formats with llama.cpp's llama-quantize tool.
#
# Requirements:
#   - llama.cpp checked out in ./llama.cpp with llama-quantize built at
#     llama.cpp/build/bin/llama-quantize
#   - the HF model directory reachable as ../../osmosis-mcp-4b from
#     inside the llama.cpp directory (i.e. ../osmosis-mcp-4b from here)
#
# Already-existing output files are skipped, so the script is safe to
# re-run after a partial failure.

set -euo pipefail

# BF16 GGUF produced by the conversion step (input to quantization).
readonly INPUT_MODEL="osmosis-mcp-4B-BF16.gguf"

# Path to the llama.cpp quantization binary.
readonly QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"

# Quantization formats to generate.
readonly QUANT_FORMATS=(
  "Q4_K_S"
  "Q5_K_M"
  "Q5_K_S"
  "Q6_K"
  "IQ4_XS"
  "Q8_0"
  "Q2_K"
  "Q3_K_L"
  "Q3_K_M"
  "Q3_K_S"
  "Q4_K_M"
)

# --- Step 1: HF -> BF16 GGUF conversion (skipped if already done) ---
if [ -f "$INPUT_MODEL" ]; then
  echo "BF16 model $INPUT_MODEL already exists. Skipping conversion."
else
  # Guard the cd: without it a missing checkout would run the converter
  # (and the rest of the script) in the wrong directory.
  cd llama.cpp || { echo "Error: llama.cpp directory not found." >&2; exit 1; }
  python3 convert_hf_to_gguf.py ../../osmosis-mcp-4b --outtype bf16
  cd ..
fi

if [ ! -f "$INPUT_MODEL" ]; then
  echo "Error: Input model file $INPUT_MODEL not found." >&2
  exit 1
fi

if [ ! -f "$QUANTIZE_TOOL" ]; then
  echo "Error: Quantize tool not found at $QUANTIZE_TOOL" >&2
  exit 1
fi

# --- Step 2: quantize the BF16 model into each requested format ---
for format in "${QUANT_FORMATS[@]}"; do
  echo "------------------------------------------------------"
  echo "Starting quantization: $format"
  echo "------------------------------------------------------"

  OUTPUT_MODEL="osmosis-mcp-4b.${format}.gguf"

  # Skip formats already generated on a previous run.
  if [ -f "$OUTPUT_MODEL" ]; then
    echo "Model $OUTPUT_MODEL already exists. Skipping..."
    continue
  fi

  echo "Quantizing to $format..."
  # Test the command directly rather than inspecting $?; this also
  # keeps `set -e` from aborting the whole batch when one format fails.
  if "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"; then
    echo "Successfully created $OUTPUT_MODEL"
  else
    echo "Failed to create $OUTPUT_MODEL" >&2
  fi

  echo ""
done

echo "All quantizations completed!"
echo "Generated models:"
ls -lah osmosis-mcp-4b.*.gguf