from transformers import AutoModelForCausalLM

from modeling_gemma_3_omni import Gemma3OmniForConditionalGeneration

# Load the full Phi-4-multimodal model and extract the state dict of its audio encoder.
phi_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-4-multimodal-instruct", trust_remote_code=True
)
phi_state_dict = phi_model.model.embed_tokens_extend.audio_embed.encoder.state_dict()

# Initialize Gemma 3 Omni from the base Gemma 3 4B instruction-tuned checkpoint.
model = Gemma3OmniForConditionalGeneration.from_pretrained("google/gemma-3-4b-it")
model.eval()

# Transplant the Phi-4 audio encoder weights into the Gemma audio projector's encoder.
# strict=False tolerates keys that are not present on both sides.
model.audio_projector.encoder.load_state_dict(phi_state_dict, strict=False)
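
# Optional sanity check (a sketch, not part of the original script): capturing the
# return value of load_state_dict shows how many encoder keys actually matched.
# With strict=False, PyTorch returns a NamedTuple of missing_keys and unexpected_keys.
incompatible = model.audio_projector.encoder.load_state_dict(phi_state_dict, strict=False)
print(f"missing keys: {len(incompatible.missing_keys)}")
print(f"unexpected keys: {len(incompatible.unexpected_keys)}")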

# Upload the merged checkpoint to the Hugging Face Hub.
model.push_to_hub("voidful/gemma-3-omni-4b-it")