Update README.md
Browse files
README.md
CHANGED
@@ -597,10 +597,10 @@ We conducted a comprehensive evaluation of Qwen2.5-Omni, which demonstrates stro
|
|
597 |
|
598 |
## Quickstart
|
599 |
|
600 |
-
Below, we provide simple examples to show how to use Qwen2.5-Omni with 🤗 Transformers. The codes of Qwen2.5-Omni
|
601 |
```
|
602 |
pip uninstall transformers
|
603 |
-
pip install git+https://github.com/
|
604 |
pip install accelerate
|
605 |
```
|
606 |
or you might encounter the following error:
|
@@ -644,7 +644,9 @@ processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B")
|
|
644 |
conversation = [
|
645 |
{
|
646 |
"role": "system",
|
647 |
-
"content":
|
|
|
|
|
648 |
},
|
649 |
{
|
650 |
"role": "user",
|
@@ -710,7 +712,9 @@ The model can batch inputs composed of mixed samples of various types such as te
|
|
710 |
conversation1 = [
|
711 |
{
|
712 |
"role": "system",
|
713 |
-
"content":
|
|
|
|
|
714 |
},
|
715 |
{
|
716 |
"role": "user",
|
@@ -724,7 +728,9 @@ conversation1 = [
|
|
724 |
conversation2 = [
|
725 |
{
|
726 |
"role": "system",
|
727 |
-
"content":
|
|
|
|
|
728 |
},
|
729 |
{
|
730 |
"role": "user",
|
@@ -738,7 +744,9 @@ conversation2 = [
|
|
738 |
conversation3 = [
|
739 |
{
|
740 |
"role": "system",
|
741 |
-
"content":
|
|
|
|
|
742 |
},
|
743 |
{
|
744 |
"role": "user",
|
@@ -751,7 +759,9 @@ conversation3 = [
|
|
751 |
conversation4 = [
|
752 |
{
|
753 |
"role": "system",
|
754 |
-
"content":
|
|
|
|
|
755 |
},
|
756 |
{
|
757 |
"role": "user",
|
@@ -791,7 +801,9 @@ If users need audio output, the system prompt must be set as "You are Qwen, a vi
|
|
791 |
```
|
792 |
{
|
793 |
"role": "system",
|
794 |
-
"content":
|
|
|
|
|
795 |
}
|
796 |
```
|
797 |
#### Use audio in video
|
|
|
597 |
|
598 |
## Quickstart
|
599 |
|
600 |
+
Below, we provide simple examples to show how to use Qwen2.5-Omni with 🤗 Transformers. The code for Qwen2.5-Omni is now included in the latest Hugging Face Transformers, and we advise you to build from source with the following commands:
|
601 |
```
|
602 |
pip uninstall transformers
|
603 |
+
pip install git+https://github.com/huggingface/transformers
|
604 |
pip install accelerate
|
605 |
```
|
606 |
or you might encounter the following error:
|
|
|
644 |
conversation = [
|
645 |
{
|
646 |
"role": "system",
|
647 |
+
"content": [
|
648 |
+
{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}
|
649 |
+
],
|
650 |
},
|
651 |
{
|
652 |
"role": "user",
|
|
|
712 |
conversation1 = [
|
713 |
{
|
714 |
"role": "system",
|
715 |
+
"content": [
|
716 |
+
{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}
|
717 |
+
],
|
718 |
},
|
719 |
{
|
720 |
"role": "user",
|
|
|
728 |
conversation2 = [
|
729 |
{
|
730 |
"role": "system",
|
731 |
+
"content": [
|
732 |
+
{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}
|
733 |
+
],
|
734 |
},
|
735 |
{
|
736 |
"role": "user",
|
|
|
744 |
conversation3 = [
|
745 |
{
|
746 |
"role": "system",
|
747 |
+
"content": [
|
748 |
+
{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}
|
749 |
+
],
|
750 |
},
|
751 |
{
|
752 |
"role": "user",
|
|
|
759 |
conversation4 = [
|
760 |
{
|
761 |
"role": "system",
|
762 |
+
"content": [
|
763 |
+
{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}
|
764 |
+
],
|
765 |
},
|
766 |
{
|
767 |
"role": "user",
|
|
|
801 |
```
|
802 |
{
|
803 |
"role": "system",
|
804 |
+
"content": [
|
805 |
+
{"type": "text", "text": "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech."}
|
806 |
+
],
|
807 |
}
|
808 |
```
|
809 |
#### Use audio in video
|