add Yi-VL-34B model

hiyouga · May 15, 2024 · a388cad
1 parent 73845fc
commit a388cad
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -166,7 +166,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [StarCoder2](https://huggingface.co/bigcode)             | 3B/7B/15B                        | q_proj,v_proj     | -         |
 | [XVERSE](https://huggingface.co/xverse)                  | 7B/13B/65B                       | q_proj,v_proj     | xverse    |
 | [Yi (1/1.5)](https://huggingface.co/01-ai)               | 6B/9B/34B                        | q_proj,v_proj     | yi        |
-| [Yi-VL](https://huggingface.co/01-ai)                    | 6B                               | q_proj,v_proj     | yi_vl     |
+| [Yi-VL](https://huggingface.co/01-ai)                    | 6B/34B                           | q_proj,v_proj     | yi_vl     |
 | [Yuan](https://huggingface.co/IEITYuan)                  | 2B/51B/102B                      | q_proj,v_proj     | yuan      |
 
 > [!NOTE]

diff --git a/README_zh.md b/README_zh.md
@@ -166,7 +166,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
 | [StarCoder2](https://huggingface.co/bigcode)             | 3B/7B/15B                        | q_proj,v_proj     | -         |
 | [XVERSE](https://huggingface.co/xverse)                  | 7B/13B/65B                       | q_proj,v_proj     | xverse    |
 | [Yi (1/1.5)](https://huggingface.co/01-ai)               | 6B/9B/34B                        | q_proj,v_proj     | yi        |
-| [Yi-VL](https://huggingface.co/01-ai)                    | 6B                               | q_proj,v_proj     | yi_vl     |
+| [Yi-VL](https://huggingface.co/01-ai)                    | 6B/34B                           | q_proj,v_proj     | yi_vl     |
 | [Yuan](https://huggingface.co/IEITYuan)                  | 2B/51B/102B                      | q_proj,v_proj     | yuan      |
 
 > [!NOTE]

diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
@@ -1215,6 +1215,9 @@ def register_model_group(
         "YiVL-6B-Chat": {
             DownloadSource.DEFAULT: "BUAADreamer/Yi-VL-6B-hf",
         },
+        "YiVL-34B-Chat": {
+            DownloadSource.DEFAULT: "BUAADreamer/Yi-VL-34B-hf",
+        },
     },
     template="yi_vl",
     vision=True,

diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
@@ -78,8 +78,15 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
     patch_tokenizer(tokenizer)
 
     if model_args.visual_inputs:
-        processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
-        setattr(processor, "tokenizer", tokenizer)
+        try:
+            processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
+            setattr(processor, "tokenizer", tokenizer)
+        except Exception:
+            raise ValueError(
+                "This multimodal LLM is not supported.\n"
+                "Download LLaVA-1.5 models from: https://huggingface.co/llava-hf\n"
+                "Download Yi-VL models from: https://huggingface.co/BUAADreamer"
+            )
     else:
         processor = None
 

diff --git a/src/llmtuner/model/utils/visual.py b/src/llmtuner/model/utils/visual.py
@@ -58,7 +58,7 @@ def __init__(self, vision_hidden_size: int, text_hidden_size: int, projector_hid
         self.linear_2 = torch.nn.LayerNorm(text_hidden_size, bias=True)
         self.linear_3 = torch.nn.Linear(text_hidden_size, text_hidden_size, bias=True)
         self.linear_4 = torch.nn.LayerNorm(text_hidden_size, bias=True)
-        self.act = torch.nn.GELU()
+        self.act = ACT2FN[projector_hidden_act]
 
 
 def autocast_projector_dtype(