qwen-1_8b |
qwen/Qwen-1_8B |
c_attn |
default-generation |
✔ |
✔ |
|
- |
Qwen/Qwen-1_8B |
qwen-1_8b-chat |
qwen/Qwen-1_8B-Chat |
c_attn |
qwen |
✔ |
✔ |
|
- |
Qwen/Qwen-1_8B-Chat |
qwen-1_8b-chat-int4 |
qwen/Qwen-1_8B-Chat-Int4 |
c_attn |
qwen |
✔ |
✔ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-1_8B-Chat-Int4 |
qwen-1_8b-chat-int8 |
qwen/Qwen-1_8B-Chat-Int8 |
c_attn |
qwen |
✔ |
✘ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-1_8B-Chat-Int8 |
qwen-7b |
qwen/Qwen-7B |
c_attn |
default-generation |
✔ |
✔ |
|
- |
Qwen/Qwen-7B |
qwen-7b-chat |
qwen/Qwen-7B-Chat |
c_attn |
qwen |
✔ |
✔ |
|
- |
Qwen/Qwen-7B-Chat |
qwen-7b-chat-int4 |
qwen/Qwen-7B-Chat-Int4 |
c_attn |
qwen |
✔ |
✔ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-7B-Chat-Int4 |
qwen-7b-chat-int8 |
qwen/Qwen-7B-Chat-Int8 |
c_attn |
qwen |
✔ |
✘ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-7B-Chat-Int8 |
qwen-14b |
qwen/Qwen-14B |
c_attn |
default-generation |
✔ |
✔ |
|
- |
Qwen/Qwen-14B |
qwen-14b-chat |
qwen/Qwen-14B-Chat |
c_attn |
qwen |
✔ |
✔ |
|
- |
Qwen/Qwen-14B-Chat |
qwen-14b-chat-int4 |
qwen/Qwen-14B-Chat-Int4 |
c_attn |
qwen |
✔ |
✔ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-14B-Chat-Int4 |
qwen-14b-chat-int8 |
qwen/Qwen-14B-Chat-Int8 |
c_attn |
qwen |
✔ |
✘ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-14B-Chat-Int8 |
qwen-72b |
qwen/Qwen-72B |
c_attn |
default-generation |
✔ |
✔ |
|
- |
Qwen/Qwen-72B |
qwen-72b-chat |
qwen/Qwen-72B-Chat |
c_attn |
qwen |
✔ |
✔ |
|
- |
Qwen/Qwen-72B-Chat |
qwen-72b-chat-int4 |
qwen/Qwen-72B-Chat-Int4 |
c_attn |
qwen |
✔ |
✔ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-72B-Chat-Int4 |
qwen-72b-chat-int8 |
qwen/Qwen-72B-Chat-Int8 |
c_attn |
qwen |
✔ |
✘ |
auto_gptq>=0.5 |
- |
Qwen/Qwen-72B-Chat-Int8 |
modelscope-agent-7b |
iic/ModelScope-Agent-7B |
c_attn |
modelscope-agent |
✔ |
✘ |
|
- |
- |
modelscope-agent-14b |
iic/ModelScope-Agent-14B |
c_attn |
modelscope-agent |
✔ |
✘ |
|
- |
- |
qwen1half-0_5b |
qwen/Qwen1.5-0.5B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-0.5B |
qwen1half-1_8b |
qwen/Qwen1.5-1.8B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-1.8B |
qwen1half-4b |
qwen/Qwen1.5-4B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-4B |
qwen1half-7b |
qwen/Qwen1.5-7B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-7B |
qwen1half-14b |
qwen/Qwen1.5-14B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-14B |
qwen1half-32b |
qwen/Qwen1.5-32B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-32B |
qwen1half-72b |
qwen/Qwen1.5-72B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-72B |
qwen1half-110b |
qwen/Qwen1.5-110B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-110B |
codeqwen1half-7b |
qwen/CodeQwen1.5-7B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/CodeQwen1.5-7B |
qwen1half-moe-a2_7b |
qwen/Qwen1.5-MoE-A2.7B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.40 |
- |
Qwen/Qwen1.5-MoE-A2.7B |
qwen1half-0_5b-chat |
qwen/Qwen1.5-0.5B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-0.5B-Chat |
qwen1half-1_8b-chat |
qwen/Qwen1.5-1.8B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-1.8B-Chat |
qwen1half-4b-chat |
qwen/Qwen1.5-4B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-4B-Chat |
qwen1half-7b-chat |
qwen/Qwen1.5-7B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-7B-Chat |
qwen1half-14b-chat |
qwen/Qwen1.5-14B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-14B-Chat |
qwen1half-32b-chat |
qwen/Qwen1.5-32B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-32B-Chat |
qwen1half-72b-chat |
qwen/Qwen1.5-72B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-72B-Chat |
qwen1half-110b-chat |
qwen/Qwen1.5-110B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/Qwen1.5-110B-Chat |
qwen1half-moe-a2_7b-chat |
qwen/Qwen1.5-MoE-A2.7B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.40 |
- |
Qwen/Qwen1.5-MoE-A2.7B-Chat |
codeqwen1half-7b-chat |
qwen/CodeQwen1.5-7B-Chat |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37 |
- |
Qwen/CodeQwen1.5-7B-Chat |
qwen1half-0_5b-chat-int4 |
qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4 |
qwen1half-1_8b-chat-int4 |
qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4 |
qwen1half-4b-chat-int4 |
qwen/Qwen1.5-4B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-4B-Chat-GPTQ-Int4 |
qwen1half-7b-chat-int4 |
qwen/Qwen1.5-7B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-7B-Chat-GPTQ-Int4 |
qwen1half-14b-chat-int4 |
qwen/Qwen1.5-14B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-14B-Chat-GPTQ-Int4 |
qwen1half-32b-chat-int4 |
qwen/Qwen1.5-32B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-32B-Chat-GPTQ-Int4 |
qwen1half-72b-chat-int4 |
qwen/Qwen1.5-72B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-72B-Chat-GPTQ-Int4 |
qwen1half-110b-chat-int4 |
qwen/Qwen1.5-110B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-110B-Chat-GPTQ-Int4 |
qwen1half-0_5b-chat-int8 |
qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8 |
qwen1half-1_8b-chat-int8 |
qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8 |
qwen1half-4b-chat-int8 |
qwen/Qwen1.5-4B-Chat-GPTQ-Int8 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-4B-Chat-GPTQ-Int8 |
qwen1half-7b-chat-int8 |
qwen/Qwen1.5-7B-Chat-GPTQ-Int8 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-7B-Chat-GPTQ-Int8 |
qwen1half-14b-chat-int8 |
qwen/Qwen1.5-14B-Chat-GPTQ-Int8 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-14B-Chat-GPTQ-Int8 |
qwen1half-72b-chat-int8 |
qwen/Qwen1.5-72B-Chat-GPTQ-Int8 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.37 |
- |
Qwen/Qwen1.5-72B-Chat-GPTQ-Int8 |
qwen1half-moe-a2_7b-chat-int4 |
qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4 |
q_proj, k_proj, v_proj |
qwen |
✔ |
✘ |
auto_gptq>=0.5, transformers>=4.40 |
- |
Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4 |
qwen1half-0_5b-chat-awq |
qwen/Qwen1.5-0.5B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-0.5B-Chat-AWQ |
qwen1half-1_8b-chat-awq |
qwen/Qwen1.5-1.8B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-1.8B-Chat-AWQ |
qwen1half-4b-chat-awq |
qwen/Qwen1.5-4B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-4B-Chat-AWQ |
qwen1half-7b-chat-awq |
qwen/Qwen1.5-7B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-7B-Chat-AWQ |
qwen1half-14b-chat-awq |
qwen/Qwen1.5-14B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-14B-Chat-AWQ |
qwen1half-32b-chat-awq |
qwen/Qwen1.5-32B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-32B-Chat-AWQ |
qwen1half-72b-chat-awq |
qwen/Qwen1.5-72B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-72B-Chat-AWQ |
qwen1half-110b-chat-awq |
qwen/Qwen1.5-110B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/Qwen1.5-110B-Chat-AWQ |
codeqwen1half-7b-chat-awq |
qwen/CodeQwen1.5-7B-Chat-AWQ |
q_proj, k_proj, v_proj |
qwen |
✔ |
✔ |
transformers>=4.37, autoawq |
- |
Qwen/CodeQwen1.5-7B-Chat-AWQ |
qwen-vl |
qwen/Qwen-VL |
c_attn |
default-generation |
✔ |
✘ |
|
multi-modal, vision |
Qwen/Qwen-VL |
qwen-vl-chat |
qwen/Qwen-VL-Chat |
c_attn |
qwen |
✔ |
✘ |
|
multi-modal, vision |
Qwen/Qwen-VL-Chat |
qwen-vl-chat-int4 |
qwen/Qwen-VL-Chat-Int4 |
c_attn |
qwen |
✔ |
✘ |
auto_gptq>=0.5 |
multi-modal, vision |
Qwen/Qwen-VL-Chat-Int4 |
qwen-audio |
qwen/Qwen-Audio |
c_attn |
qwen-audio-generation |
✔ |
✘ |
|
multi-modal, audio |
Qwen/Qwen-Audio |
qwen-audio-chat |
qwen/Qwen-Audio-Chat |
c_attn |
qwen-audio |
✔ |
✘ |
|
multi-modal, audio |
Qwen/Qwen-Audio-Chat |
chatglm2-6b |
ZhipuAI/chatglm2-6b |
query_key_value |
chatglm2 |
✘ |
✔ |
|
- |
THUDM/chatglm2-6b |
chatglm2-6b-32k |
ZhipuAI/chatglm2-6b-32k |
query_key_value |
chatglm2 |
✘ |
✔ |
|
- |
THUDM/chatglm2-6b-32k |
chatglm3-6b-base |
ZhipuAI/chatglm3-6b-base |
query_key_value |
chatglm-generation |
✘ |
✔ |
|
- |
THUDM/chatglm3-6b-base |
chatglm3-6b |
ZhipuAI/chatglm3-6b |
query_key_value |
chatglm3 |
✘ |
✔ |
|
- |
THUDM/chatglm3-6b |
chatglm3-6b-32k |
ZhipuAI/chatglm3-6b-32k |
query_key_value |
chatglm3 |
✘ |
✔ |
|
- |
THUDM/chatglm3-6b-32k |
chatglm3-6b-128k |
ZhipuAI/chatglm3-6b-128k |
query_key_value |
chatglm3 |
✘ |
✔ |
|
- |
THUDM/chatglm3-6b-128k |
codegeex2-6b |
ZhipuAI/codegeex2-6b |
query_key_value |
chatglm-generation |
✘ |
✔ |
transformers<4.34 |
coding |
THUDM/codegeex2-6b |
llama2-7b |
modelscope/Llama-2-7b-ms |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
meta-llama/Llama-2-7b-hf |
llama2-7b-chat |
modelscope/Llama-2-7b-chat-ms |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
meta-llama/Llama-2-7b-chat-hf |
llama2-13b |
modelscope/Llama-2-13b-ms |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
meta-llama/Llama-2-13b-hf |
llama2-13b-chat |
modelscope/Llama-2-13b-chat-ms |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
meta-llama/Llama-2-13b-chat-hf |
llama2-70b |
modelscope/Llama-2-70b-ms |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
meta-llama/Llama-2-70b-hf |
llama2-70b-chat |
modelscope/Llama-2-70b-chat-ms |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
meta-llama/Llama-2-70b-chat-hf |
llama2-7b-aqlm-2bit-1x16 |
AI-ModelScope/Llama-2-7b-AQLM-2Bit-1x16-hf |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✘ |
transformers>=4.38, aqlm, torch>=2.2.0 |
- |
ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf |
llama3-8b |
LLM-Research/Meta-Llama-3-8B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
meta-llama/Meta-Llama-3-8B |
llama3-8b-instruct |
LLM-Research/Meta-Llama-3-8B-Instruct |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
|
- |
meta-llama/Meta-Llama-3-8B-Instruct |
llama3-8b-instruct-int4 |
huangjintao/Meta-Llama-3-8B-Instruct-GPTQ-Int4 |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
auto_gptq |
- |
study-hjt/Meta-Llama-3-8B-Instruct-GPTQ-Int4 |
llama3-8b-instruct-int8 |
huangjintao/Meta-Llama-3-8B-Instruct-GPTQ-Int8 |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
auto_gptq |
- |
study-hjt/Meta-Llama-3-8B-Instruct-GPTQ-Int8 |
llama3-8b-instruct-awq |
huangjintao/Meta-Llama-3-8B-Instruct-AWQ |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
autoawq |
- |
study-hjt/Meta-Llama-3-8B-Instruct-AWQ |
llama3-70b |
LLM-Research/Meta-Llama-3-70B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
meta-llama/Meta-Llama-3-70B |
llama3-70b-instruct |
LLM-Research/Meta-Llama-3-70B-Instruct |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
|
- |
meta-llama/Meta-Llama-3-70B-Instruct |
llama3-70b-instruct-int4 |
huangjintao/Meta-Llama-3-70B-Instruct-GPTQ-Int4 |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
auto_gptq |
- |
study-hjt/Meta-Llama-3-70B-Instruct-GPTQ-Int4 |
llama3-70b-instruct-int8 |
huangjintao/Meta-Llama-3-70B-Instruct-GPTQ-Int8 |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
auto_gptq |
- |
study-hjt/Meta-Llama-3-70B-Instruct-GPTQ-Int8 |
llama3-70b-instruct-awq |
huangjintao/Meta-Llama-3-70B-Instruct-AWQ |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
autoawq |
- |
study-hjt/Meta-Llama-3-70B-Instruct-AWQ |
chinese-llama-2-1_3b |
AI-ModelScope/chinese-llama-2-1.3b |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/chinese-llama-2-1.3b |
chinese-llama-2-7b |
AI-ModelScope/chinese-llama-2-7b |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/chinese-llama-2-7b |
chinese-llama-2-7b-16k |
AI-ModelScope/chinese-llama-2-7b-16k |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/chinese-llama-2-7b-16k |
chinese-llama-2-7b-64k |
AI-ModelScope/chinese-llama-2-7b-64k |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/chinese-llama-2-7b-64k |
chinese-llama-2-13b |
AI-ModelScope/chinese-llama-2-13b |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/chinese-llama-2-13b |
chinese-llama-2-13b-16k |
AI-ModelScope/chinese-llama-2-13b-16k |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/chinese-llama-2-13b-16k |
chinese-alpaca-2-1_3b |
AI-ModelScope/chinese-alpaca-2-1.3b |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
hfl/chinese-alpaca-2-1.3b |
chinese-alpaca-2-7b |
AI-ModelScope/chinese-alpaca-2-7b |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
hfl/chinese-alpaca-2-7b |
chinese-alpaca-2-7b-16k |
AI-ModelScope/chinese-alpaca-2-7b-16k |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
hfl/chinese-alpaca-2-7b-16k |
chinese-alpaca-2-7b-64k |
AI-ModelScope/chinese-alpaca-2-7b-64k |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
hfl/chinese-alpaca-2-7b-64k |
chinese-alpaca-2-13b |
AI-ModelScope/chinese-alpaca-2-13b |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
hfl/chinese-alpaca-2-13b |
chinese-alpaca-2-13b-16k |
AI-ModelScope/chinese-alpaca-2-13b-16k |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
|
- |
hfl/chinese-alpaca-2-13b-16k |
llama-3-chinese-8b |
ChineseAlpacaGroup/llama-3-chinese-8b |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
hfl/llama-3-chinese-8b |
llama-3-chinese-8b-instruct |
ChineseAlpacaGroup/llama-3-chinese-8b-instruct |
q_proj, k_proj, v_proj |
llama3 |
✔ |
✔ |
|
- |
hfl/llama-3-chinese-8b-instruct |
atom-7b |
FlagAlpha/Atom-7B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
FlagAlpha/Atom-7B |
atom-7b-chat |
FlagAlpha/Atom-7B-Chat |
q_proj, k_proj, v_proj |
atom |
✔ |
✔ |
|
- |
FlagAlpha/Atom-7B-Chat |
llava1d6-mistral-7b-instruct |
AI-ModelScope/llava-v1.6-mistral-7b |
q_proj, k_proj, v_proj |
llava-mistral-instruct |
✔ |
✘ |
transformers>=4.34 |
multi-modal, vision |
liuhaotian/llava-v1.6-mistral-7b |
llava1d6-yi-34b-instruct |
AI-ModelScope/llava-v1.6-34b |
q_proj, k_proj, v_proj |
llava-yi-instruct |
✔ |
✘ |
|
multi-modal, vision |
liuhaotian/llava-v1.6-34b |
llama3-llava-next-8b |
AI-Modelscope/llama3-llava-next-8b |
q_proj, k_proj, v_proj |
llama-llava-next |
✔ |
✘ |
|
multi-modal, vision |
lmms-lab/llama3-llava-next-8b |
llava-next-72b |
AI-Modelscope/llava-next-72b |
q_proj, k_proj, v_proj |
llava-qwen-instruct |
✔ |
✘ |
|
multi-modal, vision |
lmms-lab/llava-next-72b |
llava-next-110b |
AI-Modelscope/llava-next-110b |
q_proj, k_proj, v_proj |
llava-qwen-instruct |
✔ |
✘ |
|
multi-modal, vision |
lmms-lab/llava-next-110b |
yi-6b |
01ai/Yi-6B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-6B |
yi-6b-200k |
01ai/Yi-6B-200K |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-6B-200K |
yi-6b-chat |
01ai/Yi-6B-Chat |
q_proj, k_proj, v_proj |
yi |
✔ |
✔ |
|
- |
01-ai/Yi-6B-Chat |
yi-6b-chat-awq |
01ai/Yi-6B-Chat-4bits |
q_proj, k_proj, v_proj |
yi |
✔ |
✔ |
autoawq |
- |
01-ai/Yi-6B-Chat-4bits |
yi-6b-chat-int8 |
01ai/Yi-6B-Chat-8bits |
q_proj, k_proj, v_proj |
yi |
✔ |
✔ |
auto_gptq |
- |
01-ai/Yi-6B-Chat-8bits |
yi-9b |
01ai/Yi-9B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-9B |
yi-9b-200k |
01ai/Yi-9B-200K |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-9B-200K |
yi-34b |
01ai/Yi-34B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-34B |
yi-34b-200k |
01ai/Yi-34B-200K |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-34B-200K |
yi-34b-chat |
01ai/Yi-34B-Chat |
q_proj, k_proj, v_proj |
yi |
✔ |
✔ |
|
- |
01-ai/Yi-34B-Chat |
yi-34b-chat-awq |
01ai/Yi-34B-Chat-4bits |
q_proj, k_proj, v_proj |
yi |
✔ |
✔ |
autoawq |
- |
01-ai/Yi-34B-Chat-4bits |
yi-34b-chat-int8 |
01ai/Yi-34B-Chat-8bits |
q_proj, k_proj, v_proj |
yi |
✔ |
✔ |
auto_gptq |
- |
01-ai/Yi-34B-Chat-8bits |
yi-1_5-6b |
01ai/Yi-1.5-6B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-1.5-6B |
yi-1_5-6b-chat |
01ai/Yi-1.5-6B-Chat |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
|
- |
01-ai/Yi-1.5-6B-Chat |
yi-1_5-6b-chat-awq-int4 |
AI-ModelScope/Yi-1.5-6B-Chat-AWQ |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
autoawq |
- |
modelscope/Yi-1.5-6B-Chat-AWQ |
yi-1_5-6b-chat-gptq-int4 |
AI-ModelScope/Yi-1.5-6B-Chat-GPTQ |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
auto_gptq>=0.5 |
- |
modelscope/Yi-1.5-6B-Chat-GPTQ |
yi-1_5-9b-chat-awq-int4 |
AI-ModelScope/Yi-1.5-9B-Chat-AWQ |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
autoawq |
- |
modelscope/Yi-1.5-9B-Chat-AWQ |
yi-1_5-9b-chat-gptq-int4 |
AI-ModelScope/Yi-1.5-9B-Chat-GPTQ |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
auto_gptq>=0.5 |
- |
modelscope/Yi-1.5-9B-Chat-GPTQ |
yi-1_5-34b-chat-awq-int4 |
AI-ModelScope/Yi-1.5-34B-Chat-AWQ |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
autoawq |
- |
modelscope/Yi-1.5-34B-Chat-AWQ |
yi-1_5-34b-chat-gptq-int4 |
AI-ModelScope/Yi-1.5-34B-Chat-GPTQ |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
auto_gptq>=0.5 |
- |
modelscope/Yi-1.5-34B-Chat-GPTQ |
yi-1_5-9b |
01ai/Yi-1.5-9B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-1.5-9B |
yi-1_5-9b-chat |
01ai/Yi-1.5-9B-Chat |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
|
- |
01-ai/Yi-1.5-9B-Chat |
yi-1_5-34b |
01ai/Yi-1.5-34B |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
01-ai/Yi-1.5-34B |
yi-1_5-34b-chat |
01ai/Yi-1.5-34B-Chat |
q_proj, k_proj, v_proj |
yi1_5 |
✔ |
✔ |
|
- |
01-ai/Yi-1.5-34B-Chat |
yi-vl-6b-chat |
01ai/Yi-VL-6B |
q_proj, k_proj, v_proj |
yi-vl |
✔ |
✘ |
transformers>=4.34 |
multi-modal, vision |
01-ai/Yi-VL-6B |
yi-vl-34b-chat |
01ai/Yi-VL-34B |
q_proj, k_proj, v_proj |
yi-vl |
✔ |
✘ |
transformers>=4.34 |
multi-modal, vision |
01-ai/Yi-VL-34B |
llava-llama-3-8b-v1_1 |
AI-ModelScope/llava-llama-3-8b-v1_1-transformers |
q_proj, k_proj, v_proj |
llava-llama-instruct |
✔ |
✘ |
transformers>=4.36 |
multi-modal, vision |
xtuner/llava-llama-3-8b-v1_1-transformers |
internlm-7b |
Shanghai_AI_Laboratory/internlm-7b |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
internlm/internlm-7b |
internlm-7b-chat |
Shanghai_AI_Laboratory/internlm-chat-7b |
q_proj, k_proj, v_proj |
internlm |
✘ |
✔ |
|
- |
internlm/internlm-chat-7b |
internlm-7b-chat-8k |
Shanghai_AI_Laboratory/internlm-chat-7b-8k |
q_proj, k_proj, v_proj |
internlm |
✘ |
✔ |
|
- |
- |
internlm-20b |
Shanghai_AI_Laboratory/internlm-20b |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
internlm/internlm-20b |
internlm-20b-chat |
Shanghai_AI_Laboratory/internlm-chat-20b |
q_proj, k_proj, v_proj |
internlm |
✘ |
✔ |
|
- |
internlm/internlm-chat-20b |
internlm2-1_8b |
Shanghai_AI_Laboratory/internlm2-1_8b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-1_8b |
internlm2-1_8b-sft-chat |
Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-chat-1_8b-sft |
internlm2-1_8b-chat |
Shanghai_AI_Laboratory/internlm2-chat-1_8b |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-chat-1_8b |
internlm2-7b-base |
Shanghai_AI_Laboratory/internlm2-base-7b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-base-7b |
internlm2-7b |
Shanghai_AI_Laboratory/internlm2-7b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-7b |
internlm2-7b-sft-chat |
Shanghai_AI_Laboratory/internlm2-chat-7b-sft |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-chat-7b-sft |
internlm2-7b-chat |
Shanghai_AI_Laboratory/internlm2-chat-7b |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-chat-7b |
internlm2-20b-base |
Shanghai_AI_Laboratory/internlm2-base-20b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-base-20b |
internlm2-20b |
Shanghai_AI_Laboratory/internlm2-20b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-20b |
internlm2-20b-sft-chat |
Shanghai_AI_Laboratory/internlm2-chat-20b-sft |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-chat-20b-sft |
internlm2-20b-chat |
Shanghai_AI_Laboratory/internlm2-chat-20b |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
- |
internlm/internlm2-chat-20b |
internlm2-math-7b |
Shanghai_AI_Laboratory/internlm2-math-base-7b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
math |
internlm/internlm2-math-base-7b |
internlm2-math-7b-chat |
Shanghai_AI_Laboratory/internlm2-math-7b |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
math |
internlm/internlm2-math-7b |
internlm2-math-20b |
Shanghai_AI_Laboratory/internlm2-math-base-20b |
wqkv |
default-generation |
✔ |
✔ |
transformers>=4.35 |
math |
internlm/internlm2-math-base-20b |
internlm2-math-20b-chat |
Shanghai_AI_Laboratory/internlm2-math-20b |
wqkv |
internlm2 |
✔ |
✔ |
transformers>=4.35 |
math |
internlm/internlm2-math-20b |
internlm-xcomposer2-7b-chat |
Shanghai_AI_Laboratory/internlm-xcomposer2-7b |
wqkv |
internlm-xcomposer2 |
✔ |
✘ |
|
multi-modal, vision |
internlm/internlm-xcomposer2-7b |
internvl-chat-v1_5 |
AI-ModelScope/InternVL-Chat-V1-5 |
wqkv |
internvl |
✔ |
✘ |
transformers>=4.35, timm |
- |
OpenGVLab/InternVL-Chat-V1-5 |
internvl-chat-v1_5-int8 |
AI-ModelScope/InternVL-Chat-V1-5-int8 |
wqkv |
internvl |
✔ |
✘ |
transformers>=4.35, timm |
- |
OpenGVLab/InternVL-Chat-V1-5-int8 |
deepseek-7b |
deepseek-ai/deepseek-llm-7b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
deepseek-ai/deepseek-llm-7b-base |
deepseek-7b-chat |
deepseek-ai/deepseek-llm-7b-chat |
q_proj, k_proj, v_proj |
deepseek |
✔ |
✔ |
|
- |
deepseek-ai/deepseek-llm-7b-chat |
deepseek-moe-16b |
deepseek-ai/deepseek-moe-16b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
deepseek-ai/deepseek-moe-16b-base |
deepseek-moe-16b-chat |
deepseek-ai/deepseek-moe-16b-chat |
q_proj, k_proj, v_proj |
deepseek |
✔ |
✔ |
|
- |
deepseek-ai/deepseek-moe-16b-chat |
deepseek-67b |
deepseek-ai/deepseek-llm-67b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
deepseek-ai/deepseek-llm-67b-base |
deepseek-67b-chat |
deepseek-ai/deepseek-llm-67b-chat |
q_proj, k_proj, v_proj |
deepseek |
✔ |
✔ |
|
- |
deepseek-ai/deepseek-llm-67b-chat |
deepseek-coder-1_3b |
deepseek-ai/deepseek-coder-1.3b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
coding |
deepseek-ai/deepseek-coder-1.3b-base |
deepseek-coder-1_3b-instruct |
deepseek-ai/deepseek-coder-1.3b-instruct |
q_proj, k_proj, v_proj |
deepseek-coder |
✔ |
✔ |
|
coding |
deepseek-ai/deepseek-coder-1.3b-instruct |
deepseek-coder-6_7b |
deepseek-ai/deepseek-coder-6.7b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
coding |
deepseek-ai/deepseek-coder-6.7b-base |
deepseek-coder-6_7b-instruct |
deepseek-ai/deepseek-coder-6.7b-instruct |
q_proj, k_proj, v_proj |
deepseek-coder |
✔ |
✔ |
|
coding |
deepseek-ai/deepseek-coder-6.7b-instruct |
deepseek-coder-33b |
deepseek-ai/deepseek-coder-33b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
coding |
deepseek-ai/deepseek-coder-33b-base |
deepseek-coder-33b-instruct |
deepseek-ai/deepseek-coder-33b-instruct |
q_proj, k_proj, v_proj |
deepseek-coder |
✔ |
✔ |
|
coding |
deepseek-ai/deepseek-coder-33b-instruct |
deepseek-math-7b |
deepseek-ai/deepseek-math-7b-base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
math |
deepseek-ai/deepseek-math-7b-base |
deepseek-math-7b-instruct |
deepseek-ai/deepseek-math-7b-instruct |
q_proj, k_proj, v_proj |
deepseek |
✔ |
✔ |
|
math |
deepseek-ai/deepseek-math-7b-instruct |
deepseek-math-7b-chat |
deepseek-ai/deepseek-math-7b-rl |
q_proj, k_proj, v_proj |
deepseek |
✔ |
✔ |
|
math |
deepseek-ai/deepseek-math-7b-rl |
deepseek-vl-1_3b-chat |
deepseek-ai/deepseek-vl-1.3b-chat |
q_proj, k_proj, v_proj |
deepseek-vl |
✔ |
✘ |
attrdict |
multi-modal, vision |
deepseek-ai/deepseek-vl-1.3b-chat |
deepseek-vl-7b-chat |
deepseek-ai/deepseek-vl-7b-chat |
q_proj, k_proj, v_proj |
deepseek-vl |
✔ |
✘ |
attrdict |
multi-modal, vision |
deepseek-ai/deepseek-vl-7b-chat |
deepseek-v2-chat |
deepseek-ai/DeepSeek-V2-Chat |
q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj |
deepseek2 |
✔ |
✔ |
transformers>=4.39.3 |
- |
deepseek-ai/DeepSeek-V2-Chat |
gemma-2b |
AI-ModelScope/gemma-2b |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.38 |
- |
google/gemma-2b |
gemma-7b |
AI-ModelScope/gemma-7b |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.38 |
- |
google/gemma-7b |
gemma-2b-instruct |
AI-ModelScope/gemma-2b-it |
q_proj, k_proj, v_proj |
gemma |
✔ |
✔ |
transformers>=4.38 |
- |
google/gemma-2b-it |
gemma-7b-instruct |
AI-ModelScope/gemma-7b-it |
q_proj, k_proj, v_proj |
gemma |
✔ |
✔ |
transformers>=4.38 |
- |
google/gemma-7b-it |
minicpm-1b-sft-chat |
OpenBMB/MiniCPM-1B-sft-bf16 |
q_proj, k_proj, v_proj |
minicpm |
✔ |
✔ |
transformers>=4.36.0 |
- |
openbmb/MiniCPM-1B-sft-bf16 |
minicpm-2b-sft-chat |
OpenBMB/MiniCPM-2B-sft-fp32 |
q_proj, k_proj, v_proj |
minicpm |
✔ |
✔ |
|
- |
openbmb/MiniCPM-2B-sft-fp32 |
minicpm-2b-chat |
OpenBMB/MiniCPM-2B-dpo-fp32 |
q_proj, k_proj, v_proj |
minicpm |
✔ |
✔ |
|
- |
openbmb/MiniCPM-2B-dpo-fp32 |
minicpm-2b-128k |
OpenBMB/MiniCPM-2B-128k |
q_proj, k_proj, v_proj |
chatml |
✔ |
✔ |
transformers>=4.36.0 |
- |
openbmb/MiniCPM-2B-128k |
minicpm-moe-8x2b |
OpenBMB/MiniCPM-MoE-8x2B |
q_proj, k_proj, v_proj |
minicpm |
✔ |
✔ |
transformers>=4.36.0 |
- |
openbmb/MiniCPM-MoE-8x2B |
minicpm-v-3b-chat |
OpenBMB/MiniCPM-V |
q_proj, k_proj, v_proj |
minicpm-v |
✔ |
✘ |
|
- |
openbmb/MiniCPM-V |
minicpm-v-v2 |
OpenBMB/MiniCPM-V-2 |
q_proj, k_proj, v_proj |
minicpm-v |
✔ |
✘ |
timm |
- |
openbmb/MiniCPM-V-2 |
openbuddy-llama2-13b-chat |
OpenBuddy/openbuddy-llama2-13b-v8.1-fp16 |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
|
- |
OpenBuddy/openbuddy-llama2-13b-v8.1-fp16 |
openbuddy-llama3-8b-chat |
OpenBuddy/openbuddy-llama3-8b-v21.1-8k |
q_proj, k_proj, v_proj |
openbuddy2 |
✔ |
✔ |
|
- |
OpenBuddy/openbuddy-llama3-8b-v21.1-8k |
openbuddy-llama-65b-chat |
OpenBuddy/openbuddy-llama-65b-v8-bf16 |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
|
- |
OpenBuddy/openbuddy-llama-65b-v8-bf16 |
openbuddy-llama2-70b-chat |
OpenBuddy/openbuddy-llama2-70b-v10.1-bf16 |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
|
- |
OpenBuddy/openbuddy-llama2-70b-v10.1-bf16 |
openbuddy-mistral-7b-chat |
OpenBuddy/openbuddy-mistral-7b-v17.1-32k |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
transformers>=4.34 |
- |
OpenBuddy/openbuddy-mistral-7b-v17.1-32k |
openbuddy-zephyr-7b-chat |
OpenBuddy/openbuddy-zephyr-7b-v14.1 |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
transformers>=4.34 |
- |
OpenBuddy/openbuddy-zephyr-7b-v14.1 |
openbuddy-deepseek-67b-chat |
OpenBuddy/openbuddy-deepseek-67b-v15.2 |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
|
- |
OpenBuddy/openbuddy-deepseek-67b-v15.2 |
openbuddy-mixtral-moe-7b-chat |
OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k |
q_proj, k_proj, v_proj |
openbuddy |
✔ |
✔ |
transformers>=4.36 |
- |
OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k |
mistral-7b |
AI-ModelScope/Mistral-7B-v0.1 |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.34 |
- |
mistralai/Mistral-7B-v0.1 |
mistral-7b-v2 |
AI-ModelScope/Mistral-7B-v0.2-hf |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.34 |
- |
alpindale/Mistral-7B-v0.2-hf |
mistral-7b-instruct |
AI-ModelScope/Mistral-7B-Instruct-v0.1 |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
transformers>=4.34 |
- |
mistralai/Mistral-7B-Instruct-v0.1 |
mistral-7b-instruct-v2 |
AI-ModelScope/Mistral-7B-Instruct-v0.2 |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
transformers>=4.34 |
- |
mistralai/Mistral-7B-Instruct-v0.2 |
mixtral-moe-7b |
AI-ModelScope/Mixtral-8x7B-v0.1 |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.36 |
- |
mistralai/Mixtral-8x7B-v0.1 |
mixtral-moe-7b-instruct |
AI-ModelScope/Mixtral-8x7B-Instruct-v0.1 |
q_proj, k_proj, v_proj |
llama |
✔ |
✔ |
transformers>=4.36 |
- |
mistralai/Mixtral-8x7B-Instruct-v0.1 |
mixtral-moe-7b-aqlm-2bit-1x16 |
AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✘ |
transformers>=4.38, aqlm, torch>=2.2.0 |
- |
ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf |
mixtral-moe-8x22b-v1 |
AI-ModelScope/Mixtral-8x22B-v0.1 |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
transformers>=4.36 |
- |
mistral-community/Mixtral-8x22B-v0.1 |
wizardlm2-7b-awq |
AI-ModelScope/WizardLM-2-7B-AWQ |
q_proj, k_proj, v_proj |
wizardlm2-awq |
✔ |
✔ |
transformers>=4.34 |
- |
MaziyarPanahi/WizardLM-2-7B-AWQ |
wizardlm2-8x22b |
AI-ModelScope/WizardLM-2-8x22B |
q_proj, k_proj, v_proj |
wizardlm2 |
✔ |
✔ |
transformers>=4.36 |
- |
alpindale/WizardLM-2-8x22B |
baichuan-7b |
baichuan-inc/baichuan-7B |
W_pack |
default-generation |
✘ |
✔ |
transformers<4.34 |
- |
baichuan-inc/Baichuan-7B |
baichuan-13b |
baichuan-inc/Baichuan-13B-Base |
W_pack |
default-generation |
✘ |
✔ |
transformers<4.34 |
- |
baichuan-inc/Baichuan-13B-Base |
baichuan-13b-chat |
baichuan-inc/Baichuan-13B-Chat |
W_pack |
baichuan |
✘ |
✔ |
transformers<4.34 |
- |
baichuan-inc/Baichuan-13B-Chat |
baichuan2-7b |
baichuan-inc/Baichuan2-7B-Base |
W_pack |
default-generation |
✘ |
✔ |
|
- |
baichuan-inc/Baichuan2-7B-Base |
baichuan2-7b-chat |
baichuan-inc/Baichuan2-7B-Chat |
W_pack |
baichuan |
✘ |
✔ |
|
- |
baichuan-inc/Baichuan2-7B-Chat |
baichuan2-7b-chat-int4 |
baichuan-inc/Baichuan2-7B-Chat-4bits |
W_pack |
baichuan |
✘ |
✘ |
bitsandbytes<0.41.2, accelerate<0.26 |
- |
baichuan-inc/Baichuan2-7B-Chat-4bits |
baichuan2-13b |
baichuan-inc/Baichuan2-13B-Base |
W_pack |
default-generation |
✘ |
✔ |
|
- |
baichuan-inc/Baichuan2-13B-Base |
baichuan2-13b-chat |
baichuan-inc/Baichuan2-13B-Chat |
W_pack |
baichuan |
✘ |
✔ |
|
- |
baichuan-inc/Baichuan2-13B-Chat |
baichuan2-13b-chat-int4 |
baichuan-inc/Baichuan2-13B-Chat-4bits |
W_pack |
baichuan |
✘ |
✘ |
bitsandbytes<0.41.2, accelerate<0.26 |
- |
baichuan-inc/Baichuan2-13B-Chat-4bits |
mplug-owl2-chat |
iic/mPLUG-Owl2 |
q_proj, k_proj.multiway.0, k_proj.multiway.1, v_proj.multiway.0, v_proj.multiway.1 |
mplug-owl2 |
✔ |
✘ |
transformers<4.35, icecream |
- |
MAGAer13/mplug-owl2-llama2-7b |
mplug-owl2d1-chat |
iic/mPLUG-Owl2.1 |
c_attn.multiway.0, c_attn.multiway.1 |
mplug-owl2 |
✔ |
✘ |
transformers<4.35, icecream |
- |
Mizukiluke/mplug_owl_2_1 |
yuan2-2b-instruct |
YuanLLM/Yuan2.0-2B-hf |
q_proj, k_proj, v_proj |
yuan |
✔ |
✘ |
|
- |
IEITYuan/Yuan2-2B-hf |
yuan2-2b-janus-instruct |
YuanLLM/Yuan2-2B-Janus-hf |
q_proj, k_proj, v_proj |
yuan |
✔ |
✘ |
|
- |
IEITYuan/Yuan2-2B-Janus-hf |
yuan2-51b-instruct |
YuanLLM/Yuan2.0-51B-hf |
q_proj, k_proj, v_proj |
yuan |
✔ |
✘ |
|
- |
IEITYuan/Yuan2-51B-hf |
yuan2-102b-instruct |
YuanLLM/Yuan2.0-102B-hf |
q_proj, k_proj, v_proj |
yuan |
✔ |
✘ |
|
- |
IEITYuan/Yuan2-102B-hf |
xverse-7b |
xverse/XVERSE-7B |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
xverse/XVERSE-7B |
xverse-7b-chat |
xverse/XVERSE-7B-Chat |
q_proj, k_proj, v_proj |
xverse |
✘ |
✔ |
|
- |
xverse/XVERSE-7B-Chat |
xverse-13b |
xverse/XVERSE-13B |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
xverse/XVERSE-13B |
xverse-13b-chat |
xverse/XVERSE-13B-Chat |
q_proj, k_proj, v_proj |
xverse |
✘ |
✔ |
|
- |
xverse/XVERSE-13B-Chat |
xverse-65b |
xverse/XVERSE-65B |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
xverse/XVERSE-65B |
xverse-65b-v2 |
xverse/XVERSE-65B-2 |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
xverse/XVERSE-65B-2 |
xverse-65b-chat |
xverse/XVERSE-65B-Chat |
q_proj, k_proj, v_proj |
xverse |
✘ |
✔ |
|
- |
xverse/XVERSE-65B-Chat |
xverse-13b-256k |
xverse/XVERSE-13B-256K |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✔ |
|
- |
xverse/XVERSE-13B-256K |
xverse-moe-a4_2b |
xverse/XVERSE-MoE-A4.2B |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✘ |
|
- |
xverse/XVERSE-MoE-A4.2B |
orion-14b |
OrionStarAI/Orion-14B-Base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✘ |
|
- |
OrionStarAI/Orion-14B-Base |
orion-14b-chat |
OrionStarAI/Orion-14B-Chat |
q_proj, k_proj, v_proj |
orion |
✔ |
✘ |
|
- |
OrionStarAI/Orion-14B-Chat |
bluelm-7b |
vivo-ai/BlueLM-7B-Base |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✘ |
|
- |
vivo-ai/BlueLM-7B-Base |
bluelm-7b-32k |
vivo-ai/BlueLM-7B-Base-32K |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✘ |
|
- |
vivo-ai/BlueLM-7B-Base-32K |
bluelm-7b-chat |
vivo-ai/BlueLM-7B-Chat |
q_proj, k_proj, v_proj |
bluelm |
✘ |
✘ |
|
- |
vivo-ai/BlueLM-7B-Chat |
bluelm-7b-chat-32k |
vivo-ai/BlueLM-7B-Chat-32K |
q_proj, k_proj, v_proj |
bluelm |
✘ |
✘ |
|
- |
vivo-ai/BlueLM-7B-Chat-32K |
ziya2-13b |
Fengshenbang/Ziya2-13B-Base |
q_proj, k_proj, v_proj |
default-generation |
✔ |
✔ |
|
- |
IDEA-CCNL/Ziya2-13B-Base |
ziya2-13b-chat |
Fengshenbang/Ziya2-13B-Chat |
q_proj, k_proj, v_proj |
ziya |
✔ |
✔ |
|
- |
IDEA-CCNL/Ziya2-13B-Chat |
skywork-13b |
skywork/Skywork-13B-base |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✘ |
|
- |
Skywork/Skywork-13B-base |
skywork-13b-chat |
skywork/Skywork-13B-chat |
q_proj, k_proj, v_proj |
skywork |
✘ |
✘ |
|
- |
- |
zephyr-7b-beta-chat |
modelscope/zephyr-7b-beta |
q_proj, k_proj, v_proj |
zephyr |
✔ |
✔ |
transformers>=4.34 |
- |
HuggingFaceH4/zephyr-7b-beta |
polylm-13b |
damo/nlp_polylm_13b_text_generation |
c_attn |
default-generation |
✘ |
✘ |
|
- |
DAMO-NLP-MT/polylm-13b |
seqgpt-560m |
damo/nlp_seqgpt-560m |
query_key_value |
default-generation |
✘ |
✔ |
|
- |
DAMO-NLP/SeqGPT-560M |
sus-34b-chat |
SUSTC/SUS-Chat-34B |
q_proj, k_proj, v_proj |
sus |
✔ |
✔ |
|
- |
SUSTech/SUS-Chat-34B |
tongyi-finance-14b |
TongyiFinance/Tongyi-Finance-14B |
c_attn |
default-generation |
✔ |
✔ |
|
financial |
- |
tongyi-finance-14b-chat |
TongyiFinance/Tongyi-Finance-14B-Chat |
c_attn |
qwen |
✔ |
✔ |
|
financial |
jxy/Tongyi-Finance-14B-Chat |
tongyi-finance-14b-chat-int4 |
TongyiFinance/Tongyi-Finance-14B-Chat-Int4 |
c_attn |
qwen |
✔ |
✔ |
auto_gptq>=0.5 |
financial |
jxy/Tongyi-Finance-14B-Chat-Int4 |
codefuse-codellama-34b-chat |
codefuse-ai/CodeFuse-CodeLlama-34B |
q_proj, k_proj, v_proj |
codefuse-codellama |
✔ |
✔ |
|
coding |
codefuse-ai/CodeFuse-CodeLlama-34B |
codefuse-codegeex2-6b-chat |
codefuse-ai/CodeFuse-CodeGeeX2-6B |
query_key_value |
codefuse |
✘ |
✔ |
transformers<4.34 |
coding |
codefuse-ai/CodeFuse-CodeGeeX2-6B |
codefuse-qwen-14b-chat |
codefuse-ai/CodeFuse-QWen-14B |
c_attn |
codefuse |
✔ |
✔ |
|
coding |
codefuse-ai/CodeFuse-QWen-14B |
phi2-3b |
AI-ModelScope/phi-2 |
Wqkv |
default-generation |
✔ |
✔ |
|
coding |
microsoft/phi-2 |
phi3-4b-4k-instruct |
LLM-Research/Phi-3-mini-4k-instruct |
qkv_proj |
phi3 |
✔ |
✘ |
transformers>=4.36 |
general |
microsoft/Phi-3-mini-4k-instruct |
phi3-4b-128k-instruct |
LLM-Research/Phi-3-mini-128k-instruct |
qkv_proj |
phi3 |
✔ |
✘ |
transformers>=4.36 |
general |
microsoft/Phi-3-mini-128k-instruct |
phi3-mini-128k-instruct |
LLM-Research/Phi-3-mini-128k-instruct |
qkv_proj |
phi3 |
✔ |
✘ |
transformers>=4.36 |
general |
microsoft/Phi-3-mini-128k-instruct |
cogvlm-17b-instruct |
ZhipuAI/cogvlm-chat |
vision_expert_query_key_value, vision_expert_dense, language_expert_query_key_value, language_expert_dense |
cogvlm-instruct |
✘ |
✘ |
|
multi-modal, vision |
THUDM/cogvlm-chat-hf |
cogagent-18b-chat |
ZhipuAI/cogagent-chat |
vision_expert_query_key_value, vision_expert_dense, language_expert_query_key_value, language_expert_dense, query, key_value, dense |
cogagent-chat |
✘ |
✘ |
timm |
multi-modal, vision |
THUDM/cogagent-chat-hf |
cogagent-18b-instruct |
ZhipuAI/cogagent-vqa |
vision_expert_query_key_value, vision_expert_dense, language_expert_query_key_value, language_expert_dense, query, key_value, dense |
cogagent-instruct |
✘ |
✘ |
timm |
multi-modal, vision |
THUDM/cogagent-vqa-hf |
mamba-130m |
AI-ModelScope/mamba-130m-hf |
in_proj, x_proj, embeddings, out_proj |
default-generation |
✘ |
✘ |
transformers>=4.39.0 |
- |
state-spaces/mamba-130m-hf |
mamba-370m |
AI-ModelScope/mamba-370m-hf |
in_proj, x_proj, embeddings, out_proj |
default-generation |
✘ |
✘ |
transformers>=4.39.0 |
- |
state-spaces/mamba-370m-hf |
mamba-390m |
AI-ModelScope/mamba-390m-hf |
in_proj, x_proj, embeddings, out_proj |
default-generation |
✘ |
✘ |
transformers>=4.39.0 |
- |
state-spaces/mamba-390m-hf |
mamba-790m |
AI-ModelScope/mamba-790m-hf |
in_proj, x_proj, embeddings, out_proj |
default-generation |
✘ |
✘ |
transformers>=4.39.0 |
- |
state-spaces/mamba-790m-hf |
mamba-1.4b |
AI-ModelScope/mamba-1.4b-hf |
in_proj, x_proj, embeddings, out_proj |
default-generation |
✘ |
✘ |
transformers>=4.39.0 |
- |
state-spaces/mamba-1.4b-hf |
mamba-2.8b |
AI-ModelScope/mamba-2.8b-hf |
in_proj, x_proj, embeddings, out_proj |
default-generation |
✘ |
✘ |
transformers>=4.39.0 |
- |
state-spaces/mamba-2.8b-hf |
telechat-7b |
TeleAI/TeleChat-7B |
key_value, query |
telechat |
✔ |
✘ |
|
- |
Tele-AI/telechat-7B |
telechat-12b |
TeleAI/TeleChat-12B |
key_value, query |
telechat |
✔ |
✘ |
|
- |
Tele-AI/TeleChat-12B |
grok-1 |
colossalai/grok-1-pytorch |
q_proj, k_proj, v_proj |
default-generation |
✘ |
✘ |
|
- |
hpcai-tech/grok-1 |
dbrx-instruct |
AI-ModelScope/dbrx-instruct |
attn.Wqkv |
dbrx |
✔ |
✔ |
transformers>=4.36 |
- |
databricks/dbrx-instruct |
dbrx-base |
AI-ModelScope/dbrx-base |
attn.Wqkv |
dbrx |
✔ |
✔ |
transformers>=4.36 |
- |
databricks/dbrx-base |
mengzi3-13b-base |
langboat/Mengzi3-13B-Base |
q_proj, k_proj, v_proj |
mengzi |
✔ |
✔ |
|
- |
Langboat/Mengzi3-13B-Base |
c4ai-command-r-v01 |
AI-ModelScope/c4ai-command-r-v01 |
q_proj, k_proj, v_proj |
c4ai |
✔ |
✘ |
transformers>=4.39.1 |
- |
CohereForAI/c4ai-command-r-v01 |
c4ai-command-r-plus |
AI-ModelScope/c4ai-command-r-plus |
q_proj, k_proj, v_proj |
c4ai |
✔ |
✘ |
transformers>4.39 |
- |
CohereForAI/c4ai-command-r-plus |