|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """
- PengChengMind predict run
- """
- import json
- import os
- import requests
- import datetime
- import glob
-
- import numpy as np
- from tqdm import tqdm
-
- import mindspore.common.dtype as mstype
- import mindspore.communication.management as D
- import mindspore as ms
- from mindspore import context, Tensor
- from mindspore import export
- from mindspore.context import ParallelMode
- from mindspore.parallel import set_algo_parameters
- from mindspore.parallel._cost_model_context import _set_multi_subgraphs
- from mindspore.train.model import Model
- from mindspore.train.serialization import load_distributed_checkpoint, load_checkpoint
- from mindspore.nn.transformer.transformer import TransformerOpParallelConfig
-
- from src.generate import get_scores
- from src.pengcheng_mind_7B import EvalNet, PengChengMindModel
- from src.pengcheng_mind_config import set_parse, PengChengMindConfig
- from src.utils import get_args
-
- from mindspore.common import Parameter
- from mindspore.common.tensor import Tensor
- from mindspore.train.serialization import load_checkpoint, load_param_into_net
-
-
# Absolute path of the project root (the parent directory of this file).
project_root = os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + os.path.sep + "..")
# Directory containing this file; used as the anchor for hf_project().
_PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))


def hf_project(*x):
    """Return a path joined under this file's directory.

    Replaces the original ``lambda`` assignment (PEP 8 E731: use ``def``
    for named functions). Interface is unchanged: ``hf_project(*parts)``.
    """
    return os.path.join(_PROJECT_ROOT, *x)


# Best-effort: print the installed MindSpore commit id for reproducibility.
# The hard-coded path only exists on the ModelArts image; elsewhere `cat`
# fails harmlessly and os.system just returns a non-zero status.
os.system("cat /home/ma-user/anaconda3/envs/MindSpore/lib/python3.7/site-packages/mindspore/.commit_id")
-
def restore_checkpoint(args_param, network, cache_url='/cache/Ckpt/'):
    r"""
    Load this rank's checkpoint from *cache_url* into *network*.

    Args:
        args_param: parsed command-line arguments (kept for interface
            compatibility; not read here).
        network: network (Cell) to load the parameters into.
        cache_url (str): directory holding per-rank checkpoint files named
            ``rank_{rank}.ckpt``.
    """
    restore_rank = D.get_rank()
    print("======start single checkpoint", flush=True)
    ckpt_name = os.path.join(cache_url, f"rank_{restore_rank}.ckpt")

    # BUGFIX: the original tested `if not ckpt_name:`, which can never be
    # true (os.path.join always returns a non-empty string), and then fell
    # through to load_checkpoint anyway, crashing on a missing file.
    # Test actual file existence and skip the load, as the message intends.
    if not os.path.exists(ckpt_name):
        print(f"There is no ckpt file in {ckpt_name}, "
              f"current ckpt_files found is {ckpt_name} "
              f"with pattern {ckpt_name}, so skip the loading.")
        return

    time_stamp = datetime.datetime.now()
    print(f"time stamp {time_stamp.strftime('%Y.%m.%d-%H:%M:%S')} pre trained ckpt model {ckpt_name} loading",
          flush=True)
    # Load checkpoint files latest file
    print(f'Start to load from {ckpt_name}')
    param_dict = load_checkpoint(ckpt_name)
    # strict_load=False: tolerate missing/extra parameters in the checkpoint.
    load_param_into_net(network, param_dict, strict_load=False)
-
def set_auto_parallel_context(args_opt):
    """Reset and configure the auto-parallel context.

    Returns:
        tuple: ``(rank, device_num)`` — this process's rank and the total
        device count; ``(0, 1)`` when not running distributed.
    """
    context.reset_auto_parallel_context()

    # Single-device fallback: nothing to initialise.
    if args_opt.distribute != "true":
        return 0, 1

    D.init()
    device_num = D.get_group_size()
    rank = D.get_rank()
    print("rank_id is {}, device_num is {}".format(rank, device_num))
    context.set_auto_parallel_context(
        parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
        gradients_mean=False,
        full_batch=True,
        loss_repeated_mean=True,
        enable_parallel_optimizer=False,
        strategy_ckpt_save_file=f'/cache/strategy_{rank}.ckpt',
        pipeline_stages=args_opt.stage_num)
    # Elementwise ops follow the strategy of their inputs; allow multiple
    # subgraphs in the cost model.
    set_algo_parameters(elementwise_op_strategy_follow=True)
    _set_multi_subgraphs()
    return rank, device_num
-
def load_model(args_opt):
    r"""
    Build and compile the PengChengMind model for prediction/evaluation.

    Configures graph mode and the auto-parallel context, derives the
    data/model/pipeline parallel configuration from ``args_opt``, wraps the
    backbone in an evaluation loss network, compiles it to infer the tensor
    layout, and optionally restores pretrained parameters from OBS.

    Args:
        args_opt: parsed command-line arguments (see ``src.utils.get_args``).

    Returns:
        tuple: ``(model_predict, config)`` — the compiled ``Model`` ready
        for ``predict`` and the ``PengChengMindConfig`` used to build it.
    """
    context.set_context(mode=context.GRAPH_MODE)
    # Set parallel context
    rank, device_num = set_auto_parallel_context(args_opt)

    context.set_context(variable_memory_max_size="30GB")
    # save_graphs=2 dumps IR graphs per device for debugging.
    context.set_context(save_graphs=2,
                        save_graphs_path="/cache/graphs_of_device_id_" + str(rank),
                        device_target=args_opt.device_target)

    strategy_local_file = f"/cache/inference_strategy_100b_d8_mp8_dp1-{rank}.ckpt"
    ms.set_auto_parallel_context(strategy_ckpt_save_file=strategy_local_file)

    # Incremental inference (use_past) is disabled for scoring-style eval tasks.
    if args_opt.eval_task:
        use_past = False
    else:
        use_past = args_opt.use_past
    print('local_rank:{}, start to run...'.format(rank), flush=True)

    # Set model property, rewrite the model parallel
    if device_num < args_opt.op_level_model_parallel_num:
        print(f"The op_level_model_parallel_num {args_opt.op_level_model_parallel_num} is smaller than the device num,"
              f"so change it to the {device_num}", flush=True)
        args_opt.op_level_model_parallel_num = device_num
    model_parallel_num = args_opt.op_level_model_parallel_num
    # Devices left over after model/pipeline parallelism go to data parallel.
    data_parallel_num = int(device_num / (model_parallel_num*args_opt.stage_num))
    micro_batch_interleaved = args_opt.micro_batch_interleaved

    parallel_config = TransformerOpParallelConfig(data_parallel=data_parallel_num,
                                                  model_parallel=model_parallel_num,
                                                  pipeline_stage=args_opt.stage_num,
                                                  micro_batch_num=args_opt.micro_size,
                                                  vocab_emb_dp=False,
                                                  recompute=False)
    # add sequence_parallel
    parallel_config.sequence_parallel = args_opt.sequence_parallel
    # add select_recompute
    parallel_config.select_recompute = args_opt.select_recompute

    per_batch_size = args_opt.per_batch_size
    batch_size = per_batch_size * data_parallel_num
    # Now only support single batch_size for predict
    if args_opt.run_type == "predict":
        batch_size = 1

    if args_opt.softmax_compute_fp32 == "FP32":
        softmax_compute_type = mstype.float32
    elif args_opt.softmax_compute_fp32 == "FP16":
        softmax_compute_type = mstype.float16
    else:
        raise ValueError(f"Unknown softmax_compute_fp32 {args_opt.softmax_compute_fp32}")

    if args_opt.top_query_softmax_fp32 == "FP32":
        top_query_softmax = mstype.float32
    elif args_opt.top_query_softmax_fp32 == "FP16":
        top_query_softmax = mstype.float16
    else:
        raise ValueError(f"Unknown top_query_softmax_fp32 {args_opt.top_query_softmax_fp32}")

    print(f"softmax_compute_type: {softmax_compute_type}")
    print(f"top_query_softmax: {top_query_softmax}")

    ################################################## pc-mind v2 #######################################################
    # NOTE(review): ffn_hidden_size = (8/3) * embedding_size rounded down to
    # a multiple of 64 — presumably sized for a gated (SwiGLU-style) FFN;
    # confirm against PengChengMindModel.
    config = PengChengMindConfig(batch_size=batch_size // parallel_config.micro_batch_num // micro_batch_interleaved,
                                 num_heads=args_opt.num_heads,
                                 hidden_size=args_opt.embedding_size,
                                 seq_length=args_opt.seq_length,
                                 vocab_size=args_opt.vocab_size,
                                 use_moe=bool(args_opt.use_moe),
                                 eod_token=args_opt.eod_id,
                                 num_layers=args_opt.num_layers,
                                 ffn_hidden_size=int((2 * args_opt.embedding_size * 4 / 3) / 64) * 64,
                                 eod_reset=bool(args_opt.eod_reset),
                                 use_past=use_past,
                                 load_ckpt_path=args_opt.load_ckpt_path,
                                 param_init_type=mstype.float32 if args_opt.param_init_type == 'fp32' else mstype.float16,
                                 enable_offload=bool(args_opt.opt_offload),
                                 parallel_config=parallel_config,
                                 use_rope=args_opt.use_rope,
                                 use_flash_attention=args_opt.use_flash_attention,
                                 pipeline_config_filename=args_opt.pipeline_config_filename)
    # softmax_compute_fp32=softmax_compute_type)
    print(">>> FFN_Hidden_Size for PengChengMind-new7B is: {} >>>\n".format(config.ffn_hidden_size))
    # NOTE: FP32 softmax is forced here regardless of the command-line
    # choices parsed above; softmax_compute_type / top_query_softmax are
    # effectively ignored beyond the log lines.
    config.softmax_compute_fp32 = mstype.float32 #softmax_compute_type
    config.top_query_softmax_fp32 = mstype.float32 #top_query_softmax
    # config.hidden_act = "swiglu"
    #####################################################################################################################

    print("===config is: ", config, flush=True)
    print("=====args_opt is: ", args_opt, flush=True)

    # Define network
    pengcheng_mind = PengChengMindModel(config)
    # Full-model loading must happen before graph construction.
    from src.pengcheng_mind_7B import PengChengMindLossWith_notPrompt
    from src.loss import CrossEntropyLoss_eval
    loss = CrossEntropyLoss_eval()
    eval_net = PengChengMindLossWith_notPrompt(config,
                                               pengcheng_mind,
                                               loss,
                                               pad_token=args_opt.padding_id,
                                               seq_length=args_opt.seq_length)
    eval_net.set_train(False)
    model_predict = Model(eval_net)

    # Compile network and obtain tensor layout for loading ckpt
    inputs_np = Tensor(np.ones(shape=(config.batch_size, config.seq_length)), mstype.int32)
    current_index = Tensor(np.array([0]), mstype.int32)

    if args_opt.distribute == "false":
        predict_layout = None
    elif args_opt.eval_task:
        # Compiling only needs the shape
        predict_layout = model_predict.infer_predict_layout(inputs_np, inputs_np)
    elif config.use_past:
        # Incremental inference: compile both the first-iteration graph
        # (full sequence) and the subsequent-iteration graph (single token).
        batch_valid_length = Tensor(np.array([0]), mstype.int32)
        init_true = Tensor([True], mstype.bool_)
        inputs_np_1 = Tensor(np.ones(shape=(config.batch_size, 1)), mstype.int32)
        model_predict.predict_network.add_flags_recursive(is_first_iteration=True)
        predict_layout = model_predict.infer_predict_layout(inputs_np, current_index, init_true, batch_valid_length)
        model_predict.predict_network.add_flags_recursive(is_first_iteration=False)
        _ = model_predict.infer_predict_layout(inputs_np_1, current_index, init_true, batch_valid_length)
    else:
        predict_layout = model_predict.infer_predict_layout(inputs_np, current_index)

    local_ckpt_path = '/cache/pretrained_7b_fp16.ckpt'
    if args_opt.pre_trained:
        # Historical loading paths kept for reference:
        # STEP = 500
        # download_OneCKPT_from_obs(
        #     obs_ckpt_url=f'obs://pcmind7b/ckpts/PengCheng-Mind-7B-1000_tokens_fp16.ckpt',
        #     local_ckpt_url=local_ckpt_path,
        #     rank=rank)
        # param_dict = load_checkpoint(local_ckpt_path)
        # load_param_into_net(eval_net, param_dict)
        # print("================load param ok=================", flush=True)

        #######################################################################

        # #### method1: loading [.npy file]
        # if not args_opt.offline:
        #     import moxing as mox
        #     gpu_model_npy_obspath = "obs://test-zy/merged_ckpt_pt.npy"
        #     local_npy_path = "/cache/ckpt.npy"
        #     mox.file.copy(gpu_model_npy_obspath, local_npy_path)
        #     gpu_param_dict_array = np.load(local_npy_path, allow_pickle=True)
        # #######################################################################
        #
        # for key, value in pengcheng_mind.parameters_dict().items():
        #     print(key, value)
        #
        # for param in pengcheng_mind.get_parameters():
        #     for gpu_param_dict in gpu_param_dict_array:
        #         key = gpu_param_dict['name']
        #         value = gpu_param_dict['data']
        #         if key == param.name or ('position' in key and 'position' in param.name):
        #             # param.data.set_data(Tensor(value))
        #             param.data.set_data(Tensor(value, config.param_init_type))
        #             print('> setting: {}'.format(param.name))
        # print("================load param ok=================", flush=True)
        # # #######################################################################
        # # # saving npu-ckpt for yunnao2-obs
        # # tokens_flag = "227B"
        # # save_local_ckpt = "pcmind_new7B_{}tks.ckpt".format(tokens_flag)
        # # ms.save_checkpoint(pengcheng_mind, save_local_ckpt)
        # # if not args_opt.offline:
        # #     if config.param_init_type == mstype.float32:
        # #         mox.file.copy(save_local_ckpt,
        # #                       "obs://"
        # #                       "fp32-{}".format(save_local_ckpt))
        # #     else:
        # #         mox.file.copy(save_local_ckpt,
        # #                       "obs://"
        # #                       "fp16-{}".format(save_local_ckpt))
        # #     print("================save param to OBS ok=================", flush=True)

        #### method2: loading [.ckpt file]
        # Copy the full checkpoint from OBS to local cache and load it into
        # the (already compiled) evaluation network.
        if not args_opt.offline:
            import moxing as mox
            # gpu_model_npy_obspath = "obs://pcl-verify/yizx/2023_7b_ckpts/mind7b_to_ckpts/new7b_gpu_npy2msckpt/" \
            #                         "fp16-pcmind_new7B_227Btks.ckpt"
            # gpu_model_npy_obspath = "obs://pcl-verify/yizx/2023_7b_ckpts/mind7b_to_ckpts/new7b_gpu_npy2msckpt/" \
            #                         "new-fp16-iter47200-pcmind_new7B_227Btks.ckpt"
            gpu_model_npy_obspath = "obs://pcl-verify/yizx/2023_7b_ckpts/mind7b_to_ckpts/new7b_gpu_npy2msckpt/" \
                                    "new-fp32-iter47200-pcmind_new7B_227Btks.ckpt"
            mox.file.copy(gpu_model_npy_obspath, local_ckpt_path)
            param_dict = load_checkpoint(local_ckpt_path)
            load_param_into_net(eval_net, param_dict)
            print("================load param ok=================", flush=True)
        # #######################################################################

    return model_predict, config
-
-
def run_predict(model_predict, config, args_opt):
    """Run zero-shot evaluation on the CHID idiom-cloze task.

    For every example, the ``#idiom#`` placeholder in the content is filled
    with each candidate idiom in turn; the candidate whose completed text
    gets the lowest model loss is taken as the prediction and compared with
    the labelled answer. Accuracy is printed on rank 0.

    Replaces the original seven hand-unrolled candidate slots
    (one/two/.../sev) with a loop, which also generalizes to any number of
    candidates per example.

    Args:
        model_predict (Model): compiled evaluation model from ``load_model``.
        config (PengChengMindConfig): model config; ``seq_length`` drives
            padding and loss-mask construction.
        args_opt: parsed command-line arguments (kept for interface parity).
    """
    D.init()
    rank_id = D.get_rank()

    # Define tokenizer (HuggingFace tokenizer shipped alongside the project).
    from transformers import AutoTokenizer
    pcmind7b_tokenizer_dir = hf_project("tokenizer/baichuan2")
    print(pcmind7b_tokenizer_dir)
    print(os.listdir(pcmind7b_tokenizer_dir))
    tokenizer = AutoTokenizer.from_pretrained(pcmind7b_tokenizer_dir, trust_remote_code=True)
    pad_id = tokenizer.unk_token_id  # UNK id doubles as the padding token

    file_dir = os.path.join(
        "/home/ma-user/modelarts/user-job-dir/pcmind_new_7B/task_dataset/chid",
        "test_public.json")
    count = 0
    correct_num = 0
    acc = 0
    with open(file_dir, "r", encoding="utf8") as f:
        for line in f.readlines():
            count += 1
            sample = json.loads(line)
            candidates, content, answer = sample["candidates"], sample["content"], sample["answer"]

            # Score every candidate; lowest loss wins.
            losses = [
                _score_candidate(model_predict, tokenizer,
                                 content.replace("#idiom#", candidate),
                                 config.seq_length, pad_id)
                for candidate in candidates
            ]
            answers_pred = int(np.argmin(np.concatenate(losses)))
            if answers_pred == answer:
                correct_num += 1
            acc = correct_num / count
            if rank_id == 0:
                print("CHID ", " ,zero shot , 准确率Acc:", acc, "number: ", count)
    if rank_id == 0:
        print("\n\n=========================================================")
        print("CHID ", " ,zero shot , 准确率Acc:", acc, "number: ", count)


def _score_candidate(model_predict, tokenizer, text, seq_length, pad_id):
    """Tokenize *text*, pad it to *seq_length*, and return the model loss
    for the sequence as a Python list (one entry per batch element).

    The loss mask is 1 over the real tokens and 0 over the padding; the
    original code's ``mask_length`` prompt prefix was hard-coded to 0, so no
    leading tokens are masked out here either.
    """
    input_ids = np.array([tokenizer.encode(text, add_special_tokens=False)]).reshape(1, -1)
    valid_length = input_ids.shape[-1]
    mask_ids = np.array([[1] * valid_length + [0] * (seq_length - valid_length)])
    input_ids = np.pad(input_ids, ((0, 0), (0, seq_length - valid_length)),
                       'constant', constant_values=(0, pad_id))
    loss = model_predict.predict(Tensor(input_ids, mstype.int32),
                                 Tensor(mask_ids, mstype.float32))
    return loss.asnumpy().tolist()
-
def main():
    """Entry point: parse CLI arguments, build the model, run prediction."""
    args = get_args(True)
    set_parse(args)
    predictor, model_config = load_model(args)
    run_predict(predictor, model_config, args)


if __name__ == "__main__":
    main()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|