|
- import argparse
- import subprocess
- import os
- import torch
- import torch_npu
# subprocess.run('pip install c2net -i https://pypi.tuna.tsinghua.edu.cn/simple')
# Import the c2net package (supplies the training context: dataset, pretrained model and output paths)
-
- import json
- from c2net.context import prepare, upload_output
-
parser = argparse.ArgumentParser(description='MindSpore FineTune Example')

# Every option is accepted as a plain string and forwarded verbatim to
# llamafactory-cli; the empty string '' means "not supplied".
# Triples are (flag, default, help-text-or-None), grouped as in the CLI docs.
_CLI_OPTIONS = (
    # model
    ('--model_name', '', None),
    # method
    ('--stage', 'sft', 'stage'),
    ('--do_train', 'true', 'do train'),
    ('--finetuning_type', 'lora', None),
    ('--lora_target', 'all', None),
    ('--pref_beta', '', None),
    ('--pref_loss', '', None),
    ('--deep_speed', '', None),
    # dataset
    ('--dataset', '', None),
    ('--template', '', None),
    ('--cutoff_len', '1024', None),
    ('--max_samples', '100000', None),
    ('--preprocessing_num_workers', '16', None),
    ('--overwrite_cache', 'false', None),
    # output
    ('--logging_steps', '5', None),
    ('--save_steps', '100', None),
    ('--plot_loss', '', None),
    ('--overwrite_output_dir', '', None),
    # train
    ('--per_device_train_batch_size', '', None),
    ('--gradient_accumulation_steps', '', None),
    ('--learning_rate', '', None),
    ('--num_train_epochs', '', None),
    ('--lr_scheduler_type', '', None),
    ('--flash_attn', '', None),
    ('--fp16', '', None),
    ('--bf16', '', None),
    ('--pure_bf16', '', None),
    ('--ddp_timeout', '', None),
    ('--warmup_ratio', '', None),
    # eval
    ('--val_size', '', None),
    ('--per_device_eval_batch_size', '', None),
    ('--eval_strategy', '', None),
    ('--eval_steps', '', None),
    # generate
    ('--max_new_tokens', '', None),
    ('--top_k', '', None),
    ('--top_p', '', None),
    # other
    ('--max_grad_norm', '', None),
    ('--warmup_steps', '', None),
    ('--optim', '', None),
    ('--packing', '', None),
    ('--report_to', '', None),
    ('--include_num_input_tokens_seen', '', None),
    # lora detail
    ('--lora_rank', '', None),
    ('--lora_alpha', '', None),
    ('--lora_dropout', '', None),
    ('--loraplus_lr_ratio', '', None),
    ('--use_rslora', '', None),
    ('--use_dora', '', None),
    ('--additional_target', '', None),
)

for _flag, _default, _help in _CLI_OPTIONS:
    if _help is None:
        parser.add_argument(_flag, type=str, default=_default)
    else:
        parser.add_argument(_flag, type=str, default=_default, help=_help)
def model_adapter(model_name, c2net_context):
    """Return an extra shell snippet of pip installs needed by specific model families.

    Returns '' when the model needs no extra packages. The later deepseek
    check deliberately overrides the earlier one, matching prior behavior.
    ``c2net_context`` is accepted for call-site symmetry but not used here.
    """
    lowered = model_name.lower()
    extra = ''
    if 'chatglm3' in lowered or 'gemma' in lowered:
        extra = 'pip install -U torch==2.2.1 -i https://pypi.tuna.tsinghua.edu.cn/simple;'
    if 'deepseek' in lowered:
        extra = 'pip install -U flash_attn -i https://pypi.tuna.tsinghua.edu.cn/simple;'
    return extra
-
def get_cmd(args, c2net_context):
    """Build the shell command string that launches `llamafactory-cli train`.

    The command is prefixed with a device-visibility variable
    (CUDA_VISIBLE_DEVICES for GPUs, ASCEND_RT_VISIBLE_DEVICES for Ascend
    NPUs) exposing every detected card. Optional CLI arguments are forwarded
    only when supplied (non-empty string).

    Raises:
        Exception: when neither a CUDA GPU nor an Ascend NPU is available.
    """
    # Detect accelerator cards and expose all of them to the training process.
    devices = ''
    gpu_count = torch.cuda.device_count()
    if gpu_count > 0:
        devices = 'CUDA_VISIBLE_DEVICES=' + ','.join(str(i) for i in range(gpu_count))
    else:
        try:
            import torch_npu  # noqa: F401 -- registers the `torch.npu` backend
            npu_count = torch.npu.device_count()
        except ImportError:
            # ROBUSTNESS FIX: a missing torch_npu previously escaped as an
            # ImportError; fall through to the clear "no accelerator" error.
            npu_count = 0
        if npu_count > 0:
            devices = 'ASCEND_RT_VISIBLE_DEVICES=' + ','.join(str(i) for i in range(npu_count))
    if devices == '':
        raise Exception('can not find acc card.')

    cmd = [devices, 'llamafactory-cli', 'train',
           '--stage', args.stage,
           '--do_train', args.do_train,
           '--finetuning_type', args.finetuning_type]

    def append_opt(flag, value):
        # Forward "--flag value" only when the option was actually supplied.
        if value != '':
            cmd.append(flag)
            cmd.append(value)

    # model: pretrained weights live under the c2net pretrain-model directory
    cmd.append('--model_name_or_path')
    cmd.append(c2net_context.pretrain_model_path + "/" + args.model_name)

    # method
    append_opt('--lora_target', args.lora_target)
    append_opt('--pref_beta', args.pref_beta)
    # BUG FIX: --pref_loss was parsed by argparse but never forwarded.
    append_opt('--pref_loss', args.pref_loss)
    # NOTE(review): llamafactory-cli's own option is spelled `deepspeed`;
    # `--deep_speed` kept as-is to preserve behavior -- confirm with CLI docs.
    append_opt('--deep_speed', args.deep_speed)

    # dataset: --dataset_dir only makes sense when a dataset was chosen
    if args.dataset != '':
        cmd += ['--dataset', args.dataset, '--dataset_dir', c2net_context.dataset_path]
    append_opt('--template', args.template)
    append_opt('--cutoff_len', args.cutoff_len)
    append_opt('--max_samples', args.max_samples)
    append_opt('--preprocessing_num_workers', args.preprocessing_num_workers)
    append_opt('--overwrite_cache', args.overwrite_cache)

    # output
    append_opt('--logging_steps', args.logging_steps)
    append_opt('--save_steps', args.save_steps)
    append_opt('--plot_loss', args.plot_loss)
    append_opt('--overwrite_output_dir', args.overwrite_output_dir)

    # Adapter checkpoints always go to <output_path>/adapter.
    output_dir = c2net_context.output_path + "/adapter"
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    cmd += ['--output_dir', output_dir]

    # train
    append_opt('--per_device_train_batch_size', args.per_device_train_batch_size)
    append_opt('--gradient_accumulation_steps', args.gradient_accumulation_steps)
    append_opt('--learning_rate', args.learning_rate)
    append_opt('--num_train_epochs', args.num_train_epochs)
    append_opt('--lr_scheduler_type', args.lr_scheduler_type)
    append_opt('--flash_attn', args.flash_attn)
    append_opt('--fp16', args.fp16)
    # BUG FIX: these two flags were appended as '--bf16 ' / '--pure_bf16 '
    # with a stray trailing space.
    append_opt('--bf16', args.bf16)
    append_opt('--pure_bf16', args.pure_bf16)
    # BUG FIX: --ddp_timeout and --warmup_ratio were parsed but never forwarded.
    append_opt('--ddp_timeout', args.ddp_timeout)
    append_opt('--warmup_ratio', args.warmup_ratio)

    # eval
    append_opt('--val_size', args.val_size)
    append_opt('--per_device_eval_batch_size', args.per_device_eval_batch_size)
    append_opt('--eval_strategy', args.eval_strategy)
    append_opt('--eval_steps', args.eval_steps)

    # generate
    append_opt('--max_new_tokens', args.max_new_tokens)
    append_opt('--top_k', args.top_k)
    append_opt('--top_p', args.top_p)

    # lora detail
    append_opt('--lora_rank', args.lora_rank)
    append_opt('--lora_alpha', args.lora_alpha)
    append_opt('--lora_dropout', args.lora_dropout)
    append_opt('--loraplus_lr_ratio', args.loraplus_lr_ratio)
    append_opt('--use_rslora', args.use_rslora)
    append_opt('--use_dora', args.use_dora)

    # other
    append_opt('--max_grad_norm', args.max_grad_norm)
    append_opt('--warmup_steps', args.warmup_steps)
    append_opt('--optim', args.optim)
    append_opt('--packing', args.packing)
    append_opt('--report_to', args.report_to)
    append_opt('--include_num_input_tokens_seen', args.include_num_input_tokens_seen)
    append_opt('--additional_target', args.additional_target)

    cmdstr = ' '.join(cmd)
    print('cmd is:' + cmdstr)
    return cmdstr
-
-
def get_merger_model_cmd(args, c2net_context):
    """Build the `llamafactory-cli export` argv list that merges the trained
    LoRA adapter (<output_path>/adapter) into the base model.

    The merged model is exported to <output_path>/model, which is created if
    missing. Returns the command as a list (run without a shell).
    """
    base_model = c2net_context.pretrain_model_path + "/" + args.model_name
    adapter_dir = c2net_context.output_path + "/adapter"
    export_dir = c2net_context.output_path + "/model"
    if not os.path.isdir(export_dir):
        os.makedirs(export_dir)

    cmd = [
        'llamafactory-cli', 'export',
        '--finetuning_type', args.finetuning_type,
        '--model_name_or_path', base_model,
    ]
    if args.template != '':
        cmd += ['--template', args.template]
    cmd += [
        '--adapter_name_or_path', adapter_dir,
        '--export_dir', export_dir,
        # Shard the export into 2GB files, merge on CPU, modern safetensors format.
        '--export_size', '2',
        '--export_device', 'cpu',
        '--export_legacy_format', 'false',
    ]
    return cmd
-
-
def prepare_llama_factory_and_finetune():
    """Set up LLaMA-Factory in the c2net container, fine-tune, then merge the adapter.

    Steps:
      1. prepare() pulls the dataset and pretrained model into the container.
      2. Clone and pip-install LLaMA-Factory, copy its bundled data files into
         the dataset directory, and run any model-specific extra installs.
      3. Register each requested dataset in dataset_info.json (consuming and
         deleting the dataset's dataset_format.json when one is shipped).
      4. Run `llamafactory-cli train`; raise on failure.
      5. Run `llamafactory-cli export` to merge the LoRA adapter; raise on failure.

    Raises:
        Exception: when the train or merge subprocess exits non-zero.
    """

    def _stream_and_wait(proc):
        # Echo the child's combined stdout/stderr line by line, wait, return exit code.
        for line in iter(proc.stdout.readline, b''):
            print(line.decode('utf-8').strip())
        proc.wait()
        return proc.returncode

    global code_path  # kept global in case other modules read it after import
    c2net_context = prepare()
    code_path = c2net_context.code_path
    args, unknown = parser.parse_known_args()
    print('set up LLaMA-Factory')

    # Clone + install LLaMA-Factory and copy its bundled datasets next to ours.
    setup_cmd = (
        'export USE_MODELSCOPE_HUB=1;cd ' + code_path + ';'
        + ' git clone https://openi.pcl.ac.cn/laich/LLaMA-Factory.git;'
          'cd ./LLaMA-Factory;'
          'pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple;'
          'cp -R ./data/* ' + c2net_context.dataset_path + ';'
        + model_adapter(args.model_name, c2net_context)
    )
    pInstall = subprocess.Popen(setup_cmd, shell=True,
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if _stream_and_wait(pInstall) != 0:
        # CONSISTENCY FIX: this exit code used to be ignored. Setup is
        # best-effort (a partial install may still work), so only warn.
        print('warning: LLaMA-Factory setup exited with code %d' % pInstall.returncode)

    # Register custom datasets: merge each dataset's format description into
    # dataset_info.json and delete the per-dataset format file afterwards.
    with open(c2net_context.dataset_path + '/dataset_info.json', 'r+') as f:
        dataset_info = json.load(f)

        for dataset_name in args.dataset.split(','):
            if dataset_name in dataset_info:
                continue
            dataset_format_path = c2net_context.dataset_path + '/' + dataset_name + '/dataset_format.json'
            if os.path.exists(dataset_format_path):
                # FIX: reuse the computed path; `fmt_file` avoids shadowing builtin `format`.
                with open(dataset_format_path) as fmt_file:
                    dataset_format = json.load(fmt_file)
                dataset_format.update({'file_name': dataset_name})
                dataset_info[dataset_name] = dataset_format
                try:
                    os.remove(dataset_format_path)
                except OSError as e:
                    # If deletion fails, inform the user but keep going.
                    print("Error: %s - %s." % (e.filename, e.strerror))
            else:
                dataset_info[dataset_name] = {'file_name': dataset_name}

        f.seek(0)  # rewind and overwrite in place
        json.dump(dataset_info, f)
        f.truncate()

    print('LLaMA-Factory cli begin')
    p = subprocess.Popen(get_cmd(args, c2net_context), shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    train_rc = _stream_and_wait(p)
    print('LLaMA-Factory cli end')
    if train_rc != 0:
        raise Exception('LLaMA-Factory cli failed.')

    print('LLaMA-Factory cli merge begin')
    pMerge = subprocess.Popen(get_merger_model_cmd(args, c2net_context),
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    merge_rc = _stream_and_wait(pMerge)
    print('LLaMA-Factory cli merge end')
    if merge_rc != 0:
        raise Exception('LLaMA-Factory cli merge failed.')
-
-
- if __name__ == "__main__":
- # 初始化导入数据集和预训练模型到容器内
- print('prepare c2net')
- #GPU
- if torch.cuda.device_count()>0:
- prepare_llama_factory_and_finetune()
- #NPU
- else:
- rank_id = os.getenv('RANK_ID')
- if rank_id is None: # 非modelarts的NPU环境没有这个环境变量,直接执行
- prepare_llama_factory_and_finetune()
- else:
- # modelarts 多卡任务的脚本会在每张卡上调用一次,llamafactory命令只需要执行一次。
- local_rank = int(rank_id)
- if local_rank == 0:
- prepare_llama_factory_and_finetune()
-
|