diff --git a/models/action.go b/models/action.go index 2a9d88399b..9b92b4192d 100755 --- a/models/action.go +++ b/models/action.go @@ -57,6 +57,7 @@ const ( ActionCreateInferenceTask // 28 ActionCreateBenchMarkTask //29 ActionCreateNewModelTask //30 + ActionCreateGPUTrainTask //31 ) // Action represents user operation type and other information to diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 17761a1dc5..8ffbace257 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -20,9 +20,17 @@ type CloudbrainStatus string type JobType string type ModelArtsJobStatus string +const ( + TypeCloudBrainOne int = iota + TypeCloudBrainTwo + + TypeCloudBrainAll = -1 +) + const ( NPUResource = "NPU" GPUResource = "CPU/GPU" + AllResource = "all" //notebook storage category EVSCategory = "EVS" diff --git a/models/file_chunk.go b/models/file_chunk.go index 76c926dc5f..0fc3a88794 100755 --- a/models/file_chunk.go +++ b/models/file_chunk.go @@ -13,11 +13,6 @@ const ( FileUploaded ) -const ( - TypeCloudBrainOne int = iota - TypeCloudBrainTwo -) - type FileChunk struct { ID int64 `xorm:"pk autoincr"` UUID string `xorm:"uuid UNIQUE"` diff --git a/modules/auth/cloudbrain.go b/modules/auth/cloudbrain.go index 9949feddc1..9d3d6290f5 100755 --- a/modules/auth/cloudbrain.go +++ b/modules/auth/cloudbrain.go @@ -20,6 +20,9 @@ type CreateCloudBrainForm struct { ResourceSpecId int `form:"resource_spec_id" binding:"Required"` BenchmarkTypeID int `form:"benchmark_types_id"` BenchmarkChildTypeID int `form:"benchmark_child_types_id"` + BootFile string `form:"boot_file"` + Params string `form:"run_para_list"` + BranchName string `form:"branch_name"` } type CommitImageCloudBrainForm struct { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 9aae447b03..bd0f11507e 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -15,14 +15,13 @@ import ( ) const ( - Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple; - service ssh stop; - jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` + Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` CodeMountPath = "/code" DataSetMountPath = "/dataset" ModelMountPath = "/model" + LogFile = "log.txt" BenchMarkMountPath = "/benchmark" BenchMarkResourceID = 1 Snn4imagenetMountPath = "/snn4imagenet" @@ -32,10 +31,13 @@ const ( SubTaskName = "task1" Success = "S000" + + DefaultBranchName = "master" ) var ( - ResourceSpecs *models.ResourceSpecs + ResourceSpecs *models.ResourceSpecs + TrainResourceSpecs *models.ResourceSpecs ) func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { @@ -147,7 +149,7 @@ func AdminOrJobCreaterRightForTrain(ctx *context.Context) { } -func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { +func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description, branchName, bootFile, params string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { dataActualPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.BasePath + @@ -155,13 +157,25 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid var resourceSpec *models.ResourceSpec - if ResourceSpecs == nil { - json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) - } - - for _, spec := range ResourceSpecs.ResourceSpec { - if resourceSpecId == spec.Id { - resourceSpec = spec + var versionCount int + if jobType == string(models.JobTypeTrain) { + versionCount = 1 + if TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) + } + for _, spec := range TrainResourceSpecs.ResourceSpec { + if resourceSpecId == spec.Id { + resourceSpec = spec + } + } + } else { + if ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) + } + for _, spec := range ResourceSpecs.ResourceSpec { + if resourceSpecId == spec.Id { + resourceSpec = spec + } } } @@ -171,6 +185,15 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, return errors.New("no such resourceSpec") } + var datasetName string + attach, err := models.GetAttachmentByUUID(uuid) + if err != nil { + //for benchmark, do not return error + log.Error("GetAttachmentByUUID failed:%v", err) + } else { + datasetName = attach.Name + } + jobResult, err := CreateJob(jobName, models.CreateJobParams{ JobName: jobName, RetryCount: 1, @@ -265,6 +288,12 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, BenchmarkTypeID: benchmarkTypeID, BenchmarkChildTypeID: benchmarkChildTypeID, Description: description, + IsLatestVersion: "1", + VersionCount: versionCount, + BranchName: branchName, + BootFile: bootFile, + DatasetName: datasetName, + Parameters: params, }) if err != nil { @@ -280,6 +309,8 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, if string(models.JobTypeBenchmark) == jobType { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask) + } else if string(models.JobTypeTrain) == jobType { + notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, displayJobName, models.ActionCreateGPUTrainTask) } else { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask) } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index e30d0100c0..538fcfbd90 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -56,7 +56,6 @@ const ( PerPage = 10 IsLatestVersion = "1" NotLatestVersion = "0" - DebugType = -1 VersionCount = 1 SortByCreateTime = "create_time" diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 946fb73b48..3fc66c426c 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -452,16 +452,18 @@ var ( DecompressOBSTaskName string //cloudbrain config - CBAuthUser string - CBAuthPassword string - RestServerHost string - JobPath string - CBCodePathPrefix string - JobType string - GpuTypes string - DebugServerHost string - ResourceSpecs string - MaxDuration int64 + CBAuthUser string + CBAuthPassword string + RestServerHost string + JobPath string + CBCodePathPrefix string + JobType string + GpuTypes string + DebugServerHost string + ResourceSpecs string + MaxDuration int64 + TrainGpuTypes string + TrainResourceSpecs string //benchmark config IsBenchmarkEnabled bool @@ -1285,6 +1287,8 @@ func NewContext() { GpuTypes = sec.Key("GPU_TYPES").MustString("") ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) + TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") + TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index a990e9aee4..c715ba8ce2 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1010,7 +1010,8 @@ modelarts.train_job.parameter_value=Parameter Value modelarts.train_job.resource_setting=resource_setting modelarts.train_job.resource_setting_info=resource_setting_info modelarts.train_job.resource_pool=resource_pool -modelarts.train_job.resource_type=resource_type +modelarts.train_job.resource_type=Resource Type +modelarts.train_job.train_dataset=Train Dataset modelarts.train_job.standard=Standard modelarts.train_job.NAS_address=NAS Address modelarts.train_job.NAS_mount_path=NAS Mount Path @@ -2790,10 +2791,11 @@ reject_pull_request = `suggested changes for %s#%[2]s` upload_dataset=`upload dataset %s` task_gpudebugjob=`created CPU/GPU type debugging task%s` task_npudebugjob=`created NPU type debugging task %s` -task_trainjob=`created training task%s` +task_nputrainjob=`created NPU training task%s` task_inferencejob=`created reasoning task %s` task_benchmark=`created profiling task %s` task_createmodel=`created new model %s` +task_gputrainjob=`created CPU/GPU training task%s` [tool] ago = %s ago diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 4e73fbad5f..b47a6bafad 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -2797,10 +2797,11 @@ reject_pull_request=`建议变更 %s#%[2]s` upload_dataset=`上传了数据集文件 %s` task_gpudebugjob=`创建了CPU/GPU类型调试任务 %s` task_npudebugjob=`创建了NPU类型调试任务 %s` -task_trainjob=`创建了训练任务 %s` +task_nputrainjob=`创建了NPU类型训练任务 %s` task_inferencejob=`创建了推理任务 %s` task_benchmark=`创建了评测任务 %s` task_createmodel=`导入了新模型 %s` +task_gputrainjob=`创建了CPU/GPU类型训练任务 %s` [tool] ago=%s前 diff --git a/public/home/home.js b/public/home/home.js old mode 100644 new mode 100755 index 7512a44232..478c70f217 --- a/public/home/home.js +++ b/public/home/home.js @@ -135,7 +135,7 @@ socket.onmessage = function (e) { html += recordPrefix + actionName; html += " " + getRepotext(record) + "" } - else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30"){ + else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31"){ html += recordPrefix + actionName; html += " " + record.RefName + "" } @@ -175,6 +175,8 @@ function getTaskLink(record){ re = re + "/cloudbrain/benchmark/" + record.Content; }else if(record.OpType == 30){ re = re + "/modelmanage/show_model_info?name=" + record.RefName; + }else if(record.OpType == 31){ + re = re + "/cloudbrain/train-job/" + record.Content; } re = encodeURI(re); return re; @@ -321,10 +323,11 @@ var actionNameZH={ "24":"上传了数据集文件", "25":"创建了CPU/GPU类型调试任务", "26":"创建了NPU类型调试任务", - "27":"创建了训练任务", + "27":"创建了NPU类型训练任务", "28":"创建了推理任务", "29":"创建了评测任务", - "30":"导入了新模型" + "30":"导入了新模型", + "31":"创建了CPU/GPU类型训练任务" }; var actionNameEN={ @@ -346,10 +349,11 @@ var actionNameEN={ "24":" upload dataset ", "25":" created CPU/GPU type debugging task ", "26":" created NPU type debugging task ", - "27":" created training task", + "27":" created NPU type training task", "28":" created reasoning task", "29":" created profiling task", - "30":" created new model" + "30":" created new model", + "31":" created CPU/GPU type training task", }; var repoAndOrgZH={ diff --git a/routers/admin/cloudbrains.go b/routers/admin/cloudbrains.go old mode 100644 new mode 100755 index 6bbd534b9b..884ed6b9b7 --- a/routers/admin/cloudbrains.go +++ b/routers/admin/cloudbrains.go @@ -41,7 +41,7 @@ func CloudBrains(ctx *context.Context) { if page <= 0 { page = 1 } - debugType := modelarts.DebugType + debugType := models.TypeCloudBrainAll if listType == models.GPUResource { debugType = models.TypeCloudBrainOne } else if listType == models.NPUResource { @@ -121,7 +121,7 @@ func DownloadCloudBrains(ctx *context.Context) { Page: page, PageSize: 1, }, - Type: modelarts.DebugType, + Type: models.TypeCloudBrainAll, NeedRepoInfo: false, IsLatestVersion: modelarts.IsLatestVersion, }) @@ -151,7 +151,7 @@ func DownloadCloudBrains(ctx *context.Context) { Page: page, PageSize: pageSize, }, - Type: modelarts.DebugType, + Type: models.TypeCloudBrainAll, NeedRepoInfo: true, IsLatestVersion: modelarts.IsLatestVersion, }) diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 306854af37..1868edcb5f 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -62,10 +62,10 @@ import ( "net/http" "strings" - "code.gitea.io/gitea/routers/authentication" - "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/auth" + + "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -77,6 +77,7 @@ import ( "code.gitea.io/gitea/routers/api/v1/repo" _ "code.gitea.io/gitea/routers/api/v1/swagger" // for swagger generation "code.gitea.io/gitea/routers/api/v1/user" + "code.gitea.io/gitea/routers/authentication" repo_ext "code.gitea.io/gitea/routers/repo" "gitea.com/macaron/binding" @@ -882,6 +883,13 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/cloudbrain", func() { m.Get("/:id", repo.GetCloudbrainTask) m.Get("/:id/log", repo.CloudbrainGetLog) + m.Group("/train-job", func() { + m.Group("/:jobid", func() { + m.Get("", repo.GetModelArtsTrainJobVersion) + m.Get("/model_list", repo.CloudBrainModelList) + m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.CloudBrainStop) + }) + }) }, reqRepoReader(models.UnitTypeCloudBrain)) m.Group("/modelarts", func() { m.Group("/notebook", func() { diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index b2f529dfb5..d31943d42e 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -6,16 +6,19 @@ package repo import ( - "code.gitea.io/gitea/modules/timeutil" + "encoding/json" "net/http" "sort" + "strings" "time" - "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/timeutil" + routerRepo "code.gitea.io/gitea/routers/repo" ) // cloudbrain get job task by jobid @@ -161,3 +164,55 @@ func CloudbrainGetLog(ctx *context.Context) { return } + +func CloudBrainModelList(ctx *context.APIContext) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + var versionName = ctx.Query("version_name") + parentDir := ctx.Query("parentDir") + dirArray := strings.Split(parentDir, "/") + + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + + //get dirs + dirs, err := routerRepo.GetModelDirs(task.JobName, parentDir) + if err != nil { + log.Error("GetModelDirs failed:%v", err.Error(), ctx.Data["msgID"]) + ctx.ServerError("GetModelDirs failed:", err) + return + } + + var fileInfos []storage.FileInfo + err = json.Unmarshal([]byte(dirs), &fileInfos) + if err != nil { + log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"]) + ctx.ServerError("json.Unmarshal failed:", err) + return + } + + for i, fileInfo := range fileInfos { + temp, _ := time.Parse("2006-01-02 15:04:05", fileInfo.ModTime) + fileInfos[i].ModTime = temp.Local().Format("2006-01-02 15:04:05") + } + + sort.Slice(fileInfos, func(i, j int) bool { + return fileInfos[i].ModTime > fileInfos[j].ModTime + }) + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "VersionName": versionName, + "StatusOK": 0, + "Path": dirArray, + "Dirs": fileInfos, + "task": task, + "PageIsCloudBrain": true, + }) +} diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index d7d011e073..dc60eb3ff7 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -6,16 +6,17 @@ package repo import ( - "code.gitea.io/gitea/modules/timeutil" "net/http" "strconv" "strings" "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/timeutil" routerRepo "code.gitea.io/gitea/routers/repo" ) @@ -133,27 +134,61 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { ctx.NotFound(err) return } - result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) - if err != nil { - ctx.NotFound(err) - return - } - if job.StartTime == 0 && result.StartTime > 0 { - job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) - } - job.Status = modelarts.TransTrainJobStatus(result.IntStatus) - job.Duration = result.Duration / 1000 - job.TrainJobDuration = result.TrainJobDuration - job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) + if job.Type == models.TypeCloudBrainOne { + jobResult, err := cloudbrain.GetJob(job.JobID) + if err != nil { + ctx.NotFound(err) + log.Error("GetJob failed:", err) + return + } + result, err := models.ConvertToJobResultPayload(jobResult.Payload) + if err != nil { + ctx.NotFound(err) + log.Error("ConvertToJobResultPayload failed:", err) + return + } - if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { - job.EndTime = job.StartTime.Add(job.Duration) - } + job.Status = result.JobStatus.State + if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) { + taskRoles := result.TaskRoles + taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) - err = models.UpdateTrainJobVersion(job) - if err != nil { - log.Error("UpdateJob failed:", err) + job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP + job.ContainerID = taskRes.TaskStatuses[0].ContainerID + job.Status = taskRes.TaskStatuses[0].State + } + + if result.JobStatus.State != string(models.JobWaiting) { + err = models.UpdateJob(job) + if err != nil { + log.Error("UpdateJob failed:", err) + } + } + } else { + result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) + if err != nil { + ctx.NotFound(err) + return + } + + if job.StartTime == 0 && result.StartTime > 0 { + job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) + } + job.Status = modelarts.TransTrainJobStatus(result.IntStatus) + job.Duration = result.Duration / 1000 + job.TrainJobDuration = result.TrainJobDuration + + job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) + + if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { + job.EndTime = job.StartTime.Add(job.Duration) + } + + err = models.UpdateTrainJobVersion(job) + if err != nil { + log.Error("UpdateJob failed:", err) + } } ctx.JSON(http.StatusOK, map[string]interface{}{ diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index fc1dbfbd00..a74152b9f9 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -37,6 +37,9 @@ const ( tplCloudBrainBenchmarkIndex base.TplName = "repo/cloudbrain/benchmark/index" tplCloudBrainBenchmarkNew base.TplName = "repo/cloudbrain/benchmark/new" tplCloudBrainBenchmarkShow base.TplName = "repo/cloudbrain/benchmark/show" + + tplCloudBrainTrainJobNew base.TplName = "repo/cloudbrain/trainjob/new" + tplCloudBrainTrainJobShow base.TplName = "repo/cloudbrain/trainjob/show" ) var ( @@ -45,6 +48,7 @@ var ( benchmarkTypes *models.BenchmarkTypes benchmarkGpuInfos *models.GpuInfos benchmarkResourceSpecs *models.ResourceSpecs + trainGpuInfos *models.GpuInfos ) const BENCHMARK_TYPE_CODE = "repo.cloudbrain.benchmark.types" @@ -143,6 +147,11 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { } ctx.Data["gpu_types"] = gpuInfos.GpuInfo + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo + if benchmarkGpuInfos == nil { json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) } @@ -157,6 +166,14 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) } ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec + + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + ctx.Data["params"] = "" + ctx.Data["branchName"] = ctx.Repo.BranchName + ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled @@ -184,38 +201,52 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { image := form.Image uuid := form.Attachment jobType := form.JobType - command := cloudbrain.Command gpuQueue := form.GpuType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath resourceSpecId := form.ResourceSpecId + branchName := form.BranchName repo := ctx.Repo.Repository - tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName) + tpl := tplCloudBrainNew + command := cloudbrain.Command + if jobType == string(models.JobTypeTrain) { + tpl = tplCloudBrainTrainJobNew + commandTrain, err := getTrainJobCommand(form) + if err != nil { + log.Error("getTrainJobCommand failed: %v", err) + ctx.RenderWithErr(err.Error(), tpl, &form) + return + } + + command = commandTrain + } + + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("the job name did already exist", tplCloudBrainNew, &form) + ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainNew, &form) + ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } - if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) { + if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) && jobType != string(models.JobTypeTrain) { log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("jobtype error", tplCloudBrainNew, &form) + ctx.RenderWithErr("jobtype error", tpl, &form) return } @@ -223,18 +254,21 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainNew, &form) + ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplCloudBrainNew, &form) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) return } } - downloadCode(repo, codePath) + if branchName == "" { + branchName = cloudbrain.DefaultBranchName + } + downloadCode(repo, codePath, branchName) uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/" @@ -268,15 +302,19 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, + storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params, 0, 0, resourceSpecId) if err != nil { cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr(err.Error(), tplCloudBrainNew, &form) + ctx.RenderWithErr(err.Error(), tpl, &form) return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") + if jobType == string(models.JobTypeTrain) { + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=all") + } else { + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") + } } func CloudBrainRestart(ctx *context.Context) { @@ -342,18 +380,29 @@ func CloudBrainRestart(ctx *context.Context) { } func CloudBrainBenchMarkShow(ctx *context.Context) { - cloudBrainShow(ctx, tplCloudBrainBenchmarkShow) + cloudBrainShow(ctx, tplCloudBrainBenchmarkShow, models.JobTypeBenchmark) } func CloudBrainShow(ctx *context.Context) { - cloudBrainShow(ctx, tplCloudBrainShow) + cloudBrainShow(ctx, tplCloudBrainShow, models.JobTypeDebug) +} + +func CloudBrainTrainJobShow(ctx *context.Context) { + cloudBrainShow(ctx, tplCloudBrainTrainJobShow, models.JobTypeTrain) } -func cloudBrainShow(ctx *context.Context, tpName base.TplName) { +func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.JobType) { ctx.Data["PageIsCloudBrain"] = true - var ID = ctx.Params(":id") debugListType := ctx.Query("debugListType") - task, err := models.GetCloudbrainByID(ID) + + var task *models.Cloudbrain + var err error + if jobType == models.JobTypeTrain { + task, err = models.GetCloudbrainByJobID(ctx.Params(":jobid")) + } else { + task, err = models.GetCloudbrainByID(ctx.Params(":id")) + } + if err != nil { log.Info("error:" + err.Error()) ctx.Data["error"] = err.Error() @@ -368,6 +417,16 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) { jobRes.Resource.Memory = strings.ReplaceAll(jobRes.Resource.Memory, "Mi", "MB") spec := "GPU数:" + strconv.Itoa(jobRes.Resource.NvidiaComGpu) + ",CPU数:" + strconv.Itoa(jobRes.Resource.CPU) + ",内存(MB):" + jobRes.Resource.Memory ctx.Data["resource_spec"] = spec + if task.JobType == string(models.JobTypeTrain) { + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + for _, resourceType := range trainGpuInfos.GpuInfo { + if resourceType.Queue == jobRes.Config.GpuType { + ctx.Data["resource_type"] = resourceType.Value + } + } + } taskRoles := jobRes.TaskRoles if jobRes.JobStatus.State != string(models.JobFailed) { @@ -431,6 +490,29 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) { task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) } ctx.Data["duration"] = task.TrainJobDuration + + if len(task.Parameters) > 0 { + var parameters models.Parameters + + err := json.Unmarshal([]byte(task.Parameters), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) + task.Parameters = "" + } else { + if len(parameters.Parameter) > 0 { + paramTemp := "" + for _, Parameter := range parameters.Parameter { + param := Parameter.Label + " = " + Parameter.Value + "; " + paramTemp = paramTemp + param + } + task.Parameters = paramTemp[:len(paramTemp)-2] + } else { + task.Parameters = "" + } + } + + } + ctx.Data["task"] = task ctx.Data["jobName"] = task.JobName ctx.Data["displayJobName"] = task.DisplayJobName @@ -438,6 +520,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) { version_list_task = append(version_list_task, task) ctx.Data["version_list_task"] = version_list_task ctx.Data["debugListType"] = debugListType + ctx.Data["canDownload"] = cloudbrain.CanDeleteJob(ctx, task) ctx.HTML(200, tpName) } @@ -509,11 +592,12 @@ func CloudBrainStop(ctx *context.Context) { break } - ctx.JSON(200, map[string]string{ + ctx.JSON(200, map[string]interface{}{ "result_code": resultCode, "error_msg": errorMsg, "status": status, "id": ID, + "StatusOK": 0, }) } @@ -765,8 +849,8 @@ func GetRate(ctx *context.Context) { } } -func downloadCode(repo *models.Repository, codePath string) error { - if err := git.Clone(repo.RepoPath(), codePath, git.CloneRepoOptions{}); err != nil { +func downloadCode(repo *models.Repository, codePath, branchName string) error { + if err := git.Clone(repo.RepoPath(), codePath, git.CloneRepoOptions{Branch: branchName}); err != nil { log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err) return err } @@ -1471,7 +1555,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF } os.RemoveAll(codePath) - if err := downloadCode(repo, codePath); err != nil { + if err := downloadCode(repo, codePath, cloudbrain.DefaultBranchName); err != nil { log.Error("downloadCode failed, %v", err, ctx.Data["MsgID"]) cloudBrainNewDataPrepare(ctx) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) @@ -1536,7 +1620,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), - storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, + storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, cloudbrain.DefaultBranchName, "", "", benchmarkTypeID, benchmarkChildTypeID, resourceSpecId) if err != nil { cloudBrainNewDataPrepare(ctx) @@ -1562,10 +1646,66 @@ func BenchmarkDel(ctx *context.Context) { } } +func CloudBrainTrainJobNew(ctx *context.Context) { + err := cloudBrainNewDataPrepare(ctx) + if err != nil { + ctx.ServerError("get new train-job info failed", err) + return + } + ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew) +} + +func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { + var command string + bootFile := form.BootFile + params := form.Params + + if !strings.HasSuffix(bootFile, ".py") { + log.Error("bootFile(%s) format error", bootFile) + return command, errors.New("bootFile format error") + } + + var parameters models.Parameters + var param string + if len(params) != 0 { + err := json.Unmarshal([]byte(params), ¶meters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + return command, err + } + + for _, parameter := range parameters.Parameter { + param += " --" + parameter.Label + "=" + parameter.Value + } + } + + command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile + + return command, nil +} + +func CloudBrainTrainJobDel(ctx *context.Context) { + var listType = ctx.Query("listType") + if err := deleteCloudbrainJob(ctx); err != nil { + log.Error("deleteCloudbrainJob failed: %v", err, ctx.Data["msgID"]) + ctx.ServerError(err.Error(), err) + return + } + + var isAdminPage = ctx.Query("isadminpage") + if ctx.IsUserSiteAdmin() && isAdminPage == "true" { + ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains") + } else { + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) + } +} + func GetBenchmarkTypes(ctx *context.Context) *models.BenchmarkTypes { var lang = ctx.Locale.Language() if benchmarkTypesMap[lang] == nil { var val = i18n.Tr(lang, BENCHMARK_TYPE_CODE) + //use config + val = setting.BenchmarkTypes var tempType *models.BenchmarkTypes if err := json.Unmarshal([]byte(val), &tempType); err != nil { log.Error("json.Unmarshal BenchmarkTypes(%s) failed:%v", val, err, ctx.Data["MsgID"]) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 7d4c203bc6..95322f194f 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -47,20 +47,26 @@ const ( ) func DebugJobIndex(ctx *context.Context) { - debugListType := ctx.Query("debugListType") - ctx.Data["ListType"] = debugListType + listType := ctx.Query("debugListType") + ctx.Data["ListType"] = listType MustEnableCloudbrain(ctx) repo := ctx.Repo.Repository page := ctx.QueryInt("page") if page <= 0 { page = 1 } - debugType := modelarts.DebugType + typeCloudBrain := models.TypeCloudBrainAll jobTypeNot := false - if debugListType == models.GPUResource { - debugType = models.TypeCloudBrainOne - } else if debugListType == models.NPUResource { - debugType = models.TypeCloudBrainTwo + if listType == models.GPUResource { + typeCloudBrain = models.TypeCloudBrainOne + } else if listType == models.NPUResource { + typeCloudBrain = models.TypeCloudBrainTwo + } else if listType == models.AllResource { + typeCloudBrain = models.TypeCloudBrainAll + } else { + log.Error("listType(%s) error", listType) + ctx.ServerError("listType error", errors.New("listType error")) + return } var jobTypes []string @@ -71,7 +77,7 @@ func DebugJobIndex(ctx *context.Context) { PageSize: setting.UI.IssuePagingNum, }, RepoID: repo.ID, - Type: debugType, + Type: typeCloudBrain, JobTypeNot: jobTypeNot, JobTypes: jobTypes, }) @@ -93,7 +99,7 @@ func DebugJobIndex(ctx *context.Context) { ctx.Data["Tasks"] = ciTasks ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) ctx.Data["RepoIsEmpty"] = repo.IsEmpty - ctx.Data["debugListType"] = debugListType + ctx.Data["debugListType"] = listType ctx.HTML(200, tplDebugJobIndex) } @@ -480,6 +486,26 @@ func TrainJobIndex(ctx *context.Context) { page = 1 } + listType := ctx.Query("listType") + if len(listType) == 0 { + listType = models.AllResource + } + ctx.Data["ListType"] = listType + + typeCloudBrain := models.TypeCloudBrainAll + if listType == models.GPUResource { + typeCloudBrain = models.TypeCloudBrainOne + } else if listType == models.NPUResource { + typeCloudBrain = models.TypeCloudBrainTwo + } else if listType == models.AllResource { + typeCloudBrain = models.TypeCloudBrainAll + } + //else { + // log.Error("listType(%s) error", listType) + // ctx.ServerError("listType error", errors.New("listType error")) + // return + //} + var jobTypes []string jobTypes = append(jobTypes, string(models.JobTypeTrain)) tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ @@ -488,7 +514,7 @@ func TrainJobIndex(ctx *context.Context) { PageSize: setting.UI.IssuePagingNum, }, RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, + Type: typeCloudBrain, JobTypeNot: false, JobTypes: jobTypes, IsLatestVersion: modelarts.IsLatestVersion, @@ -501,11 +527,16 @@ func TrainJobIndex(ctx *context.Context) { for i, task := range tasks { tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) - tasks[i].ComputeResource = models.NPUResource + if task.Cloudbrain.Type == models.TypeCloudBrainOne { + tasks[i].ComputeResource = models.GPUResource + } else if task.Cloudbrain.Type == models.TypeCloudBrainTwo { + tasks[i].ComputeResource = models.NPUResource + } } pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) pager.SetDefaultParams(ctx) + pager.AddParam(ctx, "listType", "ListType") ctx.Data["Page"] = pager ctx.Data["PageIsCloudBrain"] = true @@ -1555,6 +1586,7 @@ func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *model func TrainJobDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") + var listType = ctx.Query("listType") repo := ctx.Repo.Repository var jobTypes []string @@ -1596,12 +1628,13 @@ func TrainJobDel(ctx *context.Context) { if ctx.IsUserSiteAdmin() && isAdminPage == "true" { ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains") } else { - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) } } func TrainJobStop(ctx *context.Context) { var jobID = ctx.Params(":jobid") + var listType = ctx.Query("listType") task := ctx.Cloudbrain _, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) @@ -1611,7 +1644,7 @@ func TrainJobStop(ctx *context.Context) { return } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) } func canUserCreateTrainJob(uid int64) (bool, error) { @@ -2276,7 +2309,7 @@ func SetJobCount(ctx *context.Context) { repoId := ctx.Repo.Repository.ID _, jobCount, err := models.Cloudbrains(&models.CloudbrainsOptions{ RepoID: repoId, - Type: modelarts.DebugType, + Type: models.TypeCloudBrainAll, }) if err != nil { ctx.ServerError("Get job faild:", err) diff --git a/routers/routes/routes.go b/routers/routes/routes.go index c1ff24216c..b15941dfcb 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1035,6 +1035,19 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainBenchmarkCreate) m.Get("/get_child_types", repo.GetChildTypes) }) + + m.Group("/train-job", func() { + m.Group("/:jobid", func() { + m.Get("", reqRepoCloudBrainReader, repo.CloudBrainTrainJobShow) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainTrainJobDel) + //m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels) + m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadModel) + //m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobNewVersion) + //m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) + }) + m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.CloudBrainTrainJobNew) + m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate) + }) }, context.RepoRef()) m.Group("/modelmanage", func() { m.Post("/create_model", reqRepoModelManageWriter, repo.SaveModel) diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go old mode 100644 new mode 100755 index 98b0e0aa98..61f356a664 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,7 +10,7 @@ import ( "github.com/elliotchance/orderedmap" ) -var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 25, 26, 27, 28, 29, 30} +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 25, 26, 27, 28, 29, 30, 31} type ClientsManager struct { Clients *orderedmap.OrderedMap diff --git a/templates/repo/cloudbrain/trainjob/new.tmpl b/templates/repo/cloudbrain/trainjob/new.tmpl new file mode 100755 index 0000000000..50fd00c0fc --- /dev/null +++ b/templates/repo/cloudbrain/trainjob/new.tmpl @@ -0,0 +1,451 @@ +{{template "base/head" .}} + + +
+
+
+
+
+
+
+
+
+
+ {{template "repo/header" .}} +
+ {{template "base/alert" .}} +

+ {{.i18n.Tr "repo.modelarts.train_job.new"}} +

+
+ +
+ {{.CsrfTokenHtml}} + + + +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+
+ + +
+
+ + + {{.i18n.Tr "cloudbrain.job_name_rule"}} +
+ +
+ + +
+
+ +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

+ + +
+ + +
+ + + +
+ + +
+ +
+ + + + + {{range .images}} + + {{end}} + {{range .public_images}} + + {{end}} + +
+ +
+ + {{if .bootFile}} + + {{else}} + + {{end}} + + + + 查看样例 +
+ +
+ + + 训练脚本存储在/code中,数据集存储在/dataset中,训练输出请存储在/model中以供后续下载。 +
+ +
+ + {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} + +
+ {{if .params}} + {{if ne 0 (len .params)}} + {{range $k ,$v := .params}} +
+
+ +
+
+ +
+ + + + +
+ {{end}} + {{end}} + {{end}} +
+
+ +
+ + +
+ +
+ + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+ + + +
+
+
+
+{{template "base/footer" .}} + + \ No newline at end of file diff --git a/templates/repo/cloudbrain/trainjob/show.tmpl b/templates/repo/cloudbrain/trainjob/show.tmpl new file mode 100755 index 0000000000..18518f0152 --- /dev/null +++ b/templates/repo/cloudbrain/trainjob/show.tmpl @@ -0,0 +1,653 @@ +{{template "base/head" .}} + +
+
+
+
+
+
+
+
+
+
+{{template "repo/header" .}} +
+

+ +

+ {{range $k ,$v := .version_list_task}} +
+ +
+
+
+ + + +
+ {{TimeSinceUnix1 .CreatedUnix}} + + {{$.i18n.Tr "repo.modelarts.status"}}: + {{.Status}} + + {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}: + {{$.duration}} + +
+
+
+
+
+
+
+
+ +
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ {{$.i18n.Tr "repo.cloudbrain_task"}} + +
+ {{.DisplayJobName}} +
+
+ {{$.i18n.Tr "repo.modelarts.status"}} + +
+ {{.Status}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.start_time"}} + +
+ {{TimeSinceUnix1 .CreatedUnix}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}} + +
+ {{$.duration}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.resource_type"}} + +
+ {{$.resource_type}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} + +
+ {{$.resource_spec}} +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ 镜像 + +
+ {{.Image}} +
+
+ {{$.i18n.Tr "repo.modelarts.code_version"}} + +
+ {{.BranchName}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.start_file"}} + +
+ {{.BootFile}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.train_dataset"}} + +
+ {{.DatasetName}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}} + +
+ {{.Parameters}} +
+
+ {{$.i18n.Tr "repo.modelarts.train_job.description"}} + +
+ {{.Description}} +
+
+
+
+
+ +
+
+
+
+ +
+ + +

+                            
+ +
+ +
+ +
+ + + +
+ +
+
+ +
+
+
+ {{end}} {{template "base/paginate" .}} +
+ +
+ +
+ + +
+{{template "base/footer" .}} + + \ No newline at end of file diff --git a/templates/repo/editor/upload.tmpl b/templates/repo/editor/upload.tmpl index 8466a6e9a0..7794c2a2d3 100644 --- a/templates/repo/editor/upload.tmpl +++ b/templates/repo/editor/upload.tmpl @@ -27,7 +27,7 @@
-
+
{{template "repo/editor/commit_form" .}} diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index 19c6b38413..8166e0952b 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -39,8 +39,18 @@
+ {{if .Permission.CanWrite $.UnitTypeCloudBrain}} - {{$.i18n.Tr "repo.modelarts.train_job.new_train"}} + {{$.i18n.Tr "repo.modelarts.train_job.new_train"}} {{else}} {{$.i18n.Tr "repo.modelarts.train_job.new_train"}} {{end}} @@ -102,7 +112,7 @@
- + {{.DisplayJobName}} @@ -143,7 +153,7 @@
{{$.CsrfTokenHtml}} {{if .CanDel}} - + {{$.i18n.Tr "repo.stop"}} {{else}} @@ -154,7 +164,8 @@
-
+ + {{$.CsrfTokenHtml}} {{if .CanDel}} @@ -206,3 +217,27 @@
{{template "base/footer" .}} + + diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index f0ac98b17d..963250bcb4 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -78,6 +78,24 @@

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+
+ + +
diff --git a/templates/user/dashboard/feeds.tmpl b/templates/user/dashboard/feeds.tmpl old mode 100644 new mode 100755 index 9724de2697..614c328c67 --- a/templates/user/dashboard/feeds.tmpl +++ b/templates/user/dashboard/feeds.tmpl @@ -77,13 +77,15 @@ {{else if eq .GetOpType 26}} {{$.i18n.Tr "action.task_npudebugjob" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 27}} - {{$.i18n.Tr "action.task_trainjob" .GetRepoLink .Content .RefName | Str2html}} + {{$.i18n.Tr "action.task_nputrainjob" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 28}} {{$.i18n.Tr "action.task_inferencejob" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 29}} {{$.i18n.Tr "action.task_benchmark" .GetRepoLink .Content .RefName | Str2html}} {{else if eq .GetOpType 30}} {{$.i18n.Tr "action.task_createmodel" .GetRepoLink .RefName .RefName | Str2html}} + {{else if eq .GetOpType 31}} + {{$.i18n.Tr "action.task_gputrainjob" .GetRepoLink .Content .RefName | Str2html}} {{end}}

{{if or (eq .GetOpType 5) (eq .GetOpType 18)}} @@ -129,6 +131,8 @@ {{else if eq .GetOpType 30}} + {{else if eq .GetOpType 31}} + {{else}} {{svg (printf "octicon-%s" (ActionIcon .GetOpType)) 32}} {{end}}