#1722 GPU训练任务

Merged
ychao_1983 merged 44 commits from cb-train into V20220328 2 years ago
  1. +1
    -0
      models/action.go
  2. +8
    -0
      models/cloudbrain.go
  3. +0
    -5
      models/file_chunk.go
  4. +3
    -0
      modules/auth/cloudbrain.go
  5. +43
    -12
      modules/cloudbrain/cloudbrain.go
  6. +0
    -1
      modules/modelarts/modelarts.go
  7. +14
    -10
      modules/setting/setting.go
  8. +4
    -2
      options/locale/locale_en-US.ini
  9. +2
    -1
      options/locale/locale_zh-CN.ini
  10. +9
    -5
      public/home/home.js
  11. +3
    -3
      routers/admin/cloudbrains.go
  12. +10
    -2
      routers/api/v1/api.go
  13. +58
    -3
      routers/api/v1/repo/cloudbrain.go
  14. +54
    -19
      routers/api/v1/repo/modelarts.go
  15. +163
    -23
      routers/repo/cloudbrain.go
  16. +47
    -14
      routers/repo/modelarts.go
  17. +13
    -0
      routers/routes/routes.go
  18. +1
    -1
      services/socketwrap/clientManager.go
  19. +451
    -0
      templates/repo/cloudbrain/trainjob/new.tmpl
  20. +653
    -0
      templates/repo/cloudbrain/trainjob/show.tmpl
  21. +1
    -1
      templates/repo/editor/upload.tmpl
  22. +39
    -4
      templates/repo/modelarts/trainjob/index.tmpl
  23. +18
    -0
      templates/repo/modelarts/trainjob/new.tmpl
  24. +5
    -1
      templates/user/dashboard/feeds.tmpl

+ 1
- 0
models/action.go View File

@@ -57,6 +57,7 @@ const (
ActionCreateInferenceTask // 28
ActionCreateBenchMarkTask //29
ActionCreateNewModelTask //30
ActionCreateGPUTrainTask //31
)

// Action represents user operation type and other information to


+ 8
- 0
models/cloudbrain.go View File

@@ -20,9 +20,17 @@ type CloudbrainStatus string
type JobType string
type ModelArtsJobStatus string

const (
TypeCloudBrainOne int = iota
TypeCloudBrainTwo

TypeCloudBrainAll = -1
)

const (
NPUResource = "NPU"
GPUResource = "CPU/GPU"
AllResource = "all"

//notebook storage category
EVSCategory = "EVS"


+ 0
- 5
models/file_chunk.go View File

@@ -13,11 +13,6 @@ const (
FileUploaded
)

const (
TypeCloudBrainOne int = iota
TypeCloudBrainTwo
)

type FileChunk struct {
ID int64 `xorm:"pk autoincr"`
UUID string `xorm:"uuid UNIQUE"`


+ 3
- 0
modules/auth/cloudbrain.go View File

@@ -20,6 +20,9 @@ type CreateCloudBrainForm struct {
ResourceSpecId int `form:"resource_spec_id" binding:"Required"`
BenchmarkTypeID int `form:"benchmark_types_id"`
BenchmarkChildTypeID int `form:"benchmark_child_types_id"`
BootFile string `form:"boot_file"`
Params string `form:"run_para_list"`
BranchName string `form:"branch_name"`
}

type CommitImageCloudBrainForm struct {


+ 43
- 12
modules/cloudbrain/cloudbrain.go View File

@@ -15,14 +15,13 @@ import (
)

const (
Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;
service ssh stop;
jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CodeMountPath = "/code"
DataSetMountPath = "/dataset"
ModelMountPath = "/model"
LogFile = "log.txt"
BenchMarkMountPath = "/benchmark"
BenchMarkResourceID = 1
Snn4imagenetMountPath = "/snn4imagenet"
@@ -32,10 +31,13 @@ const (
SubTaskName = "task1"

Success = "S000"

DefaultBranchName = "master"
)

var (
ResourceSpecs *models.ResourceSpecs
ResourceSpecs *models.ResourceSpecs
TrainResourceSpecs *models.ResourceSpecs
)

func isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool {
@@ -147,7 +149,7 @@ func AdminOrJobCreaterRightForTrain(ctx *context.Context) {

}

func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error {
func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description, branchName, bootFile, params string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error {
dataActualPath := setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
@@ -155,13 +157,25 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
uuid

var resourceSpec *models.ResourceSpec
if ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
}

for _, spec := range ResourceSpecs.ResourceSpec {
if resourceSpecId == spec.Id {
resourceSpec = spec
var versionCount int
if jobType == string(models.JobTypeTrain) {
versionCount = 1
if TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs)
}
for _, spec := range TrainResourceSpecs.ResourceSpec {
if resourceSpecId == spec.Id {
resourceSpec = spec
}
}
} else {
if ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
}
for _, spec := range ResourceSpecs.ResourceSpec {
if resourceSpecId == spec.Id {
resourceSpec = spec
}
}

}
@@ -171,6 +185,15 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
return errors.New("no such resourceSpec")
}

var datasetName string
attach, err := models.GetAttachmentByUUID(uuid)
if err != nil {
//for benchmark, do not return error
log.Error("GetAttachmentByUUID failed:%v", err)
} else {
datasetName = attach.Name
}

jobResult, err := CreateJob(jobName, models.CreateJobParams{
JobName: jobName,
RetryCount: 1,
@@ -265,6 +288,12 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,
BenchmarkTypeID: benchmarkTypeID,
BenchmarkChildTypeID: benchmarkChildTypeID,
Description: description,
IsLatestVersion: "1",
VersionCount: versionCount,
BranchName: branchName,
BootFile: bootFile,
DatasetName: datasetName,
Parameters: params,
})

if err != nil {
@@ -280,6 +309,8 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command,

if string(models.JobTypeBenchmark) == jobType {
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask)
} else if string(models.JobTypeTrain) == jobType {
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, displayJobName, models.ActionCreateGPUTrainTask)
} else {
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask)
}


+ 0
- 1
modules/modelarts/modelarts.go View File

@@ -56,7 +56,6 @@ const (
PerPage = 10
IsLatestVersion = "1"
NotLatestVersion = "0"
DebugType = -1
VersionCount = 1

SortByCreateTime = "create_time"


+ 14
- 10
modules/setting/setting.go View File

@@ -452,16 +452,18 @@ var (
DecompressOBSTaskName string

//cloudbrain config
CBAuthUser string
CBAuthPassword string
RestServerHost string
JobPath string
CBCodePathPrefix string
JobType string
GpuTypes string
DebugServerHost string
ResourceSpecs string
MaxDuration int64
CBAuthUser string
CBAuthPassword string
RestServerHost string
JobPath string
CBCodePathPrefix string
JobType string
GpuTypes string
DebugServerHost string
ResourceSpecs string
MaxDuration int64
TrainGpuTypes string
TrainResourceSpecs string

//benchmark config
IsBenchmarkEnabled bool
@@ -1285,6 +1287,8 @@ func NewContext() {
GpuTypes = sec.Key("GPU_TYPES").MustString("")
ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("")
MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400)
TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("")
TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("")

sec = Cfg.Section("benchmark")
IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false)


+ 4
- 2
options/locale/locale_en-US.ini View File

@@ -1010,7 +1010,8 @@ modelarts.train_job.parameter_value=Parameter Value
modelarts.train_job.resource_setting=resource_setting
modelarts.train_job.resource_setting_info=resource_setting_info
modelarts.train_job.resource_pool=resource_pool
modelarts.train_job.resource_type=resource_type
modelarts.train_job.resource_type=Resource Type
modelarts.train_job.train_dataset=Train Dataset
modelarts.train_job.standard=Standard
modelarts.train_job.NAS_address=NAS Address
modelarts.train_job.NAS_mount_path=NAS Mount Path
@@ -2790,10 +2791,11 @@ reject_pull_request = `suggested changes for <a href="%s/pulls/%s">%s#%[2]s</a>`
upload_dataset=`upload dataset <a href="%s/datasets?type=%s">%s</a>`
task_gpudebugjob=`created CPU/GPU type debugging task<a href="%s/cloudbrain/%s">%s</a>`
task_npudebugjob=`created NPU type debugging task <a href="%s/modelarts/notebook/%s">%s</a>`
task_trainjob=`created training task<a href="%s/modelarts/train-job/%s">%s</a>`
task_nputrainjob=`created NPU training task<a href="%s/modelarts/train-job/%s">%s</a>`
task_inferencejob=`created reasoning task <a href="%s/modelarts/inference-job/%s">%s</a>`
task_benchmark=`created profiling task <a href="%s/cloudbrain/benchmark/%s">%s</a>`
task_createmodel=`created new model <a href="%s/modelmanage/show_model_info?name=%s">%s</a>`
task_gputrainjob=`created CPU/GPU training task<a href="%s/cloudbrain/train-job/%s">%s</a>`

[tool]
ago = %s ago


+ 2
- 1
options/locale/locale_zh-CN.ini View File

@@ -2797,10 +2797,11 @@ reject_pull_request=`建议变更 <a href="%s/pulls/%s">%s#%[2]s</a>`
upload_dataset=`上传了数据集文件 <a href="%s/datasets?type=%s">%s</a>`
task_gpudebugjob=`创建了CPU/GPU类型调试任务 <a href="%s/cloudbrain/%s">%s</a>`
task_npudebugjob=`创建了NPU类型调试任务 <a href="%s/modelarts/notebook/%s">%s</a>`
task_trainjob=`创建了训练任务 <a href="%s/modelarts/train-job/%s">%s</a>`
task_nputrainjob=`创建了NPU类型训练任务 <a href="%s/modelarts/train-job/%s">%s</a>`
task_inferencejob=`创建了推理任务 <a href="%s/modelarts/inference-job/%s">%s</a>`
task_benchmark=`创建了评测任务 <a href="%s/cloudbrain/benchmark/%s">%s</a>`
task_createmodel=`导入了新模型 <a href="%s/modelmanage/show_model_info?name=%s">%s</a>`
task_gputrainjob=`创建了CPU/GPU类型训练任务 <a href="%s/cloudbrain/train-job/%s">%s</a>`

[tool]
ago=%s前


+ 9
- 5
public/home/home.js View File

@@ -135,7 +135,7 @@ socket.onmessage = function (e) {
html += recordPrefix + actionName;
html += " <a href=\"" + getRepoLink(record) + "\" rel=\"nofollow\">" + getRepotext(record) + "</a>"
}
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30"){
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" || record.OpType == "31"){
html += recordPrefix + actionName;
html += " <a href=\"" + getTaskLink(record) + "\" rel=\"nofollow\">" + record.RefName + "</a>"
}
@@ -175,6 +175,8 @@ function getTaskLink(record){
re = re + "/cloudbrain/benchmark/" + record.Content;
}else if(record.OpType == 30){
re = re + "/modelmanage/show_model_info?name=" + record.RefName;
}else if(record.OpType == 31){
re = re + "/cloudbrain/train-job/" + record.Content;
}
re = encodeURI(re);
return re;
@@ -321,10 +323,11 @@ var actionNameZH={
"24":"上传了数据集文件",
"25":"创建了CPU/GPU类型调试任务",
"26":"创建了NPU类型调试任务",
"27":"创建了训练任务",
"27":"创建了NPU类型训练任务",
"28":"创建了推理任务",
"29":"创建了评测任务",
"30":"导入了新模型"
"30":"导入了新模型",
"31":"创建了CPU/GPU类型训练任务"
};

var actionNameEN={
@@ -346,10 +349,11 @@ var actionNameEN={
"24":" upload dataset ",
"25":" created CPU/GPU type debugging task ",
"26":" created NPU type debugging task ",
"27":" created training task",
"27":" created NPU type training task",
"28":" created reasoning task",
"29":" created profiling task",
"30":" created new model"
"30":" created new model",
"31":" created CPU/GPU type training task",
};

var repoAndOrgZH={


+ 3
- 3
routers/admin/cloudbrains.go View File

@@ -41,7 +41,7 @@ func CloudBrains(ctx *context.Context) {
if page <= 0 {
page = 1
}
debugType := modelarts.DebugType
debugType := models.TypeCloudBrainAll
if listType == models.GPUResource {
debugType = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
@@ -121,7 +121,7 @@ func DownloadCloudBrains(ctx *context.Context) {
Page: page,
PageSize: 1,
},
Type: modelarts.DebugType,
Type: models.TypeCloudBrainAll,
NeedRepoInfo: false,
IsLatestVersion: modelarts.IsLatestVersion,
})
@@ -151,7 +151,7 @@ func DownloadCloudBrains(ctx *context.Context) {
Page: page,
PageSize: pageSize,
},
Type: modelarts.DebugType,
Type: models.TypeCloudBrainAll,
NeedRepoInfo: true,
IsLatestVersion: modelarts.IsLatestVersion,
})


+ 10
- 2
routers/api/v1/api.go View File

@@ -62,10 +62,10 @@ import (
"net/http"
"strings"

"code.gitea.io/gitea/routers/authentication"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth"

"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
@@ -77,6 +77,7 @@ import (
"code.gitea.io/gitea/routers/api/v1/repo"
_ "code.gitea.io/gitea/routers/api/v1/swagger" // for swagger generation
"code.gitea.io/gitea/routers/api/v1/user"
"code.gitea.io/gitea/routers/authentication"
repo_ext "code.gitea.io/gitea/routers/repo"

"gitea.com/macaron/binding"
@@ -882,6 +883,13 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/cloudbrain", func() {
m.Get("/:id", repo.GetCloudbrainTask)
m.Get("/:id/log", repo.CloudbrainGetLog)
m.Group("/train-job", func() {
m.Group("/:jobid", func() {
m.Get("", repo.GetModelArtsTrainJobVersion)
m.Get("/model_list", repo.CloudBrainModelList)
m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.CloudBrainStop)
})
})
}, reqRepoReader(models.UnitTypeCloudBrain))
m.Group("/modelarts", func() {
m.Group("/notebook", func() {


+ 58
- 3
routers/api/v1/repo/cloudbrain.go View File

@@ -6,16 +6,19 @@
package repo

import (
"code.gitea.io/gitea/modules/timeutil"
"encoding/json"
"net/http"
"sort"
"strings"
"time"

"code.gitea.io/gitea/modules/log"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
routerRepo "code.gitea.io/gitea/routers/repo"
)

// cloudbrain get job task by jobid
@@ -161,3 +164,55 @@ func CloudbrainGetLog(ctx *context.Context) {

return
}

func CloudBrainModelList(ctx *context.APIContext) {
var (
err error
)

var jobID = ctx.Params(":jobid")
var versionName = ctx.Query("version_name")
parentDir := ctx.Query("parentDir")
dirArray := strings.Split(parentDir, "/")

task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
if err != nil {
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
return
}

//get dirs
dirs, err := routerRepo.GetModelDirs(task.JobName, parentDir)
if err != nil {
log.Error("GetModelDirs failed:%v", err.Error(), ctx.Data["msgID"])
ctx.ServerError("GetModelDirs failed:", err)
return
}

var fileInfos []storage.FileInfo
err = json.Unmarshal([]byte(dirs), &fileInfos)
if err != nil {
log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"])
ctx.ServerError("json.Unmarshal failed:", err)
return
}

for i, fileInfo := range fileInfos {
temp, _ := time.Parse("2006-01-02 15:04:05", fileInfo.ModTime)
fileInfos[i].ModTime = temp.Local().Format("2006-01-02 15:04:05")
}

sort.Slice(fileInfos, func(i, j int) bool {
return fileInfos[i].ModTime > fileInfos[j].ModTime
})

ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
"VersionName": versionName,
"StatusOK": 0,
"Path": dirArray,
"Dirs": fileInfos,
"task": task,
"PageIsCloudBrain": true,
})
}

+ 54
- 19
routers/api/v1/repo/modelarts.go View File

@@ -6,16 +6,17 @@
package repo

import (
"code.gitea.io/gitea/modules/timeutil"
"net/http"
"strconv"
"strings"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
routerRepo "code.gitea.io/gitea/routers/repo"
)

@@ -133,27 +134,61 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
ctx.NotFound(err)
return
}
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
if err != nil {
ctx.NotFound(err)
return
}
if job.StartTime == 0 && result.StartTime > 0 {
job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
}
job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
job.Duration = result.Duration / 1000
job.TrainJobDuration = result.TrainJobDuration

job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
if job.Type == models.TypeCloudBrainOne {
jobResult, err := cloudbrain.GetJob(job.JobID)
if err != nil {
ctx.NotFound(err)
log.Error("GetJob failed:", err)
return
}
result, err := models.ConvertToJobResultPayload(jobResult.Payload)
if err != nil {
ctx.NotFound(err)
log.Error("ConvertToJobResultPayload failed:", err)
return
}

if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
job.EndTime = job.StartTime.Add(job.Duration)
}
job.Status = result.JobStatus.State
if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
taskRoles := result.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))

err = models.UpdateTrainJobVersion(job)
if err != nil {
log.Error("UpdateJob failed:", err)
job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
job.ContainerID = taskRes.TaskStatuses[0].ContainerID
job.Status = taskRes.TaskStatuses[0].State
}

if result.JobStatus.State != string(models.JobWaiting) {
err = models.UpdateJob(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}
}
} else {
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
if err != nil {
ctx.NotFound(err)
return
}

if job.StartTime == 0 && result.StartTime > 0 {
job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
}
job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
job.Duration = result.Duration / 1000
job.TrainJobDuration = result.TrainJobDuration

job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)

if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
job.EndTime = job.StartTime.Add(job.Duration)
}

err = models.UpdateTrainJobVersion(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}
}

ctx.JSON(http.StatusOK, map[string]interface{}{


+ 163
- 23
routers/repo/cloudbrain.go View File

@@ -37,6 +37,9 @@ const (
tplCloudBrainBenchmarkIndex base.TplName = "repo/cloudbrain/benchmark/index"
tplCloudBrainBenchmarkNew base.TplName = "repo/cloudbrain/benchmark/new"
tplCloudBrainBenchmarkShow base.TplName = "repo/cloudbrain/benchmark/show"

tplCloudBrainTrainJobNew base.TplName = "repo/cloudbrain/trainjob/new"
tplCloudBrainTrainJobShow base.TplName = "repo/cloudbrain/trainjob/show"
)

var (
@@ -45,6 +48,7 @@ var (
benchmarkTypes *models.BenchmarkTypes
benchmarkGpuInfos *models.GpuInfos
benchmarkResourceSpecs *models.ResourceSpecs
trainGpuInfos *models.GpuInfos
)

const BENCHMARK_TYPE_CODE = "repo.cloudbrain.benchmark.types"
@@ -143,6 +147,11 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
}
ctx.Data["gpu_types"] = gpuInfos.GpuInfo

if trainGpuInfos == nil {
json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos)
}
ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo

if benchmarkGpuInfos == nil {
json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos)
}
@@ -157,6 +166,14 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error {
json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
}
ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec

if cloudbrain.TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
}
ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec
ctx.Data["params"] = ""
ctx.Data["branchName"] = ctx.Repo.BranchName

ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath
ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled

@@ -184,38 +201,52 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
image := form.Image
uuid := form.Attachment
jobType := form.JobType
command := cloudbrain.Command
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName
repo := ctx.Repo.Repository

tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
tpl := tplCloudBrainNew
command := cloudbrain.Command
if jobType == string(models.JobTypeTrain) {
tpl = tplCloudBrainTrainJobNew
commandTrain, err := getTrainJobCommand(form)
if err != nil {
log.Error("getTrainJobCommand failed: %v", err)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}

command = commandTrain
}

tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
if err == nil {
if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("the job name did already exist", tplCloudBrainNew, &form)
ctx.RenderWithErr("the job name did already exist", tpl, &form)
return
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("system error", tplCloudBrainNew, &form)
ctx.RenderWithErr("system error", tpl, &form)
return
}
}

if !jobNamePattern.MatchString(displayJobName) {
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainNew, &form)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
return
}

if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) {
if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) && jobType != string(models.JobTypeTrain) {
log.Error("jobtype error:", jobType, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("jobtype error", tplCloudBrainNew, &form)
ctx.RenderWithErr("jobtype error", tpl, &form)
return
}

@@ -223,18 +254,21 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
if err != nil {
log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("system error", tplCloudBrainNew, &form)
ctx.RenderWithErr("system error", tpl, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplCloudBrainNew, &form)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
return
}
}

downloadCode(repo, codePath)
if branchName == "" {
branchName = cloudbrain.DefaultBranchName
}
downloadCode(repo, codePath, branchName)
uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/")

modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
@@ -268,15 +302,19 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description,
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, branchName, form.BootFile, form.Params,
0, 0, resourceSpecId)
if err != nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr(err.Error(), tplCloudBrainNew, &form)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}

ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
if jobType == string(models.JobTypeTrain) {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=all")
} else {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
}
}

func CloudBrainRestart(ctx *context.Context) {
@@ -342,18 +380,29 @@ func CloudBrainRestart(ctx *context.Context) {
}

func CloudBrainBenchMarkShow(ctx *context.Context) {
cloudBrainShow(ctx, tplCloudBrainBenchmarkShow)
cloudBrainShow(ctx, tplCloudBrainBenchmarkShow, models.JobTypeBenchmark)
}

func CloudBrainShow(ctx *context.Context) {
cloudBrainShow(ctx, tplCloudBrainShow)
cloudBrainShow(ctx, tplCloudBrainShow, models.JobTypeDebug)
}

func CloudBrainTrainJobShow(ctx *context.Context) {
cloudBrainShow(ctx, tplCloudBrainTrainJobShow, models.JobTypeTrain)
}

func cloudBrainShow(ctx *context.Context, tpName base.TplName) {
func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.JobType) {
ctx.Data["PageIsCloudBrain"] = true
var ID = ctx.Params(":id")
debugListType := ctx.Query("debugListType")
task, err := models.GetCloudbrainByID(ID)

var task *models.Cloudbrain
var err error
if jobType == models.JobTypeTrain {
task, err = models.GetCloudbrainByJobID(ctx.Params(":jobid"))
} else {
task, err = models.GetCloudbrainByID(ctx.Params(":id"))
}

if err != nil {
log.Info("error:" + err.Error())
ctx.Data["error"] = err.Error()
@@ -368,6 +417,16 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) {
jobRes.Resource.Memory = strings.ReplaceAll(jobRes.Resource.Memory, "Mi", "MB")
spec := "GPU数:" + strconv.Itoa(jobRes.Resource.NvidiaComGpu) + ",CPU数:" + strconv.Itoa(jobRes.Resource.CPU) + ",内存(MB):" + jobRes.Resource.Memory
ctx.Data["resource_spec"] = spec
if task.JobType == string(models.JobTypeTrain) {
if trainGpuInfos == nil {
json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos)
}
for _, resourceType := range trainGpuInfos.GpuInfo {
if resourceType.Queue == jobRes.Config.GpuType {
ctx.Data["resource_type"] = resourceType.Value
}
}
}
taskRoles := jobRes.TaskRoles
if jobRes.JobStatus.State != string(models.JobFailed) {

@@ -431,6 +490,29 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) {
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
}
ctx.Data["duration"] = task.TrainJobDuration

if len(task.Parameters) > 0 {
var parameters models.Parameters

err := json.Unmarshal([]byte(task.Parameters), &parameters)
if err != nil {
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err)
task.Parameters = ""
} else {
if len(parameters.Parameter) > 0 {
paramTemp := ""
for _, Parameter := range parameters.Parameter {
param := Parameter.Label + " = " + Parameter.Value + "; "
paramTemp = paramTemp + param
}
task.Parameters = paramTemp[:len(paramTemp)-2]
} else {
task.Parameters = ""
}
}

}

ctx.Data["task"] = task
ctx.Data["jobName"] = task.JobName
ctx.Data["displayJobName"] = task.DisplayJobName
@@ -438,6 +520,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) {
version_list_task = append(version_list_task, task)
ctx.Data["version_list_task"] = version_list_task
ctx.Data["debugListType"] = debugListType
ctx.Data["canDownload"] = cloudbrain.CanDeleteJob(ctx, task)
ctx.HTML(200, tpName)
}

@@ -509,11 +592,12 @@ func CloudBrainStop(ctx *context.Context) {
break
}

ctx.JSON(200, map[string]string{
ctx.JSON(200, map[string]interface{}{
"result_code": resultCode,
"error_msg": errorMsg,
"status": status,
"id": ID,
"StatusOK": 0,
})
}

@@ -765,8 +849,8 @@ func GetRate(ctx *context.Context) {
}
}

func downloadCode(repo *models.Repository, codePath string) error {
if err := git.Clone(repo.RepoPath(), codePath, git.CloneRepoOptions{}); err != nil {
func downloadCode(repo *models.Repository, codePath, branchName string) error {
if err := git.Clone(repo.RepoPath(), codePath, git.CloneRepoOptions{Branch: branchName}); err != nil {
log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
return err
}
@@ -1471,7 +1555,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF
}

os.RemoveAll(codePath)
if err := downloadCode(repo, codePath); err != nil {
if err := downloadCode(repo, codePath, cloudbrain.DefaultBranchName); err != nil {
log.Error("downloadCode failed, %v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form)
@@ -1536,7 +1620,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF
err = cloudbrain.GenerateTask(ctx, displayJobName, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description,
storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, cloudbrain.DefaultBranchName, "", "",
benchmarkTypeID, benchmarkChildTypeID, resourceSpecId)
if err != nil {
cloudBrainNewDataPrepare(ctx)
@@ -1562,10 +1646,66 @@ func BenchmarkDel(ctx *context.Context) {
}
}

func CloudBrainTrainJobNew(ctx *context.Context) {
err := cloudBrainNewDataPrepare(ctx)
if err != nil {
ctx.ServerError("get new train-job info failed", err)
return
}
ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew)
}

func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) {
var command string
bootFile := form.BootFile
params := form.Params

if !strings.HasSuffix(bootFile, ".py") {
log.Error("bootFile(%s) format error", bootFile)
return command, errors.New("bootFile format error")
}

var parameters models.Parameters
var param string
if len(params) != 0 {
err := json.Unmarshal([]byte(params), &parameters)
if err != nil {
log.Error("Failed to Unmarshal params: %s (%v)", params, err)
return command, err
}

for _, parameter := range parameters.Parameter {
param += " --" + parameter.Label + "=" + parameter.Value
}
}

command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile

return command, nil
}

func CloudBrainTrainJobDel(ctx *context.Context) {
var listType = ctx.Query("listType")
if err := deleteCloudbrainJob(ctx); err != nil {
log.Error("deleteCloudbrainJob failed: %v", err, ctx.Data["msgID"])
ctx.ServerError(err.Error(), err)
return
}

var isAdminPage = ctx.Query("isadminpage")
if ctx.IsUserSiteAdmin() && isAdminPage == "true" {
ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains")
} else {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
}
}

func GetBenchmarkTypes(ctx *context.Context) *models.BenchmarkTypes {
var lang = ctx.Locale.Language()
if benchmarkTypesMap[lang] == nil {
var val = i18n.Tr(lang, BENCHMARK_TYPE_CODE)
//use config
val = setting.BenchmarkTypes
var tempType *models.BenchmarkTypes
if err := json.Unmarshal([]byte(val), &tempType); err != nil {
log.Error("json.Unmarshal BenchmarkTypes(%s) failed:%v", val, err, ctx.Data["MsgID"])


+ 47
- 14
routers/repo/modelarts.go View File

@@ -47,20 +47,26 @@ const (
)

func DebugJobIndex(ctx *context.Context) {
debugListType := ctx.Query("debugListType")
ctx.Data["ListType"] = debugListType
listType := ctx.Query("debugListType")
ctx.Data["ListType"] = listType
MustEnableCloudbrain(ctx)
repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}
debugType := modelarts.DebugType
typeCloudBrain := models.TypeCloudBrainAll
jobTypeNot := false
if debugListType == models.GPUResource {
debugType = models.TypeCloudBrainOne
} else if debugListType == models.NPUResource {
debugType = models.TypeCloudBrainTwo
if listType == models.GPUResource {
typeCloudBrain = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
typeCloudBrain = models.TypeCloudBrainTwo
} else if listType == models.AllResource {
typeCloudBrain = models.TypeCloudBrainAll
} else {
log.Error("listType(%s) error", listType)
ctx.ServerError("listType error", errors.New("listType error"))
return
}

var jobTypes []string
@@ -71,7 +77,7 @@ func DebugJobIndex(ctx *context.Context) {
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: debugType,
Type: typeCloudBrain,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
})
@@ -93,7 +99,7 @@ func DebugJobIndex(ctx *context.Context) {
ctx.Data["Tasks"] = ciTasks
ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx)
ctx.Data["RepoIsEmpty"] = repo.IsEmpty
ctx.Data["debugListType"] = debugListType
ctx.Data["debugListType"] = listType
ctx.HTML(200, tplDebugJobIndex)
}

@@ -480,6 +486,26 @@ func TrainJobIndex(ctx *context.Context) {
page = 1
}

listType := ctx.Query("listType")
if len(listType) == 0 {
listType = models.AllResource
}
ctx.Data["ListType"] = listType

typeCloudBrain := models.TypeCloudBrainAll
if listType == models.GPUResource {
typeCloudBrain = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
typeCloudBrain = models.TypeCloudBrainTwo
} else if listType == models.AllResource {
typeCloudBrain = models.TypeCloudBrainAll
}
//else {
// log.Error("listType(%s) error", listType)
// ctx.ServerError("listType error", errors.New("listType error"))
// return
//}

var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
@@ -488,7 +514,7 @@ func TrainJobIndex(ctx *context.Context) {
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
Type: typeCloudBrain,
JobTypeNot: false,
JobTypes: jobTypes,
IsLatestVersion: modelarts.IsLatestVersion,
@@ -501,11 +527,16 @@ func TrainJobIndex(ctx *context.Context) {
for i, task := range tasks {
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
tasks[i].ComputeResource = models.NPUResource
if task.Cloudbrain.Type == models.TypeCloudBrainOne {
tasks[i].ComputeResource = models.GPUResource
} else if task.Cloudbrain.Type == models.TypeCloudBrainTwo {
tasks[i].ComputeResource = models.NPUResource
}
}

pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
pager.SetDefaultParams(ctx)
pager.AddParam(ctx, "listType", "ListType")
ctx.Data["Page"] = pager

ctx.Data["PageIsCloudBrain"] = true
@@ -1555,6 +1586,7 @@ func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *model

func TrainJobDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
var listType = ctx.Query("listType")
repo := ctx.Repo.Repository

var jobTypes []string
@@ -1596,12 +1628,13 @@ func TrainJobDel(ctx *context.Context) {
if ctx.IsUserSiteAdmin() && isAdminPage == "true" {
ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains")
} else {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
}
}

func TrainJobStop(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
var listType = ctx.Query("listType")
task := ctx.Cloudbrain

_, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
@@ -1611,7 +1644,7 @@ func TrainJobStop(ctx *context.Context) {
return
}

ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
}

func canUserCreateTrainJob(uid int64) (bool, error) {
@@ -2276,7 +2309,7 @@ func SetJobCount(ctx *context.Context) {
repoId := ctx.Repo.Repository.ID
_, jobCount, err := models.Cloudbrains(&models.CloudbrainsOptions{
RepoID: repoId,
Type: modelarts.DebugType,
Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Get job faild:", err)


+ 13
- 0
routers/routes/routes.go View File

@@ -1035,6 +1035,19 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainBenchmarkCreate)
m.Get("/get_child_types", repo.GetChildTypes)
})

m.Group("/train-job", func() {
m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.CloudBrainTrainJobShow)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainTrainJobDel)
//m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels)
m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadModel)
//m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobNewVersion)
//m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion)
})
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.CloudBrainTrainJobNew)
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate)
})
}, context.RepoRef())
m.Group("/modelmanage", func() {
m.Post("/create_model", reqRepoModelManageWriter, repo.SaveModel)


+ 1
- 1
services/socketwrap/clientManager.go View File

@@ -10,7 +10,7 @@ import (
"github.com/elliotchance/orderedmap"
)

var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 25, 26, 27, 28, 29, 30}
var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 25, 26, 27, 28, 29, 30, 31}

type ClientsManager struct {
Clients *orderedmap.OrderedMap


+ 451
- 0
templates/repo/cloudbrain/trainjob/new.tmpl View File

@@ -0,0 +1,451 @@
{{template "base/head" .}}
<style>

.unite{
font-family: SourceHanSansSC-medium !important;
color: rgba(16, 16, 16, 100) !important;
}

.title{
font-size: 16px !important;
padding-left: 3rem !important;
}
.min_title{
font-size: 14px !important;
padding-left: 6rem !important;
margin-bottom: 2rem !important;

}
.width{
width:100% !important;
}
.width80{
width: 80.7% !important;
margin-left: 10px;
}
.width806{
width: 80.6% !important;
margin-left: -2px;
}
.width85{
width: 85% !important;
margin-left: 4.5rem !important;
}
.width81{
margin-left: 1.5rem !important;
width: 81% !important;
}

.add{font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 0px 5px 5px 0px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}
.min{
font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 5px 0px 0px 5px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}

</style>
<!-- <div class="ui page dimmer">
<div class="ui text loader">{{.i18n.Tr "loading"}}</div>
</div> -->
<div id="mask">
<div id="loadingPage">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
{{template "base/alert" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_names" value="">
<input type="hidden" id="ai_flaver_name" name="flaver_names" value="">
<h4 class="unite title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/modelarts/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
</div>
</div>
<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="64">
<span class="tooltips" style="display: block;">{{.i18n.Tr "cloudbrain.job_name_rule"}}</span>
</div>
<div class="unite min_title inline field">
<label style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}&nbsp;&nbsp;</label>
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
</div>
<div class="ui divider"></div>

<h4 class="unite title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>

<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>

<div class="inline required field" style="display: none;">
<label>{{.i18n.Tr "cloudbrain.task_type"}}</label>
<select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px' name="job_type">
<option name="job_type" value="TRAIN">TRAIN</option>
</select>
</div>

<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.gpu_type"}}</label>
<select id="cloudbrain_gpu_type" class="ui search width806 dropdown" placeholder="选择GPU类型" style='width:385px' name="gpu_type">
{{range .train_gpu_types}}
<option value="{{.Queue}}">{{.Value}}</option>
{{end}}
</select>
</div>

<div class="required unite min_title inline field" style="position: relative;">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}&nbsp;</label>
<input class="width81" type="text" list="cloudbrain_image" placeholder="{{.i18n.Tr "cloudbrain.choose_mirror"}}" name="image" required autofocus maxlength="255">
<i class="times circle outline icon icons" style="visibility: hidden;" onclick="clearValue()"></i>
<datalist class="ui search" id="cloudbrain_image" name="image">
{{range .images}}
<option name="image" value="{{.Place}}">{{.PlaceView}}</option>
{{end}}
{{range .public_images}}
<option name="image" value="{{.Place}}">{{.PlaceView}}</option>
{{end}}
</datalist>
</div>

<div class="inline unite min_title field required">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .bootFile}}
<input style="width: 33.5%;" name="boot_file" id="trainjob_boot_file" value="{{.bootFile}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 33.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://git.openi.org.cn/OpenIOSSG/MINIST_Example" target="_blank">查看样例</a>
</div>

<div class="required unite min_title inline field" style="position: relative;">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.dataset"}}</label>
<select id="cloudbrain_dataset" class="ui search dropdown width80" placeholder="选择数据集" style='width:385px' name="attachment" required>

{{range .attachments}}
<option name="attachment" value="{{.UUID}}">{{.Attachment.Name}}</option>
{{end}}
</select>
<span class="tooltips">训练脚本存储在/code中,数据集存储在/dataset中,训练输出请存储在/model中以供后续下载。</span>
</div>
<div class="inline unite min_title field">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;">
{{if .params}}
{{if ne 0 (len .params)}}
{{range $k ,$v := .params}}
<div class="two fields width85" id="para{{$k}}">
<div class="field">
<input type="text" name="shipping_first-name" value={{$v.Label}} required>
</div>
<div class="field">
<input type="text" name="shipping_last-name" value={{$v.Value}} required>
</div>
<span>
<i class="trash icon"></i>
</span>

</div>
{{end}}
{{end}}
{{end}}
</div>
</div>

<div class="required inline unite min_title field">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="cloudbrain_resource_spec" class="ui search dropdown" placeholder="选择资源规格" style='width:385px' name="resource_spec_id">
{{range .train_resource_specs}}
<option name="resource_spec_id" value="{{.Id}}">GPU数:{{.GpuNum}},CPU数:{{.CpuNum}},内存(MB):{{.MemMiB}},共享内存(MB):{{.ShareMemMiB}}</option>
{{end}}
</select>
</div>
<div class="inline unite min_title field">
<button class="ui create_train_job green button">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>
<!-- 模态框 -->
</form>
</div>
</div>
</div>
{{template "base/footer" .}}

<script>
//let url_href = window.location.pathname.split('create')[0]
//$(".ui.button").attr('href',url_href)
$('select.dropdown')
.dropdown();

$('.menu .item')
.tab();

let sever_num = $('#trainjob_work_server_num')
$('.add').click(function(){
sever_num.val(parseInt(sever_num.val())+1)
if(sever_num.val()>=26){
sever_num.val(parseInt(sever_num.val())-1)
}
})
$('.min').click(function(){
sever_num.val(parseInt(sever_num.val())-1)
if(sever_num.val()<=0){
sever_num.val(parseInt(sever_num.val())+1)
}
})
// 参数增加、删除、修改、保存
function Add_parameter(i){
value = '<div class="two fields width85" id= "para'+ i +'">' +
'<div class="field">' +
'<input type="text" name="shipping_first-name" required placeholder={{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}> ' +
'</div> ' +
'<div class="field"> ' +
'<input type="text" name="shipping_last-name" required placeholder={{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}>' +
'</div>'+
'<span>' +
'<i class="trash icon">' +
'</i>' +
'</span>' +
'</div>'
$(".dynamic.field").append(value)
}

$('#add_run_para').click(function(){
var len = $(".dynamic.field .two.fields").length
Add_parameter(len)
});

$(".dynamic.field").on("click",".trash.icon", function() {
var index = $(this).parent().parent().index()
$(this).parent().parent().remove()
var len = $(".dynamic.field .two.fields").length
$(".dynamic.field .two.fields").each(function(){
var cur_index = $(this).index()
$(this).attr('id', 'para' + cur_index)
})
});

$('.ui.parameter.green.button').click(function(){
var parameters = [];
$('table tr').each(function() {
$(this).find('td:eq(1)').each(function(){
parameters.push($(this).text());
})
$(this).find('input').each(function(){
parameters.push($(this).text())
})
});
$('.ui.parameter.modal')
.modal('hide');
for(var i = 2; i < parameters.length; i++){
switch(i) {
// 数据集uuid待完成
// case (2):
// console.log(1)
// break;
// $("#trainjob_datasets").val(parameters[i]);
// console.log($("#trainjob_datasets").val())
case (3):
$("input[name='boot_file']").val(parameters[i]);
break;
case (4):
var para = parameters[i].split(" ")
for(var j = 0; j < para.length; j++){
var para_name = para[j].split('=')[0]
var para_value = para[j].split('=')[1]
var len = $(".dynamic.field .two.fields").length
Add_parameter(len)
var pid = 'para' + len
$(".dynamic.field"+ " #" + pid + "").find("input[name=shipping_first-name]").val(para_name)
$(".dynamic.field"+ " #" + pid + "").find("input[name=shipping_last-name]").val(para_value)
}
break;
// 数据集pool_id待完成
// case (5):
// $("select[name='pool_id']").val(parameters[i]);
// break;
case (6):
$("input[name='work_server_number']").val(parameters[i]);
break;
}
}
})

$('.ui.save.checkbox').click(function(){
$(this).checkbox({
onChange: function(){
if ($('.ui.save.checkbox').checkbox('is checked')){
$('#save_para').removeClass("disabled")
}else{
$('#save_para').addClass("disabled")
}
}
});
})

$('.question.circle.icon').hover(function(){
$(this).popup('show')
});

$(".item.active.parameter_config").click(function(){
$('.ui.parameter.modal')
.modal('setting', 'closable', false)
.modal('show');
})

$('.ui.deny.button').click(function(){
$('.ui.parameter.modal')
.modal('hide');
})
$('select.dropdown')
.dropdown();

function validate(){
$('.ui.form')
.form({
on: 'blur',
fields: {
boot_file: {
identifier : 'boot_file',
rules: [
{
type: 'regExp[/.+\.py$/g]',
}
]
},
display_job_name:{
identifier : 'display_job_name',
rules: [
{
type: 'regExp[/^[a-zA-Z0-9-_]{1,64}[a-zA-Z0-9_]$/]',
}
]
},
attachment:{
identifier : 'attachment',
rules: [
{
type: 'empty',
}
]

},
work_server_number: {
identifier : 'work_server_number',
rules: [
{
type : 'integer[1..25]',
}
]
}
},
onSuccess: function(){
// $('.ui.page.dimmer').dimmer('show')
document.getElementById("mask").style.display = "block"
},
onFailure: function(e){
return false;
}
})
}
document.onreadystatechange = function() {
if (document.readyState === "complete") {
document.getElementById("mask").style.display = "none"
}
}
function send_run_para(){
var run_parameters = []
var msg = {}
$(".dynamic.field .two.fields").each(function(){
var para_name = $(this).find('input[name=shipping_first-name]').val()
var para_value = $(this).find('input[name=shipping_last-name]').val()
run_parameters.push({"label": para_name, "value": para_value})
})
msg["parameter"] = run_parameters
msg = JSON.stringify(msg)
$('#store_run_para').val(msg)
}
function get_name(){
let name1=$("#engine_name .text").text()
let name2=$("#flaver_name .text").text()
$("input#ai_engine_name").val(name1)
$("input#ai_flaver_name").val(name2)

}
$('.ui.create_train_job.green.button').click(function(e) {
get_name()
send_run_para()
validate()
})
</script>

+ 653
- 0
templates/repo/cloudbrain/trainjob/show.tmpl View File

@@ -0,0 +1,653 @@
{{template "base/head" .}}
<style>
.according-panel-heading{
box-sizing: border-box;
padding: 8px 16px;
color: #252b3a;
background-color: #f2f5fc;
line-height: 1.5;
cursor: pointer;
-moz-user-select: none;
-webkit-user-select: none;
-ms-user-select: none;
-khtml-user-select: none;
user-select: none;
}
.accordion-panel-title {
margin-top: 0;
margin-bottom: 0;
color: #252b3a;
}
.accordion-panel-title-content{
vertical-align: middle;
display: inline-block;
width: calc(100% - 32px);
cursor: default;
}
.acc-margin-bottom {
margin-bottom: 5px;
}
.title_text {
font-size: 12px;
}
.ac-display-inblock {
display: inline-block;
}
.cti-mgRight-sm {
margin-right: 8px;
}
.ac-text-normal {
font-size: 14px;
color: #575d6c;
}
.uc-accordionTitle-black {
color: #333;
}
.accordion-border{
border:1px solid #cce2ff;
}
.padding0{
padding: 0 !important;
}
.content-pad{
padding: 15px 35px;
}
.content-margin{
margin:10px 5px ;
}
.tab_2_content {
min-height: 360px;
margin-left: 10px;
}
.ac-grid {
display: block;
*zoom: 1;
}
.ac-grid-col {
float: left;
width: 100%;
}
.ac-grid-col2 .ac-grid-col {
width: 50%;
}
.ti-form {
text-align: left;
max-width: 100%;
vertical-align: middle;
}
.ti-form>tbody {
font-size: 12px;
}
.ti-form>tbody, .ti-form>tbody>tr {
vertical-align: inherit;
}
.ti-text-form-label {
padding-bottom: 20px;
padding-right: 20px;
color: #8a8e99;
font-size: 12px;
white-space: nowrap !important;
width: 80px;
line-height: 30px;
}
.ti-text-form-content{
line-height: 30px;
padding-bottom: 20px;
}
.ti-form>tbody>tr>td {
vertical-align: top;
white-space: normal;
}
td, th {
padding: 0;
}
.ac-grid-col .text-span {
width: 450px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.redo-color{
color: #3291F8;
}
.ti-action-menu-item:not(:last-child){
margin-right: 10px;
padding-right: 11px;
text-decoration: none!important;
color: #526ecc;
cursor: pointer;
display: inline-block;
-moz-user-select: none;
-webkit-user-select: none;
-ms-user-select: none;
-khtml-user-select: none;
user-select: none;
position: relative;
}
.ti-action-menu-item:not(:last-child):after {
content: "";
display: inline-block;
position: absolute;
height: 12px;
right: 0;
top: 50%;
-webkit-transform: translateY(-6px);
-ms-transform: translateY(-6px);
-o-transform: translateY(-6px);
transform: translateY(-6px);
border-right: 1px solid #dfe1e6;
}
.text-width80{
width: 100px;
line-height: 30px;
}
.border-according{
border: 1px solid #dfe1e6;
}
.disabled {
cursor: default;
pointer-events: none;
color: rgba(0,0,0,.6) !important;
opacity: .45 !important;
}
.pad20{
border:0px !important;
}
.model_file_bread{
margin-bottom: -0.5rem !important;
padding-left: 1rem;
padding-top: 0.5rem ;
}
</style>
<div id="mask">
<div id="loadingPage">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<h4 class="ui header" id="vertical-segment">
<div class="ui breadcrumb">
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all">
{{.i18n.Tr "repo.cloudbrain"}}
</a>
<div class="divider"> / </div>
<a class="section" href="{{$.RepoLink}}/modelarts/train-job">
{{$.i18n.Tr "repo.modelarts.train_job"}}
</a>
<div class="divider"> / </div>
<div class="active section">{{.displayJobName}}</div>
</div>
</h4>
{{range $k ,$v := .version_list_task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}" data-repopath="{{$.RepoRelPath}}" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}">
<div class="{{if eq $k 0}}active{{end}} title padding0">
<div class="according-panel-heading">
<div class="accordion-panel-title">
<i class="dropdown icon"></i>
<span class="accordion-panel-title-content">
<span>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span>
<span class="cti-mgRight-sm uc-accordionTitle-black" id="{{.VersionName}}-duration-span">{{$.duration}}</span>
</div>
</span>
</span>
</div>
</div>
</div>
<div class="{{if eq $k 0}}active{{end}} content">
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<!--<a class="item" data-tab="second{{$k}}" onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a>-->
<a class="item" data-tab="third{{$k}}" onclick="loadModelFile({{.VersionName}},'','','init')">{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
<div class="tab_2_content">
<div class="ac-grid ac-grid-col2">
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_task"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DisplayJobName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.status"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-status">
{{.Status}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-duration">
{{$.duration}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{$.resource_type}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{$.resource_spec}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
镜像
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
{{.Image}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.code_version"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BranchName}}
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BootFile}}
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.train_dataset"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DatasetName}}
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80" >
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Parameters}}">
{{.Parameters}}
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.description"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Description}}">
{{.Description}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
<div class="ui tab" data-tab="second{{$k}}">
<div>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}" style="height: 300px !important; overflow: auto;">
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

<div class="ui tab" data-tab="third{{$k}}">
<input type="hidden" name="model{{.VersionName}}" value="-1">
<input type="hidden" name="modelback{{.VersionName}}" value="-1">
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'>
<div class="active section">{{.VersionName}}</div>
<div class="divider"> / </div>

</div>
<div id="dir_list{{.VersionName}}">
</div>
</div>
</div>
</div>
</div>
{{end}} {{template "base/paginate" .}}
</div>
<!-- 确认模态框 -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> 删除任务
</div>
<div class="content">
<p>你确认删除该任务么?此任务一旦删除不可恢复。</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> 取消操作
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> 确定操作
</div>
</div>
</div>
</div>
</div>
{{template "base/footer" .}}

<script>
$('.menu .item').tab()

$(document).ready(function(){
$('.ui.accordion').accordion({selector:{trigger:'.icon'}});
});
$(document).ready(function(){
$('.secondary.menu .item').tab();
});
let userName
let repoPath
let jobID
let downlaodFlag = {{$.canDownload}}
let taskID = {{$.task.ID}}
let realJobName = {{$.task.JobName}}
$(document).ready(function(){
let url = window.location.href;
let urlArr = url.split('/')
userName = urlArr.slice(-5)[0]
repoPath = urlArr.slice(-4)[0]
jobID = urlArr.slice(-1)[0]
})
function stopBubbling(e) {
e = window.event || e;
if (e.stopPropagation) {
e.stopPropagation(); //阻止事件 冒泡传播
} else {
e.cancelBubble = true; //ie兼容
}
}
let timeid = window.setInterval(loadJobStatus, 30000);
$(document).ready(loadJobStatus);

function loadLog(version_name){
document.getElementById("mask").style.display = "block"
$.get(`/api/v1/repos/${userName}/${repoPath}/cloudbrain/${taskID}/log?version_name=${version_name}&lines=50&order=asc`, (data) => {
$('input[name=end_line]').val(data.EndLine)
$('input[name=start_line]').val(data.StartLine)
$(`#log_file${version_name}`).text(data.Content)
document.getElementById("mask").style.display = "none"
}).fail(function(err) {
console.log(err);
document.getElementById("mask").style.display = "none"
});
}
function loadModelFile(version_name,parents,filename,init){
parents = parents || ''
filename = filename || ''
init = init || ''
console.log("start")
$.get(`/api/v1/repos/${userName}/${repoPath}/cloudbrain/train-job/${jobID}/model_list?version_name=${version_name}&parentDir=${parents}`, (data) => {
$(`#dir_list${version_name}`).empty()
renderDir(data,version_name)
if(init==="init"){
$(`input[name=model${version_name}]`).val("")
$(`input[name=modelback${version_name}]`).val(version_name)
$(`#file_breadcrumb${version_name}`).empty()
let htmlBread = ""
htmlBread += `<div class='active section'>${version_name}</div>`
htmlBread += "<div class='divider'> / </div>"
$(`#file_breadcrumb${version_name}`).append(htmlBread)
}else{
renderBrend(version_name,parents,filename,init)
}
}).fail(function(err) {
console.log(err,version_name);
});
}
function renderBrend(version_name,parents,filename,init){
if(init=="folder"){
let htmlBrend = ""
let sectionName=$(`#file_breadcrumb${version_name} .active.section`).text()
let parents1 = $(`input[name=model${version_name}]`).val()
let filename1 = $(`input[name=modelback${version_name}]`).val()
if(parents1===""){
$(`#file_breadcrumb${version_name} .active.section`).replaceWith(`<a class='section' onclick="loadModelFile('${version_name}','${parents1}','','init')">${sectionName}</a>`)
}else{
$(`#file_breadcrumb${version_name} .active.section`).replaceWith(`<a class='section' onclick="loadModelFile('${version_name}','${parents1}','${filename1}')">${sectionName}</a>`)
}
htmlBrend += `<div class='active section'>${filename}</div>`
htmlBrend += "<div class='divider'> / </div>"
$(`#file_breadcrumb${version_name}`).append(htmlBrend)
$(`input[name=model${version_name}]`).val(parents)
$(`input[name=modelback${version_name}]`).val(filename)
}else{
$(`input[name=model${version_name}]`).val(parents)
$(`input[name=modelback${version_name}]`).val(filename)
$(`#file_breadcrumb${version_name} a.section:contains(${filename})`).nextAll().remove()
$(`#file_breadcrumb${version_name} a.section:contains(${filename})`).replaceWith(`<div class='active section'>${filename}</div>`)
$(`#file_breadcrumb${version_name} div.section:contains(${filename})`).append("<div class='divider'> / </div>")
}
}
function renderDir(data,version_name){
let html=""
html += "<div class='ui grid' style='margin:0;'>"
html += "<div class='row' style='padding: 0;'>"
html += "<div class='ui sixteen wide column' style='padding:1rem;'>"
html += "<div class='dir list'>"
html += "<table id='repo-files-table' class='ui single line table pad20'>"
html += '<tbody>'
// html += "</tbody>"
for(let i=0;i<data.Dirs.length;i++){
let dirs_size = renderSize(data.Dirs[i].Size)
html += "<tr>"
html += "<td class='name six wid'>"
html += "<span class='truncate'>"
html += "<span class='octicon octicon-file-directory'>"
html += "</span>"
if(data.Dirs[i].IsDir){
html += `<a onclick="loadModelFile('${version_name}','${data.Dirs[i].ParenDir}','${data.Dirs[i].FileName}','folder')">`
html += "<span class='fitted'><i class='folder icon' width='16' height='16' aria-hidden='true'></i>" + data.Dirs[i].FileName + "</span>"
}else{
if(downlaodFlag){
html += `<a href="${location.href}/download_model?version_name=${version_name}&fileName=${data.Dirs[i].FileName}&parentDir=${data.Dirs[i].ParenDir}&jobName=${realJobName}">`
}
else{
html += `<a class="disabled">`
}
html += "<span class='fitted'><i class='file icon' width='16' height='16' aria-hidden='true'></i>" + data.Dirs[i].FileName + "</span>"
}
html += '</a>'
html += "</span>"
html += "</td>"
html += "<td class='message seven wide'>"
if(data.Dirs[i].IsDir){
html += "<span class='truncate has-emoji'></span>"
}else{
html += "<span class='truncate has-emoji'>"+ `${dirs_size}` + "</span>"
}
html += "</td>"

html += "<td class='text right age three wide'>"
html += "<span class='truncate has-emoji'>" + data.Dirs[i].ModTime + "</span>"
html += "</td>"
html += "</tr>"
}
html += "</tbody>"
html += "</table>"
html += "</div>"
html += "</div>"
html += "</div>"
html += "</div>"
$(`#dir_list${version_name}`).append(html)
}
function renderSize(value){
if(null==value||value==''){
return "0 Bytes";
}
var unitArr = new Array("Bytes","KB","MB","GB","TB","PB","EB","ZB","YB");
var index=0;
var srcsize = parseFloat(value);
index=Math.floor(Math.log(srcsize)/Math.log(1024));
var size =srcsize/Math.pow(1024,index);
size=size.toFixed(0);//保留的小数位数
return size+unitArr[index];
}
function loadJobStatus() {
$(".ui.accordion.border-according").each((index, job) => {
const jobID = job.dataset.jobid;
const repoPath = job.dataset.repopath;
const versionname = job.dataset.version
// ['IMAGE_FAILED','SUBMIT_FAILED','DELETE_FAILED','KILLED','COMPLETED','FAILED','CANCELED','LOST','START_FAILED']
// if (job.textContent.trim() == 'IMAGE_FAILED' || job.textContent.trim() == 'SUBMIT_FAILED' || job.textContent.trim() == 'DELETE_FAILED'
// || job.textContent.trim() == 'KILLED' || job.textContent.trim() == 'COMPLETED' || job.textContent.trim() == 'FAILED'
// || job.textContent.trim() == 'CANCELED' || job.textContent.trim() == 'LOST') {
// return
// }
let status = $(`#${versionname}-status-span`).text()
if(['IMAGE_FAILED','SUBMIT_FAILED','DELETE_FAILED','KILLED','COMPLETED','FAILED','CANCELED','LOST','START_FAILED','SUCCEEDED'].includes(status)){
return
}
let stopArray=["KILLED","FAILED","START_FAILED","KILLING","COMPLETED","SUCCEEDED"]
$.get(`/api/v1/repos/${repoPath}/cloudbrain/${taskID}?version_name=${versionname}`, (data) => {
//$(`#${versionname}-duration-span`).text(data.JobDuration)
$(`#${versionname}-status-span span`).text(data.JobStatus)
$(`#${versionname}-status-span i`).attr("class",data.JobStatus)
// detail status and duration
//$('#'+versionname+'-duration').text(data.JobDuration)
$('#'+versionname+'-status').text(data.JobStatus)
if(stopArray.includes(data.JobStatus)){
$('#'+versionname+'-stop').addClass('disabled')
}
if(data.JobStatus==="COMPLETED"){
$('#'+versionname+'-create-model').removeClass('disabled').addClass('blue')
}
}).fail(function(err) {
console.log(err);
});
});
};

function refreshStatus(version_name){
$.get(`/api/v1/repos/${userName}/${repoPath}/cloudbrain/${taskID}?version_name=${versionname}`,(data)=>{
// header status and duration
//$(`#${version_name}-duration-span`).text(data.JobDuration)
$(`#${version_name}-status-span span`).text(data.JobStatus)
$(`#${version_name}-status-span i`).attr("class",data.JobStatus)
// detail status and duration
//$('#'+version_name+'-duration').text(data.JobDuration)
$('#'+version_name+'-status').text(data.JobStatus)
loadLog(version_name)


}).fail(function(err) {
console.log(err);
});
stopBubbling(arguments.callee.caller.arguments[0])
}
</script>

+ 1
- 1
templates/repo/editor/upload.tmpl View File

@@ -27,7 +27,7 @@
</div>
<div class="field">
<div class="files"></div>
<div class="ui dropzone" id="dropzone" data-upload-url="{{.RepoLink}}/upload-file" data-remove-url="{{.RepoLink}}/upload-remove" data-csrf="{{.CsrfToken}}" data-accepts="{{.UploadAllowedTypes}}" data-max-file="{{.UploadMaxFiles}}" data-max-size="{{.UploadMaxSize}}" data-default-message="{{.i18n.Tr "dropzone.default_message"}}asdsadsad" data-invalid-input-type="{{.i18n.Tr "dropzone.invalid_input_type"}}" data-file-too-big="{{.i18n.Tr "dropzone.file_too_big"}}" data-remove-file="{{.i18n.Tr "dropzone.remove_file"}}"></div>
<div class="ui dropzone" id="dropzone" data-upload-url="{{.RepoLink}}/upload-file" data-remove-url="{{.RepoLink}}/upload-remove" data-csrf="{{.CsrfToken}}" data-accepts="{{.UploadAllowedTypes}}" data-max-file="{{.UploadMaxFiles}}" data-max-size="{{.UploadMaxSize}}" data-default-message="{{.i18n.Tr "dropzone.default_message"}}" data-invalid-input-type="{{.i18n.Tr "dropzone.invalid_input_type"}}" data-file-too-big="{{.i18n.Tr "dropzone.file_too_big"}}" data-remove-file="{{.i18n.Tr "dropzone.remove_file"}}"></div>
</div>
{{template "repo/editor/commit_form" .}}
</form>


+ 39
- 4
templates/repo/modelarts/trainjob/index.tmpl View File

@@ -39,8 +39,18 @@
</div>
</div>
<div class="column right aligned">
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;">
{{svg "octicon-server" 16}}
<div class="default text" style="color: rgba(0,0,0,.87);"></div>
<i class="dropdown icon"></i>
<div class="menu">
<div class="item" data-value="all">{{$.i18n.Tr "repo.gpu_type_all"}}</div>
<div class="item" data-value="CPU/GPU">CPU/GPU</div>
<div class="item" data-value="NPU">NPU</div>
</div>
</div>
{{if .Permission.CanWrite $.UnitTypeCloudBrain}}
<a class="ui green button" href="{{.RepoLink}}/modelarts/train-job/create">{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
<a class="ui green button" href="{{.RepoLink}}/cloudbrain/train-job/create">{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
{{else}}
<a class="ui disabled button" >{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
{{end}}
@@ -102,7 +112,7 @@

<!-- 任务名 -->
<div class="three wide column padding0">
<a class="title" href="{{$.Link}}/{{.JobID}}" title="{{.DisplayJobName}}" style="font-size: 14px;">
<a class="title" href='{{if eq .ComputeResource "NPU" }}{{$.Link}}/{{.JobID}}{{else}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{end}}' title="{{.DisplayJobName}}" style="font-size: 14px;">

<span class="fitted" style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span>
</a>
@@ -143,7 +153,7 @@
<div class="ui compact buttons">
{{$.CsrfTokenHtml}}
{{if .CanDel}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{.JobID}}" class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED"}}disabled {{else}} blue {{end}}button" data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{.JobID}}" class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button" data-repopath='{{$.RepoRelPath}}{{if eq .ComputeResource "NPU"}}/modelarts/train-job{{else}}/cloudbrain/train-job{{end}}' data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
{{$.i18n.Tr "repo.stop"}}
</a>
{{else}}
@@ -154,7 +164,8 @@

</div>
<!-- 删除任务 -->
<form class="ui compact buttons" id="delForm-{{.JobID}}" action="{{$.Link}}/{{.JobID}}/del" method="post">
<form class="ui compact buttons" id="delForm-{{.JobID}}" action='{{if eq .ComputeResource "NPU" }}{{$.Link}}/{{.JobID}}{{else}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{end}}/del' method="post">
<input type="hidden" name="listType" value="{{$.ListType}}">
{{$.CsrfTokenHtml}}
{{if .CanDel}}
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{.JobID}}" class="ui basic ai_delete blue button" style="border-radius: .28571429rem;">
@@ -206,3 +217,27 @@
</div>
</div>
{{template "base/footer" .}}

<script>
let url = {{$.RepoLink}};
let all = {{$.i18n.Tr "repo.gpu_type_all"}}
$(document).ready(function(){
const params = new URLSearchParams(location.search)
if(!location.search){
$('.default.text').text(all)
}else{
if(params.has('listType') && params.get('listType')=='all'){
$('.default.text').text(all)
}
else{
$('.default.text').text(params.get('listType'))
}
}

$('.ui.selection.dropdown').dropdown({
onChange:function(value){
location.href = `${url}/modelarts/train-job?listType=${value}`
}
})
})
</script>

+ 18
- 0
templates/repo/modelarts/trainjob/new.tmpl View File

@@ -78,6 +78,24 @@
<input type="hidden" id="ai_engine_name" name="engine_names" value="">
<input type="hidden" id="ai_flaver_name" name="flaver_names" value="">
<h4 class="unite title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="active item" href="{{.RepoLink}}/modelarts/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
</div>
</div>
<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="64">


+ 5
- 1
templates/user/dashboard/feeds.tmpl View File

@@ -77,13 +77,15 @@
{{else if eq .GetOpType 26}}
{{$.i18n.Tr "action.task_npudebugjob" .GetRepoLink .Content .RefName | Str2html}}
{{else if eq .GetOpType 27}}
{{$.i18n.Tr "action.task_trainjob" .GetRepoLink .Content .RefName | Str2html}}
{{$.i18n.Tr "action.task_nputrainjob" .GetRepoLink .Content .RefName | Str2html}}
{{else if eq .GetOpType 28}}
{{$.i18n.Tr "action.task_inferencejob" .GetRepoLink .Content .RefName | Str2html}}
{{else if eq .GetOpType 29}}
{{$.i18n.Tr "action.task_benchmark" .GetRepoLink .Content .RefName | Str2html}}
{{else if eq .GetOpType 30}}
{{$.i18n.Tr "action.task_createmodel" .GetRepoLink .RefName .RefName | Str2html}}
{{else if eq .GetOpType 31}}
{{$.i18n.Tr "action.task_gputrainjob" .GetRepoLink .Content .RefName | Str2html}}
{{end}}
</p>
{{if or (eq .GetOpType 5) (eq .GetOpType 18)}}
@@ -129,6 +131,8 @@
<span class="text grey"><i class="ri-vip-crown-line icon big"></i></span>
{{else if eq .GetOpType 30}}
<span class="text grey"><i class="ri-picture-in-picture-exit-line icon big"></i></span>
{{else if eq .GetOpType 31}}
<span class="text grey"><i class="ri-character-recognition-line icon big"></i></span>
{{else}}
<span class="text grey">{{svg (printf "octicon-%s" (ActionIcon .GetOpType)) 32}}</span>
{{end}}


Loading…
Cancel
Save