#4514 合并重构分支代码到里程碑分支

Merged
ychao_1983 merged 241 commits from ai_task_refactor_v3_train into V20230718 10 months ago
  1. +270
    -15
      entity/ai_task.go
  2. +0
    -35
      entity/ai_task_config.go
  3. +0
    -12
      entity/ai_task_list.go
  4. +102
    -20
      entity/cluster.go
  5. +56
    -0
      entity/command.go
  6. +20
    -6
      entity/container.go
  7. +1
    -0
      entity/creation.go
  8. +8
    -0
      entity/file.go
  9. +22
    -0
      entity/file_download.go
  10. +11
    -0
      entity/resource_usage.go
  11. +24
    -5
      manager/client/grampus/grampus.go
  12. +5
    -3
      models/ai_model_manage.go
  13. +152
    -13
      models/cloudbrain.go
  14. +35
    -0
      models/cloudbrain_config.go
  15. +12
    -0
      models/error.go
  16. +1
    -0
      models/models.go
  17. +1
    -0
      modules/convert/cloudbrain.go
  18. +3
    -3
      modules/modelarts/modelarts.go
  19. +2
    -2
      modules/modelarts/resty.go
  20. +4
    -0
      modules/setting/setting.go
  21. +15
    -0
      modules/storage/minio_ext.go
  22. +29
    -13
      modules/storage/obs.go
  23. +22
    -4
      modules/storage/storage.go
  24. +1
    -0
      modules/structs/cloudbrain.go
  25. +2
    -0
      modules/templates/helper.go
  26. +1
    -1
      modules/urfs_client/urchin/schedule.go
  27. +9
    -5
      options/locale/locale_en-US.ini
  28. +7
    -2
      options/locale/locale_zh-CN.ini
  29. +1
    -1
      package-lock.json
  30. +29
    -17
      public/home/home.js
  31. +293
    -23
      routers/ai_task/ai_task.go
  32. +10
    -2
      routers/api/v1/api.go
  33. +10
    -0
      routers/api/v1/repo/cloudbrain.go
  34. +79
    -8
      routers/api/v1/repo/modelarts.go
  35. +62
    -0
      routers/common/download.go
  36. +21
    -17
      routers/repo/ai_model_manage.go
  37. +22
    -6
      routers/repo/cloudbrain.go
  38. +15
    -9
      routers/repo/dataset.go
  39. +75
    -67
      routers/repo/grampus.go
  40. +150
    -122
      routers/repo/modelarts.go
  41. +10
    -0
      routers/response/error.go
  42. +6
    -4
      routers/response/response_list.go
  43. +438
    -92
      services/ai_task_service/cluster/c2net.go
  44. +319
    -23
      services/ai_task_service/cluster/cloudbrain_one.go
  45. +592
    -29
      services/ai_task_service/cluster/cloudbrain_two.go
  46. +19
    -13
      services/ai_task_service/cluster/cluster_base.go
  47. +140
    -0
      services/ai_task_service/cluster/common.go
  48. +19
    -13
      services/ai_task_service/container_builder/code_builder.go
  49. +7
    -7
      services/ai_task_service/container_builder/common.go
  50. +0
    -2
      services/ai_task_service/container_builder/container_builder.go
  51. +27
    -26
      services/ai_task_service/container_builder/dataset_builder.go
  52. +1
    -0
      services/ai_task_service/container_builder/file_notebook_code_builder.go
  53. +60
    -0
      services/ai_task_service/container_builder/log_path_builder.go
  54. +29
    -10
      services/ai_task_service/container_builder/output_path_builder.go
  55. +10
    -5
      services/ai_task_service/container_builder/pre_model_builder.go
  56. +30
    -1
      services/ai_task_service/context/context.go
  57. +17
    -14
      services/ai_task_service/schedule/model_schedule.go
  58. +48
    -0
      services/ai_task_service/storage_helper/client.go
  59. +203
    -0
      services/ai_task_service/storage_helper/minio.go
  60. +225
    -0
      services/ai_task_service/storage_helper/obs.go
  61. +1
    -1
      services/ai_task_service/storage_helper/repo.go
  62. +56
    -74
      services/ai_task_service/task/cloudbrain_one_notebook_task.go
  63. +173
    -0
      services/ai_task_service/task/cloudbrain_one_train_task.go
  64. +42
    -65
      services/ai_task_service/task/cloudbrain_two_notebook_task.go
  65. +149
    -0
      services/ai_task_service/task/cloudbrain_two_train_task.go
  66. +88
    -103
      services/ai_task_service/task/grampus_notebook_task.go
  67. +36
    -65
      services/ai_task_service/task/grampus_online_infer_task.go
  68. +117
    -57
      services/ai_task_service/task/grampus_train_task.go
  69. +17
    -10
      services/ai_task_service/task/opt.go
  70. +158
    -130
      services/ai_task_service/task/opt_handler.go
  71. +265
    -60
      services/ai_task_service/task/task_base.go
  72. +50
    -6
      services/ai_task_service/task/task_config.go
  73. +16
    -8
      services/ai_task_service/task/task_creation_info.go
  74. +195
    -0
      services/ai_task_service/task/task_extend.go
  75. +226
    -238
      services/ai_task_service/task/task_service.go
  76. +0
    -29
      services/ai_task_service/upload/client.go
  77. +0
    -40
      services/ai_task_service/upload/minio.go
  78. +0
    -46
      services/ai_task_service/upload/obs.go
  79. +15
    -0
      services/cloudbrain/cloudbrainTask/ai_model.go
  80. +20
    -13
      templates/admin/cloudbrain/list.tmpl
  81. +1
    -0
      templates/admin/cloudbrain/search.tmpl
  82. +1
    -0
      templates/admin/cloudbrain/search_dashboard.tmpl
  83. +1
    -273
      templates/repo/cloudbrain/trainjob/new.tmpl
  84. +273
    -0
      templates/repo/cloudbrain/trainjob/new_ori.tmpl
  85. +1
    -746
      templates/repo/cloudbrain/trainjob/show.tmpl
  86. +746
    -0
      templates/repo/cloudbrain/trainjob/show_ori.tmpl
  87. +1
    -257
      templates/repo/grampus/trainjob/gcu/new.tmpl
  88. +257
    -0
      templates/repo/grampus/trainjob/gcu/new_ori.tmpl
  89. +1
    -261
      templates/repo/grampus/trainjob/gpu/new.tmpl
  90. +261
    -0
      templates/repo/grampus/trainjob/gpu/new_ori.tmpl
  91. +1
    -247
      templates/repo/grampus/trainjob/npu/new.tmpl
  92. +247
    -0
      templates/repo/grampus/trainjob/npu/new_ori.tmpl
  93. +1
    -861
      templates/repo/grampus/trainjob/show.tmpl
  94. +861
    -0
      templates/repo/grampus/trainjob/show_ori.tmpl
  95. +3
    -288
      templates/repo/modelarts/trainjob/index.tmpl
  96. +292
    -0
      templates/repo/modelarts/trainjob/index_ori.tmpl
  97. +1
    -269
      templates/repo/modelarts/trainjob/new.tmpl
  98. +269
    -0
      templates/repo/modelarts/trainjob/new_ori.tmpl
  99. +1
    -1
      templates/repo/modelarts/trainjob/para_manage.tmpl
  100. +1
    -870
      templates/repo/modelarts/trainjob/show.tmpl

+ 270
- 15
entity/ai_task.go View File

@@ -1,7 +1,14 @@
package entity

import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"encoding/json"
"fmt"
"io"
"reflect"
"strings"
"sync"

"code.gitea.io/gitea/models"

@@ -32,8 +39,11 @@ type CreateReq struct {
Description string `json:"description"`
LabelName string `json:"label_names"`
DatasetUUIDStr string `json:"dataset_uuid_str"`
Params string `json:"run_para_list"`
Params string `json:"params"`
BootFile string `json:"boot_file"`
PoolId string `json:"pool_id"`
IsContinueRequest bool `json:"is_continue"`
SourceCloudbrainId int64 `json:"source_cloudbrain_id"`
ParamArray models.Parameters
ComputeSource *models.ComputeSource
ReqCommitID string
@@ -45,16 +55,47 @@ type CreateReq struct {
}

type CreationResponse struct {
Error error
JobID string
Status string //todo 考虑统一状态
CreateTime timeutil.TimeStamp
Error error
JobID string
Status string //todo 考虑统一状态
CreateTime timeutil.TimeStamp
VersionID int64
VersionName string
}
// JobIdAndVersionId pairs a job ID string with a numeric version ID.
type JobIdAndVersionId struct {
JobID string
VersionID int64
}

// QueryAITaskRes is the JSON payload for a single AI task query: the task
// itself, its earlier versions, and two capability flags.
type QueryAITaskRes struct {
	Task             *AITaskDetailInfo   `json:"task"`
	EarlyVersionList []*AITaskDetailInfo `json:"early_version_list"`
	CanCreateVersion bool                `json:"can_create_version"`
	CanDownload      bool                `json:"can_download"`
}

// TryToRemoveDatasetAndModelInfo forwards currentUser to TryToRemoveDatasets
// and TryToRemovePretrainModelList on the main task (when set) and on every
// entry of EarlyVersionList.
func (r *QueryAITaskRes) TryToRemoveDatasetAndModelInfo(currentUser *models.User) {
	scrub := func(info *AITaskDetailInfo) {
		info.TryToRemoveDatasets(currentUser)
		info.TryToRemovePretrainModelList(currentUser)
	}
	if r.Task != nil {
		scrub(r.Task)
	}
	// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
	for i := range r.EarlyVersionList {
		scrub(r.EarlyVersionList[i])
	}
}

// Tr calls Tr(language) on the main task (when set) and on every entry of
// EarlyVersionList.
func (r *QueryAITaskRes) Tr(language string) {
	if r.Task != nil {
		r.Task.Tr(language)
	}
	for i := range r.EarlyVersionList {
		r.EarlyVersionList[i].Tr(language)
	}
}

type AITaskDetailInfo struct {
@@ -79,11 +120,13 @@ type AITaskDetailInfo struct {
CodePath string `json:"code_path"`
DatasetPath string `json:"dataset_path"`
PretrainModelPath string `json:"pretrain_model_path"`
PretrainModelUrl string `json:"pretrain_model_url"`
OutputPath string `json:"output_path"`
CodeUrl string `json:"code_url"`
PretrainModelName string `json:"pretrain_model_name"`
PretrainModelVersion string `json:"pretrain_model_version"`
PretrainCkptName string `json:"pretrain_model_ckpt_name"`
PretrainModelId string `json:"pretrain_model_id"`
StartTime timeutil.TimeStamp `json:"start_time"`
EndTime timeutil.TimeStamp `json:"end_time"`
Description string `json:"description"`
@@ -95,17 +138,22 @@ type AITaskDetailInfo struct {
CreatorName string `json:"creator_name"`
EngineName string `json:"engine_name"`
FailedReason string `json:"failed_reason"`
UserId int64 `json:"-"`
}

// Tr replaces AICenter with the value getAiCenterShow returns for the
// current AICenter and the given language.
func (a *AITaskDetailInfo) Tr(language string) {
	shown := getAiCenterShow(a.AICenter, language)
	a.AICenter = shown
}

func (a *AITaskDetailInfo) RemoveDatasets() {
a.DatasetList = []*models.DatasetDownload{}
func (a *AITaskDetailInfo) TryToRemoveDatasets(currentUser *models.User) {
if currentUser == nil || a.UserId == 0 || (!currentUser.IsAdmin && currentUser.ID != a.UserId) {
a.DatasetList = []*models.DatasetDownload{}
}
}
func (a *AITaskDetailInfo) RemovePretrainModelList() {
a.PretrainModelList = []*models.ModelDownload{}
func (a *AITaskDetailInfo) TryToRemovePretrainModelList(currentUser *models.User) {
if currentUser == nil || a.UserId == 0 || (!currentUser.IsAdmin && currentUser.ID != a.UserId) {
a.PretrainModelList = []*models.ModelDownload{}
}
}

func getAiCenterShow(aiCenter string, language string) string {
@@ -158,6 +206,7 @@ type AITaskBriefInfo struct {
ComputeSource string `json:"compute_source"`
AICenter string `json:"ai_center"`
IsFileNotebook bool `json:"is_file_notebook"`
IsFineTuneTask bool `json:"is_fine_tune_task"`
}

func (a *AITaskBriefInfo) Tr(language string) {
@@ -170,6 +219,7 @@ type AITaskListRes struct {
PageSize int `json:"page_size"`
Page int `json:"page"`
CanCreateTask bool `json:"can_create_task"`
IsRepoEmpty bool `json:"is_repo_empty"`
}
type AITaskInfo4List struct {
Task *AITaskBriefInfo `json:"task"`
@@ -179,11 +229,6 @@ type AITaskInfo4List struct {
}

func ConvertCloudbrainToAITaskBriefInfo(task *models.Cloudbrain) *AITaskBriefInfo {
computeSource := ""
c := models.GetComputeSourceInstance(task.ComputeResource)
if c != nil {
computeSource = c.Name
}
return &AITaskBriefInfo{
ID: task.ID,
JobType: task.JobType,
@@ -192,14 +237,224 @@ func ConvertCloudbrainToAITaskBriefInfo(task *models.Cloudbrain) *AITaskBriefInf
CreatedUnix: task.CreatedUnix,
FormattedDuration: task.TrainJobDuration,
Cluster: GetClusterTypeFromCloudbrainType(task.Type).GetParentCluster(),
ComputeSource: computeSource,
ComputeSource: task.GetStandardComputeSource(),
StartTime: task.StartTime,
EndTime: task.EndTime,
AICenter: task.AiCenter,
IsFileNotebook: task.IsFileNoteBookTask(),
IsFineTuneTask: task.FineTune,
}
}

// NotebookDataset carries a single dataset URL, serialized as "dataset_url".
type NotebookDataset struct {
DatasetUrl string `json:"dataset_url"`
}

// QueryLogOpts groups the options of a task log query.
// NOTE(review): presumably BaseLine/Lines/Order select a window of log lines
// relative to a base line — confirm against the log handlers.
type QueryLogOpts struct {
CloudbrainId int64
BaseLine int64
Lines int64
Order Direction
UserId int64
NodeId int
LogFileName string
}

// GetLogDownloadInfoReq identifies a log to download by cloudbrain ID, node ID
// and log file name.
type GetLogDownloadInfoReq struct {
CloudbrainId int64
NodeId int
LogFileName string
}
// GetAllOutputReq holds a cloudbrain ID and a list of suffixes.
// NOTE(review): presumably Suffix filters output files by extension — confirm.
type GetAllOutputReq struct {
CloudbrainId int64
Suffix []string
}

// GetOutputDownloadInfoReq identifies an output file by cloudbrain ID, file
// name and parent directory.
type GetOutputDownloadInfoReq struct {
CloudbrainId int64
FileName string
ParentDir string
}

// Direction is a string-typed direction with the two known values UP and DOWN.
type Direction string

// Known Direction values.
const (
	UP   Direction = "up"
	DOWN Direction = "down"
)

// Reverse returns the opposite direction: UP for DOWN and DOWN for UP.
// Any other value yields the empty Direction.
func (o Direction) Reverse() Direction {
	if o == DOWN {
		return UP
	}
	if o == UP {
		return DOWN
	}
	return ""
}

// FileReader pairs an io.ReadCloser with a name.
type FileReader struct {
Reader io.ReadCloser
Name string
}

// GetTaskListReq groups the inputs of a task list query; it embeds
// models.ListOptions and adds compute source, job type, repository and
// operator fields.
type GetTaskListReq struct {
models.ListOptions
ComputeSource *models.ComputeSource
JobTypes []string
RepoID int64
Operator *models.User
IsRepoOwner bool
}

// AITaskBaseConfig is the task configuration that is decoded from the JSON
// stored in CloudbrainConfig.ConfigurationSnapshot.
type AITaskBaseConfig struct {
// ContainerSteps maps each container data type to its build options.
ContainerSteps map[ContainerDataType]*ContainerBuildOpts `json:"container_configs"`
ActionType models.ActionType `json:"action_type"`
IsActionUseJobId bool `json:"is_action_use_job_id"`
// The two fields below carry no json tag, so encoding/json uses the Go field
// names as keys.
DatasetsLimitSizeGB int
DatasetsMaxNum int
}

// GetAITaskConfigFromCloudbrainConfig decodes config.ConfigurationSnapshot
// (JSON) into an AITaskBaseConfig.
//
// It returns nil when config is nil or when the snapshot cannot be decoded;
// a decode failure is logged.
func GetAITaskConfigFromCloudbrainConfig(config *models.CloudbrainConfig) *AITaskBaseConfig {
	if config == nil {
		return nil
	}
	c := &AITaskBaseConfig{}
	if err := json.Unmarshal([]byte(config.ConfigurationSnapshot), c); err != nil {
		// The verb for err was previously written as "&v", so the error value
		// was never rendered in the log line.
		log.Error("GetAITaskConfigFromCloudbrain err,config=%+v err=%v", config, err)
		return nil
	}
	return c
}

// AITaskDetailConfigInfo combines a decoded AITaskBaseConfig with the output
// and log object prefixes and storage types of a task.
type AITaskDetailConfigInfo struct {
BaseConfig *AITaskBaseConfig
OutputObjectPrefix string
OutputStorageType StorageType
LogObjectPrefix string
LogStorageType StorageType
}

// BuildAITaskDetailConfigInfo converts a stored CloudbrainConfig into an
// AITaskDetailConfigInfo.
//
// It returns nil when config is nil, matching
// GetAITaskConfigFromCloudbrainConfig. If the configuration snapshot cannot be
// decoded the failure is logged and the result still carries the prefix and
// storage-type fields, with BaseConfig holding whatever was decoded.
func BuildAITaskDetailConfigInfo(config *models.CloudbrainConfig) *AITaskDetailConfigInfo {
	if config == nil {
		return nil
	}
	c := &AITaskBaseConfig{}
	if err := json.Unmarshal([]byte(config.ConfigurationSnapshot), c); err != nil {
		// Best effort: keep building the result, but do not hide the failure.
		log.Error("BuildAITaskDetailConfigInfo unmarshal err,config=%+v err=%v", config, err)
	}
	return &AITaskDetailConfigInfo{
		BaseConfig:         c,
		OutputObjectPrefix: config.OutputObjectPrefix,
		OutputStorageType:  StorageType(config.OutputStorageType),
		LogObjectPrefix:    config.LogObjectPrefix,
		LogStorageType:     StorageType(config.LogStorageType),
	}
}

// AITaskConfigKey selects a task configuration by compute source and by
// whether the request is a file-notebook request.
type AITaskConfigKey struct {
	ComputeSource         string
	IsFileNoteBookRequest bool
}

// GetKey renders the key as a string of "FieldName:value;" segments, one per
// non-zero field in declaration order. A zero-valued key yields "".
func (opts AITaskConfigKey) GetKey() string {
	var sb strings.Builder
	val := reflect.ValueOf(opts)
	typ := val.Type()
	for idx, total := 0, val.NumField(); idx < total; idx++ {
		fv := val.Field(idx)
		if fv.IsZero() {
			// Zero-valued fields do not take part in the key.
			continue
		}
		sb.WriteString(fmt.Sprintf("%s:%v;", typ.Field(idx).Name, fv.Interface()))
	}
	return sb.String()
}

// GetContainerConfig returns the build options registered in ContainerSteps
// for containerDataType, or nil when the map is nil or has no such entry.
func (c *AITaskBaseConfig) GetContainerConfig(containerDataType ContainerDataType) *ContainerBuildOpts {
	if c.ContainerSteps == nil {
		return nil
	}
	return c.ContainerSteps[containerDataType]
}
// GetContainerPath returns the ContainerPath of the build options for
// containerDataType, or "" when GetContainerConfig returns nil.
func (c *AITaskBaseConfig) GetContainerPath(containerDataType ContainerDataType) string {
	if cfg := c.GetContainerConfig(containerDataType); cfg != nil {
		return cfg.ContainerPath
	}
	return ""
}

// AITaskConfigMap is a mutex-guarded registry of task configurations keyed by
// AITaskConfigKey.GetKey(). The zero value is ready to use.
//
// The struct embeds a sync.RWMutex and therefore must not be copied; every
// method uses a pointer receiver so that callers lock the same mutex.
type AITaskConfigMap struct {
	mu        sync.RWMutex
	ConfigMap map[string]*AITaskBaseConfig
}

// Add registers config under the key derived from opts, replacing any existing
// entry, and returns the receiver so calls can be chained.
func (h *AITaskConfigMap) Add(opts AITaskConfigKey, config *AITaskBaseConfig) *AITaskConfigMap {
	h.mu.Lock()
	defer h.mu.Unlock()

	if h.ConfigMap == nil {
		h.ConfigMap = make(map[string]*AITaskBaseConfig)
	}
	h.ConfigMap[opts.GetKey()] = config
	return h
}

// Default registers config under the key of the zero AITaskConfigKey and
// returns the receiver so calls can be chained.
func (h *AITaskConfigMap) Default(config *AITaskBaseConfig) *AITaskConfigMap {
	h.mu.Lock()
	defer h.mu.Unlock()

	if h.ConfigMap == nil {
		h.ConfigMap = make(map[string]*AITaskBaseConfig)
	}
	h.ConfigMap[AITaskConfigKey{}.GetKey()] = config
	return h
}

// Get returns the configuration registered for opts, or nil when there is no
// such entry.
//
// A pointer receiver is required here: with a value receiver the RWMutex is
// copied on every call and the read lock protects nothing.
func (h *AITaskConfigMap) Get(opts AITaskConfigKey) *AITaskBaseConfig {
	h.mu.RLock()
	defer h.mu.RUnlock()

	// Reading a nil map is safe and yields the zero value (nil).
	return h.ConfigMap[opts.GetKey()]
}

// IsEmpty reports whether no configuration has been registered.
func (h *AITaskConfigMap) IsEmpty() bool {
	h.mu.RLock()
	defer h.mu.RUnlock()

	return len(h.ConfigMap) == 0
}

// AITaskOutput is the JSON payload describing a task's output: a migrate
// status, a path, the file list, and three boolean flags.
type AITaskOutput struct {
Status models.ModelMigrateStatus `json:"status"`
Path string `json:"path"`
FileList []storage.FileInfo `json:"file_list"`
IsTaskTerminal bool `json:"is_task_terminal"`
CanReschedule bool `json:"can_reschedule"`
CanDownload bool `json:"can_download"`
}
// AllAITaskOutput wraps a list of storage.FileInfo, serialized as "file_list".
type AllAITaskOutput struct {
FileList []storage.FileInfo `json:"file_list"`
}

// GetResourceUsageOpts groups the inputs of a resource usage query: cloudbrain
// ID, node ID and log file name.
type GetResourceUsageOpts struct {
CloudbrainId int64
NodeId int
LogFileName string
}

// AITaskNodeInfo is the JSON description of one node: its ID and log file name.
type AITaskNodeInfo struct {
ID int `json:"id"`
LogFileName string `json:"log_file_name"`
}

// StorageObjectInfo locates a stored object by object key, storage type and
// bucket.
type StorageObjectInfo struct {
ObjectKey string
StorageType StorageType
Bucket string
}

+ 0
- 35
entity/ai_task_config.go View File

@@ -1,35 +0,0 @@
package entity

type AITaskConfig struct {
ContainerSteps map[ContainerDataType]*ContainerBuildOpts `json:"container_configs"`
DatasetMaxSize int
}

type ContainerConfig struct {
Enable bool
ContainerPath string
ReadOnly bool
AcceptStorageType []StorageType
}

type GetAITaskConfigOpts struct {
ComputeSource string
IsFileNoteBookRequest bool
}

func (c *AITaskConfig) GetContainerConfig(containerDataType ContainerDataType) *ContainerBuildOpts {
containerConfigs := c.ContainerSteps
if containerConfigs != nil {
return containerConfigs[containerDataType]
}
return nil

}
func (c *AITaskConfig) GetContainerPath(containerDataType ContainerDataType) string {
config := c.GetContainerConfig(containerDataType)
if config == nil {
return ""
}
return config.ContainerPath

}

+ 0
- 12
entity/ai_task_list.go View File

@@ -1,12 +0,0 @@
package entity

import "code.gitea.io/gitea/models"

type GetTaskListReq struct {
models.ListOptions
ComputeSource *models.ComputeSource
JobTypes []string
RepoID int64
Operator *models.User
IsRepoOwner bool
}

+ 102
- 20
entity/cluster.go View File

@@ -1,14 +1,14 @@
package entity

import (
"encoding/json"
"fmt"
"strconv"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
"encoding/json"
"fmt"
"strconv"
)

type CreateNoteBookTaskRequest struct {
@@ -54,8 +54,11 @@ type RestartNoteBookTaskResponse struct {
}

type CreateTrainTaskRequest struct {
Name string
Tasks []TrainTask
Name string
DisplayJobName string
Description string
TaskConfig *AITaskBaseConfig
Tasks []TrainTask
}

type QueryTaskResponse struct {
@@ -71,6 +74,7 @@ type QueryTaskResponse struct {
DataUrl string `json:"data_url"`
ContainerIP string `json:"container_ip"`
ContainerID string `json:"container_id"`
VersionId int64 `json:"version_id"`
}

func ConvertGrampusNotebookResponse(job models.GrampusNotebookInfo) *QueryTaskResponse {
@@ -195,23 +199,33 @@ func ConvertCloudbrainOneNotebookResponse(input map[string]interface{}) (*QueryT
}

// ClusterLog is the JSON payload for a chunk of task log content.
type ClusterLog struct {
Content string `json:"content"`
CanLogDownload bool `json:"can_log_download"`
// The start line and base line returned by Cloudbrain Two lose precision in
// the frontend, so they are carried as strings instead of numbers.
StartLine string `json:"start_line"`
EndLine string `json:"end_line"`
Lines int64 `json:"lines"`
}

type TrainTask struct {
Command string `json:"command"`
Name string `json:"name"`
ImageId string `json:"imageId"`
ImageUrl string `json:"imageUrl"`
ResourceSpecId string `json:"resourceSpecId"`
CenterID []string `json:"centerID"`
ReplicaNum int `json:"replicaNum"`
Datasets []ContainerData `json:"datasets"`
Models []ContainerData `json:"models"`
Code ContainerData `json:"code"`
BootFile string `json:"bootFile"`
OutPut ContainerData `json:"output"`
Params models.Parameters
Spec *models.Specification
Command string `json:"command"`
Name string `json:"name"`
ImageId string `json:"imageId"`
ImageUrl string `json:"imageUrl"`
ResourceSpecId string `json:"resourceSpecId"`
CenterID []string `json:"centerID"`
ReplicaNum int `json:"replicaNum"`
Datasets []ContainerData `json:"datasets"`
PreTrainModel []ContainerData `json:"models"`
Code []ContainerData `json:"code"`
BootFile string `json:"bootFile"`
OutPut []ContainerData `json:"output"`
LogPath []ContainerData `json:"logPath"`
PoolId string `json:"poolId"`
Params models.Parameters
Spec *models.Specification
RepoName string
WorkServerNumber int
}

type CreateTrainTaskResponse struct {
@@ -225,6 +239,8 @@ type CreateTrainTaskResponse struct {
Name string `json:"name"`
Status string `json:"status"`
UserID string `json:"userId"`
VersionID int64 `json:"versionID"`
VersionName string `json:"versionName"` //当前版本
}

type ClusterType string
@@ -270,3 +286,69 @@ func GetClusterTypeFromCloudbrainType(t int) ClusterType {
}
return ""
}

// ClusterLogOpts groups the options passed to a cluster when reading task logs.
type ClusterLogOpts struct {
	JobId           string
	BaseLine        int64
	Lines           int64
	Direction       Direction
	ObjectKeyPrefix string
	StorageType     StorageType
	VersionID       int64
	NodeId          int
	LogFileName     string
	WorkServerNum   int
}

// IsBottomRequest reports whether BaseLine is 0 and Direction is UP.
func (opts ClusterLogOpts) IsBottomRequest() bool {
	if opts.BaseLine != 0 {
		return false
	}
	return opts.Direction == UP
}

// IsHeadRequest reports whether BaseLine is 0 and Direction is DOWN.
func (opts ClusterLogOpts) IsHeadRequest() bool {
	if opts.BaseLine != 0 {
		return false
	}
	return opts.Direction == DOWN
}

// ClusterLogDownloadInfoOpts groups the options passed to a cluster when
// resolving log download information.
type ClusterLogDownloadInfoOpts struct {
JobId string
ObjectKeyPrefix string
StorageType StorageType
NodeId int
LogFileName string
WorkServerNum int
JobName string
DisplayJobName string
}

// ClusterOutputDownloadInfoOpts groups the options passed to a cluster when
// resolving output download information.
type ClusterOutputDownloadInfoOpts struct {
JobId string
Path string
JobName string
StorageType StorageType
}
// ClusterNodeInfoOpts groups a job ID, worker server count and version ID.
// NOTE(review): presumably used to list the nodes of a job — confirm against
// the cluster implementations.
type ClusterNodeInfoOpts struct {
JobId string
WorkServerNum int
VersionId int64
}
// ClusterResourceUsageOpts groups the options passed to a cluster when
// querying resource usage.
// NOTE(review): the unit of StartTime/EndTime is not visible here — confirm.
type ClusterResourceUsageOpts struct {
JobId string
NodeId int
LogFileName string
VersionID int64
StartTime int64
EndTime int64
ComputeSource string
WorkServerNumber int
}

// ClusterOutputOpts groups the options passed to a cluster when listing task
// output.
type ClusterOutputOpts struct {
JobId string
ObjectKeyPrefix string
StorageType StorageType
ParentDir string
}

// ClusterAITaskOutput holds a migrate status, a path and a file list.
// NOTE(review): presumably the cluster-level counterpart of AITaskOutput —
// confirm against callers.
type ClusterAITaskOutput struct {
Status models.ModelMigrateStatus
Path string
FileList []storage.FileInfo
}

+ 56
- 0
entity/command.go View File

@@ -0,0 +1,56 @@
package entity

import "strings"

// Command holds a single command line as one string.
type Command struct {
	CommandStr string
}

// NewCommand builds a Command whose text is the given parts joined by single
// spaces. With no parts the command text is empty.
func NewCommand(s ...string) *Command {
	return &Command{CommandStr: strings.Join(s, " ")}
}

// ToString returns the command text.
func (c *Command) ToString() string {
	return c.CommandStr
}

// CommandBuilder accumulates Commands in order.
type CommandBuilder struct {
	Commands []*Command
}

// ToString renders every command followed by ";" and concatenates the results.
// An empty builder yields "".
func (b *CommandBuilder) ToString() string {
	var out strings.Builder
	for _, cmd := range b.Commands {
		out.WriteString(cmd.ToString() + ";")
	}
	return out.String()
}

// Next appends c to the builder and returns the builder for chaining.
func (b *CommandBuilder) Next(c *Command) *CommandBuilder {
	if b.Commands == nil {
		b.Commands = []*Command{}
	}
	b.Commands = append(b.Commands, c)
	return b
}

// Add appends all commands of another to the builder and returns the builder
// for chaining. A nil builder or nil command list contributes nothing; the
// receiver's Commands slice is initialized in every case.
func (b *CommandBuilder) Add(another *CommandBuilder) *CommandBuilder {
	if b.Commands == nil {
		b.Commands = []*Command{}
	}
	if another != nil && another.Commands != nil {
		b.Commands = append(b.Commands, another.Commands...)
	}
	return b
}

+ 20
- 6
entity/container.go View File

@@ -15,8 +15,11 @@ type ContainerData struct {
ContainerPath string `json:"containerPath"`
RealPath string `json:"realPath"`
ReadOnly bool `json:"readOnly"`
Size int64 `json:"size"`
IsDir bool `json:"isDir"`
GetBackEndpoint string `json:"getBackEndpoint"`
S3DownloadUrl string `json:"s3DownloadUrl"`
Size int64 `json:"size"`
StorageType StorageType
}

type ContainerDataType string
@@ -26,15 +29,20 @@ const (
ContainerDataset ContainerDataType = "dataset"
ContainerPreTrainModel ContainerDataType = "pre_train_model"
ContainerOutPutPath ContainerDataType = "output"
ContainerLogPath ContainerDataType = "log"
ContainerFileNoteBookCode ContainerDataType = "file_note_book_code"
)

type ContainerBuildOpts struct {
Disable bool
ContainerPath string
ReadOnly bool
AcceptStorageType []StorageType
NotArchive bool
Disable bool
//容器内路径
ContainerPath string
//在aiforge存储上基于云脑存储目录的相对路径
StorageRelativePath string
ReadOnly bool
AcceptStorageType []StorageType
Uncompressed bool
MKDIR bool
}

func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool {
@@ -45,3 +53,9 @@ func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool {
}
return false
}
// GetLocalPath returns StorageRelativePath when it is set and falls back to
// ContainerPath otherwise.
func (opts ContainerBuildOpts) GetLocalPath() string {
	if opts.StorageRelativePath == "" {
		return opts.ContainerPath
	}
	return opts.StorageRelativePath
}

+ 1
- 0
entity/creation.go View File

@@ -18,6 +18,7 @@ type CreationRequiredInfo struct {
PointAccount *PointAccountInfo `json:"point_account"`
PaySwitch bool `json:"pay_switch"`
Config AITaskCreationConfig `json:"config"`
AllowedWorkerNum []int `json:"allowed_worker_num"`
}

type AITaskCreationConfig struct {


+ 8
- 0
entity/file.go View File

@@ -0,0 +1,8 @@
package entity

// FileType is a string-typed file format identifier.
type FileType string

// File formats declared in this file.
const (
FileTypeTXT FileType = "txt"
FileTypeZIP FileType = "zip"
)

+ 22
- 0
entity/file_download.go View File

@@ -0,0 +1,22 @@
package entity

// FileDownloadInfo describes a download either as a set of open readers or as
// a download URL, together with the result type and file name.
type FileDownloadInfo struct {
	Readers        []FileReader
	ResultType     FileType
	ResultFileName string
	DownloadUrl    string
}

// IsEmpty reports whether there are no readers and no download URL.
func (f *FileDownloadInfo) IsEmpty() bool {
	return len(f.Readers) == 0 && f.DownloadUrl == ""
}

// Close closes every non-nil reader. Errors returned by the readers are
// discarded.
func (f *FileDownloadInfo) Close() {
	for i := range f.Readers {
		if rc := f.Readers[i].Reader; rc != nil {
			rc.Close()
		}
	}
}

+ 11
- 0
entity/resource_usage.go View File

@@ -0,0 +1,11 @@
package entity

// ResourceUsage is the JSON payload for resource usage: an interval and a list
// of metrics.
// NOTE(review): the unit of Interval is not visible here — confirm.
type ResourceUsage struct {
Interval int `json:"interval"`
MetricsInfo []MetricsInfo `json:"metrics_info"`
}

// MetricsInfo is one named metric with its series of float32 values.
type MetricsInfo struct {
Name string `json:"name"`
Value []float32 `json:"value"`
}

+ 24
- 5
manager/client/grampus/grampus.go View File

@@ -4,7 +4,9 @@ import (
"crypto/tls"
"encoding/json"
"fmt"
"math"
"net/http"
"strconv"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
@@ -319,15 +321,20 @@ sendjob:
return &result, nil
}

func GetTrainJobLog(jobID string) (string, error) {
func GetTrainJobLog(jobID string, nodeId ...int) (string, error) {
checkSetting()
client := getRestyClient()
var logContent string

url := HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log"
if len(nodeId) > 0 {
url = HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log/node/" + strconv.Itoa(nodeId[0])
}

res, err := client.R().
SetAuthToken(TOKEN).
SetResult(&logContent).
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log")
Get(url)

if err != nil {
return logContent, fmt.Errorf("resty GetTrainJobLog: %v", err)
@@ -348,13 +355,24 @@ func GetTrainJobLog(jobID string) (string, error) {
return logContent, nil
}

func GetGrampusMetrics(jobID string) (models.NewModelArtsMetricStatisticResult, error) {
func GetGrampusMetrics(jobID string, startTime int64, endTime int64, nodeId ...int) (models.NewModelArtsMetricStatisticResult, error) {
checkSetting()
client := getRestyClient()
var result models.NewModelArtsMetricStatisticResult
url := HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics"
if len(nodeId) > 0 {
url = HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics/node/" + strconv.Itoa(nodeId[0])
}
if startTime > 0 {
var step int64 = 60

size := int64(math.Ceil(float64(endTime-startTime)/float64(step))) + 1

url = url + "?startTime=" + strconv.FormatInt(startTime, 10) + "&step=" + strconv.FormatInt(step, 10) + "&size=" + strconv.FormatInt(size, 10)
}
res, err := client.R().
SetAuthToken(TOKEN).
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics")
Get(url)

if err != nil {
return result, fmt.Errorf("resty GetTrainJobLog: %v", err)
@@ -512,13 +530,14 @@ func GetTrainJobEvents(jobID string) (*models.GetGrampusJobEventsResponse, error
retry := 0

sendjob:
_, err := client.R().
res, err := client.R().
SetAuthToken(TOKEN).
SetResult(&result).
Get(HOST + urlTrainJob + "/" + jobID + "/events")
if err != nil {
return nil, fmt.Errorf("resty GetTrainJobEvents: %v", err)
}
log.Info("res=%+v", res)

if result.ErrorCode == errorIllegalToken && retry < 1 {
retry++


+ 5
- 3
models/ai_model_manage.go View File

@@ -291,10 +291,12 @@ func QueryModelById(id string) (*AiModelManage, error) {
defer sess.Close()
re := new(AiModelManage)
isExist, err := sess.Table(new(AiModelManage)).ID(id).Get(re)
if err == nil && isExist {
return re, nil
if err != nil {
return nil, err
} else if !isExist {
return nil, ErrPretrainModelNotExist{}
}
return nil, err
return re, nil
}

func DeleteModelConvertById(id string) error {


+ 152
- 13
models/cloudbrain.go View File

@@ -166,6 +166,8 @@ const (
ProcessorTypeDCU = "ac.sugon.com/dcu"
)

const CloudbrainTwoDefaultVersion = "/V0001"

type ComputeSource struct {
Name string
CloudbrainFormat string
@@ -188,6 +190,22 @@ func GetComputeSourceInstance(name string) *ComputeSource {
return nil
}

// GetComputeSourceCloudbrainFormat returns the CloudbrainFormat of the compute
// source that GetComputeSourceInstance resolves for name, or "" when none is
// found.
func GetComputeSourceCloudbrainFormat(name string) string {
	if source := GetComputeSourceInstance(name); source != nil {
		return source.CloudbrainFormat
	}
	return ""
}

// GetComputeSourceStandardFormat returns the Name of the compute source that
// GetComputeSourceInstance resolves for name, or "" when none is found.
func GetComputeSourceStandardFormat(name string) string {
	if source := GetComputeSourceInstance(name); source != nil {
		return source.Name
	}
	return ""
}

var ComputeSourceMap = map[string]*ComputeSource{
GPU: {Name: GPU, CloudbrainFormat: GPUResource, FullName: ProcessorTypeGPU},
NPU: {Name: NPU, FullName: ProcessorTypeNPU},
@@ -244,8 +262,9 @@ type Cloudbrain struct {
FailedReason string `xorm:"text"`

TrainUrl string //输出模型的obs路径
RemoteCodeUrl string //分中心下载代码地址
BranchName string `xorm:"varchar(2550)"` //分支名称
Parameters string //传给modelarts的param参数
Parameters string `xorm:"varchar(2000)"` //传给modelarts的param参数
BootFile string `xorm:"varchar(2550)"` //启动文件
DataUrl string `xorm:"varchar(3500)"` //数据集的obs路径
LogUrl string //日志输出的obs路径
@@ -277,7 +296,8 @@ type Cloudbrain struct {
FineTune bool `xorm:"DEFAULT false"`
FineTuneModelType int
FineTuneCategory int
Spec *Specification `xorm:"-"`
Spec *Specification `xorm:"-"`
Config *CloudbrainConfig `xorm:"-"`
}

type CloudbrainShow struct {
@@ -325,6 +345,21 @@ func (task *Cloudbrain) ToShow() *CloudbrainShow {
return c
}

// GetStandardComputeSource returns the result of
// GetComputeSourceStandardFormat for the task's ComputeResource.
func (task *Cloudbrain) GetStandardComputeSource() string {
	standard := GetComputeSourceStandardFormat(task.ComputeResource)
	return standard
}
// GetCloudbrainConfig returns the task's configuration, loading it with the
// package-level GetCloudbrainConfig(task.ID) on first use and caching the
// result in task.Config. A load error yields nil and is not reported.
func (task *Cloudbrain) GetCloudbrainConfig() *CloudbrainConfig {
	if task.Config == nil {
		loaded, err := GetCloudbrainConfig(task.ID)
		if err != nil {
			return nil
		}
		task.Config = loaded
	}
	return task.Config
}

func (task *Cloudbrain) ComputeAndSetDuration() {
var d int64
if task.StartTime == 0 {
@@ -462,11 +497,7 @@ func (task *Cloudbrain) IsNewAITask() bool {
continue
}
for _, s := range v {
c := GetComputeSourceInstance(s)
if c == nil {
continue
}
if c.GetCloudbrainFormat() == task.ComputeResource {
if s == task.GetStandardComputeSource() {
return true
}
}
@@ -1526,6 +1557,7 @@ type ModelUrls struct {
}

type DatasetDownload struct {
UUID string `json:"uuid"`
DatasetName string `json:"dataset_name"`
DatasetDownloadLink string `json:"dataset_download_link"`
RepositoryLink string `json:"repository_link"`
@@ -1533,6 +1565,7 @@ type DatasetDownload struct {
}

type ModelDownload struct {
ModelName string `json:"model_name"`
Name string `json:"name"`
DownloadLink string `json:"download_link"`
RepositoryLink string `json:"repository_link"`
@@ -1977,6 +2010,7 @@ type GrampusDataset struct {
ContainerPath string `json:"containerPath"`
ReadOnly bool `json:"readOnly"`
GetBackEndpoint string `json:"getBackEndpoint"`
Size int64 `json:"size"`
}

type CreateGrampusJobRequest struct {
@@ -2328,6 +2362,13 @@ func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
return err
}
}
if cloudbrain.Config != nil {
cloudbrain.Config.CloudbrainID = cloudbrain.ID
if _, err = session.Insert(cloudbrain.Config); err != nil {
session.Rollback()
return err
}
}
session.Commit()
go updateReferenceCount(cloudbrain)
OperateRepoAITaskNum(cloudbrain.RepoID, 1)
@@ -3142,6 +3183,22 @@ type DatasetInfo struct {
FullName string
Type int
Size int64
DownloadUrl string
}

// DatasetInfo4AITask describes one dataset with separate base info for its
// compressed and uncompressed forms, plus a type and a size.
// NOTE(review): the unit of Size is not visible here — confirm.
type DatasetInfo4AITask struct {
Compressed DatasetBaseInfo
Uncompressed DatasetBaseInfo
Type int
Size int64
}
type DatasetBaseInfo struct {
RealPath string
ObjectKey string
HttpDownloadUrl string
S3DownloadUrl string
Name string
}

func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) {
@@ -3178,6 +3235,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn
}
}
var dataLocalPath string
var downloadUrl string
if len(grampusType) > 0 {
if grampusType[0] == GPU {
dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID
@@ -3190,13 +3248,18 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn
dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/"
}
}

} else {
dataLocalPath = setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
AttachmentRelativePath(attach.UUID) +
attach.UUID
if attach.Type == TypeCloudBrainOne {
dataLocalPath = setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
AttachmentRelativePath(attach.UUID) +
attach.UUID
} else {
downloadUrl = "s3://" + setting.Bucket + "/" + setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + attach.UUID + "/"
dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/"
}

}

datasetInfos[attach.UUID] = DatasetInfo{
@@ -3205,6 +3268,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn
FullName: attach.Name,
Type: attach.Type,
Size: attach.Size,
DownloadUrl: downloadUrl,
}
if i == 0 {
datasetNames = attach.Name
@@ -3216,6 +3280,81 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn
return datasetInfos, datasetNames, nil
}

// GetDatasetInfo4AITask resolves the attachments named in uuidStr (UUIDs
// separated by ";") into storage locations, keyed by attachment UUID.
//
// For attachments of TypeCloudBrainOne the minio real paths and object keys are
// filled in; for every other type the object keys and "s3://" URLs built from
// setting.Bucket and setting.BasePath are filled in.
//
// An error is returned when the attachment lookup fails, when two attachments
// share the same name once the archive suffix is stripped, or when a non-empty
// UUID in uuidStr has no matching attachment. Empty UUID entries (for example
// from an empty uuidStr or a trailing ";") are ignored.
func GetDatasetInfo4AITask(uuidStr string) (map[string]DatasetInfo4AITask, error) {
	uuids := strings.Split(uuidStr, ";")
	attachments, err := GetAttachmentsByUUIDs(uuids)
	if err != nil {
		log.Error("GetAttachmentsByUUIDs failed: %v", err)
		return nil, err
	}

	// trimArchiveSuffix derives the uncompressed dataset name from the archive name.
	trimArchiveSuffix := func(name string) string {
		return strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(name, ".zip"), ".tar.gz"), ".tgz")
	}

	attachMap := make(map[string]*Attachment, len(attachments))
	seenNames := make(map[string]struct{}, len(attachments))
	for _, attach := range attachments {
		fileName := trimArchiveSuffix(attach.Name)
		if _, exists := seenNames[fileName]; exists {
			return nil, errors.New("the dataset name is same")
		}
		seenNames[fileName] = struct{}{}
		attachMap[attach.UUID] = attach
	}

	datasetInfos := make(map[string]DatasetInfo4AITask, len(attachments))
	for _, tmpUuid := range uuids {
		if tmpUuid == "" {
			// Nothing was requested for this entry; keeps an empty uuidStr a non-error.
			continue
		}
		attach := attachMap[tmpUuid]
		if attach == nil {
			// err is nil at this point, so returning it would hide the failure
			// from the caller; report the missing attachment explicitly.
			log.Error("GetDatasetInfo4AITask attachment not found. uuid=%s", tmpUuid)
			return nil, errors.New("the dataset attachment does not exist: " + tmpUuid)
		}

		var compressedRealPath, compressedObjectKey, compressedS3DownloadUrl string
		var uncompressedRealPath, uncompressedObjectKey, uncompressedS3DownloadUrl string
		uuidDir := path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/"
		if attach.Type == TypeCloudBrainOne {
			minioRoot := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/"
			uncompressedObjectKey = setting.Attachment.Minio.BasePath +
				AttachmentRelativePath(attach.UUID) +
				attach.UUID
			uncompressedRealPath = minioRoot + uncompressedObjectKey
			compressedObjectKey = setting.Attachment.Minio.BasePath + uuidDir + attach.UUID
			compressedRealPath = minioRoot + compressedObjectKey
		} else {
			compressedObjectKey = setting.BasePath + uuidDir + attach.UUID + "/" + attach.Name
			compressedS3DownloadUrl = "s3://" + setting.Bucket + "/" + compressedObjectKey
			uncompressedObjectKey = setting.BasePath + uuidDir + attach.UUID + attach.UUID + "/"
			uncompressedS3DownloadUrl = "s3://" + setting.Bucket + "/" + uncompressedObjectKey
		}

		datasetInfos[attach.UUID] = DatasetInfo4AITask{
			Compressed: DatasetBaseInfo{
				RealPath:      compressedRealPath,
				ObjectKey:     compressedObjectKey,
				S3DownloadUrl: compressedS3DownloadUrl,
				Name:          attach.Name,
			},
			Uncompressed: DatasetBaseInfo{
				RealPath:      uncompressedRealPath,
				ObjectKey:     uncompressedObjectKey,
				S3DownloadUrl: uncompressedS3DownloadUrl,
				Name:          trimArchiveSuffix(attach.Name),
			},
			Type: attach.Type,
			Size: attach.Size,
		}
	}

	return datasetInfos, nil
}

var (
SpecsMapInitFlag = false
CloudbrainDebugResourceSpecsMap map[int]*ResourceSpec


+ 35
- 0
models/cloudbrain_config.go View File

@@ -0,0 +1,35 @@
package models

import (
"code.gitea.io/gitea/modules/timeutil"
)

// CloudbrainConfig stores per-task output/log storage settings and snapshots
// for a cloudbrain task. It is keyed by the cloudbrain task ID.
type CloudbrainConfig struct {
// CloudbrainID is the primary key; it is the ID of the owning cloudbrain record.
CloudbrainID int64 `xorm:"pk"`
// Output* fields describe where the task's output objects are stored.
OutputObjectPrefix string
OutputStorageType string
OutputBucket string
OutputEndpoint string
// Log* fields describe where the task's log objects are stored.
LogObjectPrefix string
LogStorageType string
LogBucket string
LogEndpoint string
// The snapshot fields are stored as text columns.
ConfigurationSnapshot string `xorm:"text"`
ContainerDataSnapshot string `xorm:"text"`
// CreatedTime and UpdatedTime are maintained by xorm via the created/updated tags.
CreatedTime timeutil.TimeStamp `xorm:"created"`
UpdatedTime timeutil.TimeStamp `xorm:"updated"`
}

// GetCloudbrainConfig loads the CloudbrainConfig row whose cloudbrain_id equals
// cloudbrainId. It returns ErrRecordNotExist when no such row exists, or the
// underlying query error when the lookup itself fails.
func GetCloudbrainConfig(cloudbrainId int64) (*CloudbrainConfig, error) {
	config := new(CloudbrainConfig)
	found, err := x.Where("cloudbrain_id = ?", cloudbrainId).Get(config)
	switch {
	case err != nil:
		return nil, err
	case !found:
		return nil, ErrRecordNotExist{}
	}
	return config, nil
}

// InsertCloudbrainConfig inserts config through the package-level engine and
// returns the engine's affected-row count together with any insert error.
func InsertCloudbrainConfig(config *CloudbrainConfig) (int64, error) {
	affected, err := x.Insert(config)
	return affected, err
}

+ 12
- 0
models/error.go View File

@@ -2070,3 +2070,15 @@ type ErrModelartsDeployNotExist struct {
func (err ErrModelartsDeployNotExist) Error() string {
return fmt.Sprintf("Deployment %s does not exist", err.ID)
}

// ErrPretrainModelNotExist is the error value used to signal that a pretrain
// model does not exist. It carries no additional data.
type ErrPretrainModelNotExist struct{}

// IsErrPretrainModelNotExist reports whether err is an ErrPretrainModelNotExist
// value. Pointers to the type and nil errors do not match.
func IsErrPretrainModelNotExist(err error) bool {
	switch err.(type) {
	case ErrPretrainModelNotExist:
		return true
	default:
		return false
	}
}

// Error implements the error interface with a fixed message.
func (err ErrPretrainModelNotExist) Error() string {
	const msg = "pretrain model is not exists"
	return fmt.Sprintf(msg)
}

+ 1
- 0
models/models.go View File

@@ -176,6 +176,7 @@ func init() {
new(IPLocation),
new(ModelartsDeploy),
new(ModelartsDeployQueue),
new(CloudbrainConfig),
)

tablesStatistic = append(tablesStatistic,


+ 1
- 0
modules/convert/cloudbrain.go View File

@@ -101,6 +101,7 @@ func ToSpecification(s *models.Specification) *api.SpecificationShow {
ShareMemGiB: s.ShareMemGiB,
ComputeResource: s.ComputeResource,
UnitPrice: s.UnitPrice,
SourceSpecId: s.SourceSpecId,
}
}



+ 3
- 3
modules/modelarts/modelarts.go View File

@@ -291,7 +291,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str
var jobResult *models.CreateTrainJobResult
var createErr error
if req.EngineID < 0 {
jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{
jobResult, createErr = CreateTrainJobUserImage(models.CreateUserImageTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.UserImageConfig{
@@ -315,7 +315,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str
},
})
} else {
jobResult, createErr = createTrainJob(models.CreateTrainJobParams{
jobResult, createErr = CreateTrainJob(models.CreateTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.Config{
@@ -412,7 +412,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str

func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) {

return createTrainJobUserImage(models.CreateUserImageTrainJobParams{
return CreateTrainJobUserImage(models.CreateUserImageTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.UserImageConfig{


+ 2
- 2
modules/modelarts/resty.go View File

@@ -491,7 +491,7 @@ sendjob:
return &result, nil
}

func createTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) {
func CreateTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) {
checkSetting()
client := getRestyClient()
var result models.CreateTrainJobResult
@@ -551,7 +551,7 @@ sendjob:
return &result, nil
}

func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) {
func CreateTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) {
checkSetting()
client := getRestyClient()
var result models.CreateTrainJobResult


+ 4
- 0
modules/setting/setting.go View File

@@ -726,6 +726,8 @@ var (
//ai_task config
AI_TASK_RANGE map[string][]string
PREPARING_MAX_WAIT_DURATION time.Duration
OUTPUT_SHOW_MAX_KEY int
OUTPUT_DOWNLOAD_MAX_KEY int

//wenxin url
BaiduWenXin = struct {
@@ -1576,6 +1578,8 @@ func NewContext() {
json.Unmarshal([]byte(tmp), &rangeMap)
AI_TASK_RANGE = rangeMap
PREPARING_MAX_WAIT_DURATION = sec.Key("ENABLED").MustDuration(15 * time.Minute)
OUTPUT_SHOW_MAX_KEY = sec.Key("OUTPUT_SHOW_MAX_KEY").MustInt(100)
OUTPUT_DOWNLOAD_MAX_KEY = sec.Key("OUTPUT_DOWNLOAD_MAX_KEY").MustInt(1000)

sec = Cfg.Section("benchmark")
IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false)


+ 15
- 0
modules/storage/minio_ext.go View File

@@ -407,3 +407,18 @@ func IsObjectExist4Minio(bucket, objectName string) (bool, error) {

return true, nil
}

// MinioCheckAndGetFileSize stats the object key in srcBucket via the minio core
// client. It returns (true, size) when the stat succeeds and (false, 0) when
// the client cannot be obtained or the stat fails for any reason.
func MinioCheckAndGetFileSize(srcBucket string, key string) (bool, int64) {
	_, core, err := getClients()
	if err != nil {
		// The format string needs a verb; otherwise the error is appended as
		// an "extra" argument instead of being rendered in the message.
		log.Error("getClients failed: %v", err)
		return false, 0
	}

	meta, err := core.StatObject(srcBucket, key, miniov6.StatObjectOptions{})
	if err != nil {
		// Logged at info level: a failed stat is treated as "object not available".
		log.Info("MinioCheckAndGetFileSize error, error=%v", err)
		return false, 0
	}
	return true, meta.Size
}

+ 29
- 13
modules/storage/obs.go View File

@@ -21,12 +21,13 @@ import (
)

type FileInfo struct {
FileName string `json:"FileName"`
ModTime string `json:"ModTime"`
IsDir bool `json:"IsDir"`
Size int64 `json:"Size"`
ParenDir string `json:"ParenDir"`
UUID string `json:"UUID"`
FileName string `json:"FileName"`
ModTime string `json:"ModTime"`
IsDir bool `json:"IsDir"`
Size int64 `json:"Size"`
ParenDir string `json:"ParenDir"`
UUID string `json:"UUID"`
RelativePath string `json:"RelativePath"`
}
type FileInfoList []FileInfo

@@ -278,13 +279,27 @@ func ObsGetFilesSize(srcBucket string, Files []string) int64 {
return fileTotalSize
}

func ObsCheckAndGetFileSize(srcBucket string, key string) (bool, int64) {
out, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{
Bucket: srcBucket,
Key: key,
})
if err != nil {
log.Info("ObsCheckAndGetFilesSize error, error=%v", err)
return false, 0
}
return true, out.ContentLength
}

func ObsCopyManyFile(srcBucket string, srcPath string, destBucket string, destPath string, Files []string) (int64, error) {

var fileTotalSize int64
srcPath = strings.TrimSuffix(srcPath, "/") + "/"
destPath = strings.TrimSuffix(destPath, "/") + "/"

for _, file := range Files {
srcKey := srcPath + file
destKey := destPath + file
srcKey := srcPath + strings.TrimPrefix(file, "/")
destKey := destPath + strings.TrimPrefix(file, "/")
log.Info("srcKey=" + srcKey + " destKey=" + destKey)
out, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{
Bucket: srcBucket,
@@ -495,11 +510,12 @@ func GetAllObjectByBucketAndPrefix(bucket string, prefix string) ([]FileInfo, er
isDir = false
}
fileInfo := FileInfo{
ModTime: val.LastModified.Format("2006-01-02 15:04:05"),
FileName: val.Key[prefixLen:],
Size: val.Size,
IsDir: isDir,
ParenDir: "",
ModTime: val.LastModified.Format("2006-01-02 15:04:05"),
FileName: strings.TrimPrefix(val.Key[prefixLen:], "/"),
Size: val.Size,
IsDir: isDir,
ParenDir: "",
RelativePath: val.Key,
}
fileInfoList = append(fileInfoList, fileInfo)
}


+ 22
- 4
modules/storage/storage.go View File

@@ -7,6 +7,7 @@ package storage
import (
"fmt"
"io"
"strings"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/obs"
@@ -46,9 +47,9 @@ func Copy(dstStorage ObjectStorage, dstPath string, srcStorage ObjectStorage, sr

var (
// Attachments represents attachments storage
Attachments ObjectStorage
ObsCli *obs.ObsClient
ScheduleMinioCore *minio.Core
Attachments ObjectStorage
ObsCli *obs.ObsClient
MinioCore *minio.Core
)

// Init init the stoarge
@@ -70,7 +71,7 @@ func Init() error {
m.UseSSL,
)
log.Info("minio storage inited.")
ScheduleMinioCore, err = minio.NewCore(m.Endpoint, m.AccessKeyID, m.SecretAccessKey, m.UseSSL)
MinioCore, err = minio.NewCore(m.Endpoint, m.AccessKeyID, m.SecretAccessKey, m.UseSSL)
if err != nil {
log.Error("init ScheduleMinioCore err.%v", err)
}
@@ -87,3 +88,20 @@ func Init() error {

return nil
}

// SelectFileByPrefixAndSuffix returns, in their original order, the entries of
// list that are not directories and whose FileName starts with prefix and ends
// with suffix. The result is always a non-nil slice.
func SelectFileByPrefixAndSuffix(list []FileInfo, prefix, suffix string) []FileInfo {
	matched := []FileInfo{}
	for i := range list {
		info := list[i]
		isMatch := !info.IsDir &&
			strings.HasPrefix(info.FileName, prefix) &&
			strings.HasSuffix(info.FileName, suffix)
		if isMatch {
			matched = append(matched, info)
		}
	}
	return matched
}

+ 1
- 0
modules/structs/cloudbrain.go View File

@@ -143,6 +143,7 @@ type SpecificationShow struct {
ShareMemGiB float32 `json:"share_mem_gi_b"`
ComputeResource string `json:"compute_resource"`
UnitPrice int `json:"unit_price"`
SourceSpecId string `json:"source_spec_id"`
}
type PointAccountShow struct {
ID int64 `json:"id"`


+ 2
- 0
modules/templates/helper.go View File

@@ -260,6 +260,7 @@ func NewFuncMap() []template.FuncMap {
return dict, nil
},
"Printf": fmt.Sprintf,
"ToLower": strings.ToLower,
"Escape": Escape,
"Sec2Time": models.SecToTime,
"ParseDeadline": func(deadline string) []string {
@@ -422,6 +423,7 @@ func NewTextFuncMap() []texttmpl.FuncMap {
return dict, nil
},
"Printf": fmt.Sprintf,
"ToLower": strings.ToLower,
"Escape": Escape,
"Sec2Time": models.SecToTime,
"ParseDeadline": func(deadline string) []string {


+ 1
- 1
modules/urfs_client/urchin/schedule.go View File

@@ -48,7 +48,7 @@ func tryScheduleDir(endpoint, bucket, objectKey, dstPeer string) {
}

func MoveBucketInOpenIMinio(objectKeyPrefix, targetObjectPrefix, oldBucket, newBucket string) error {
var core = storage.ScheduleMinioCore
var core = storage.MinioCore
objectInfo := core.Client.ListObjects(oldBucket, objectKeyPrefix, true, nil)
log.Info("MoveBucketInOpenIMinio start.objectKeyPrefix=%s", objectKeyPrefix)
count := 0


+ 9
- 5
options/locale/locale_en-US.ini View File

@@ -3177,9 +3177,9 @@ task_npudebugjob=`created NPU type debugging task <a href="%s/modelarts/notebook
task_c2net_gpudebugjob=`created CPU/GPU type debugging task <a href="%s/grampus/notebook/%s">%s</a>`
task_c2net_npudebugjob=`created NPU type debugging task <a href="%s/grampus/notebook/%s">%s</a>`
task_c2ent_gcudebugjob=`created GCU type debugging task <a href="%s/grampus/train-job/%s">%s</a>`
task_c2ent_gcutrainjob=`created GCU type train task <a href="%s/modelarts/train-job/%s">%s</a>`
task_c2ent_gcutrainjob=`created GCU type train task <a href="%s/grampus/train-job/%s">%s</a>`
task_c2ent_mludebugjob=`created MLU type debugging task <a href="%s/grampus/train-job/%s">%s</a>`
task_c2ent_mlutrainjob=`created MLU type train task <a href="%s/modelarts/train-job/%s">%s</a>`
task_c2ent_mlutrainjob=`created MLU type train task <a href="%s/grampus/train-job/%s">%s</a>`
task_c2net_dcudebugjob=`created DCU type debugging task <a href="%s/grampus/notebook/%s">%s</a>`
task_c2ent_onlineinferjob=`created GPU type online inference task <a href="%s/grampus/onlineinfer/%s">%s</a>`
task_nputrainjob=`created NPU training task <a href="%s/modelarts/train-job/%s">%s</a>`
@@ -3431,16 +3431,20 @@ job_name_already_used = The job name did already exist
insufficient_point_balance = Insufficient point balance
create_failed = Create AI task failed
restart_failed = Restart AI task failed, please try again later.
boot_file_must_python = The boot file must be a python file
stop_failed = Fail to stop the job, please try again later.
can_not_restart = The task was not scheduled successfully before, so it cannot be restarted.
dataset_size_over_limit = The size of dataset exceeds limitation (%dGB)
boot_file_must_python = The boot file must be a python file
boot_file_not_exist= The boot file is not exists.
branch_not_exists= The branch does not exist. Please refresh and select again.
boot_file_not_exist = The boot file does not exist.
branch_not_exists = The branch does not exist. Please refresh and select again.
dataset_number_over_limit = The dataset count exceed the limit
result_cleared=The files of the task have been cleared, can not restart or retrain any more, please create a new task instead
model_not_exist=The model in the task does not exist or has been deleted

[common_error]
system_error = System error.Please try again later
insufficient_permission = Insufficient permissions
insufficient_permission = You do not have permission to perform this operation
param_error = The parameter you submitted is incorrect
wechat_not_bind = Please scan the code and bind to wechat first



+ 7
- 2
options/locale/locale_zh-CN.ini View File

@@ -3453,16 +3453,21 @@ job_name_already_used = 任务名已被使用,请换一个名称
insufficient_point_balance = 积分余额不足
create_failed = 创建AI任务失败
restart_failed = 再次调试失败,请稍后再试
boot_file_must_python = 启动文件必须是python文件
stop_failed = 任务停止失败,请稍后再试
can_not_restart = 这个任务之前没有调度成功,不能再次调试。
dataset_size_over_limit = 数据集大小超过限制(%dGB)
boot_file_must_python = 启动文件必须是python文件
boot_file_not_exist =启动文件不存在
branch_not_exists= 代码分支不存在,请刷新后重试
branch_not_exists = 代码分支不存在,请刷新后重试
dataset_number_over_limit = 选择的数据集文件数量超出限制
result_cleared=源任务的文件已被清理,无法再次调试或复用训练结果,请新建任务。
model_not_exist=选择的预训练模型不存在或者已被删除


[common_error]
system_error = 当前服务不可用,请稍后再试
insufficient_permission = 权限不足
insufficient_permission = 您没有权限执行此操作
param_error = 提交的参数有误
wechat_not_bind = 请先扫码绑定微信



+ 1
- 1
package-lock.json View File

@@ -1,5 +1,5 @@
{
"name": "aiforge1",
"name": "aiforge",
"lockfileVersion": 2,
"requires": true,
"packages": {


+ 29
- 17
public/home/home.js View File

@@ -31,7 +31,7 @@ var swiperRepo = new Swiper(".homepro-list", {
delay: 2500,
disableOnInteraction: false,
},
breakpoints: {
breakpoints: {
768: {
slidesPerView: 2,
},
@@ -170,7 +170,7 @@ document.onreadystatechange = function () {
if(document.readyState != "complete"){
return;
}
console.log("Start to open WebSocket." + document.readyState);
console.log("Start to open WebSocket." + document.readyState);
queryRecommendData();

var output = document.getElementById("newmessage");
@@ -179,7 +179,7 @@ document.onreadystatechange = function () {
url = "wss://" + document.location.host + "/action/notification"
}
var socket = new WebSocket(url);
socket.onopen = function () {
messageQueue = [];
console.log("message has connected.");
@@ -242,8 +242,8 @@ document.onreadystatechange = function () {
actionName = actionName.replace("{oldRepoName}",record.Content);
html += recordPrefix + actionName;
html += " <a href=\"" + getRepoLink(record) + "\" rel=\"nofollow\">" + getRepotext(record) + "</a>"
}
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30"
}
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30"
|| record.OpType == "31" || record.OpType == "32" || record.OpType == "33" || record.OpType == "42" || record.OpType == "44"){
html += recordPrefix + actionName;
const taskLink = getTaskLink(record);
@@ -251,7 +251,7 @@ document.onreadystatechange = function () {
html += " <a href=\"" + taskLink + "\" rel=\"nofollow\">" + record.RefName + "</a>"
} else {
html += " <span style=\"color: rgba(0,0,0,0.3)\">" + record.RefName + "</span>"
}
}
}
else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41" || record.OpType == "43"|| record.OpType == "44"|| record.OpType == "45"|| record.OpType == "46"){
html += recordPrefix + actionName;
@@ -302,7 +302,11 @@ function getTaskLink(record){
re = '';
}
}else if(record.OpType == 27){
re = re + "/modelarts/train-job/" + record.Content;
if (record.Cloudbrain) {
re = re + "/modelarts/train-job/" + record.Cloudbrain.ID;
} else {
re = '';
}
}else if(record.OpType == 28){
re = re + "/modelarts/inference-job/" + record.Content;
}else if(record.OpType == 29){
@@ -310,9 +314,17 @@ function getTaskLink(record){
}else if(record.OpType == 30){
re = re + "/modelmanage/model_readme_tmpl?name=" + record.RefName;
}else if(record.OpType == 31){
re = re + "/cloudbrain/train-job/" + record.Content;
if (record.Cloudbrain) {
re = re + "/cloudbrain/train-job/" + record.Cloudbrain.ID;
} else {
re = '';
}
}else if(record.OpType == 32 || record.OpType == 33 || record.OpType == 42 || record.OpType == 44){
re = re + "/grampus/train-job/" + record.Content;
if (record.Cloudbrain) {
re = re + "/grampus/train-job/" + record.Cloudbrain.ID;
} else {
re = '';
}
}else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41 || record.OpType == 43|| record.OpType == 46){
if (record.Cloudbrain) {
re = re + "/grampus/notebook/" + record.Cloudbrain.ID;
@@ -591,7 +603,7 @@ function queryRecommendData(){

function displayActivity(json){
var activityDiv = document.getElementById("recommendactivity");
if (!activityDiv) return;
if (!activityDiv) return;
var html = "";
if (json != null && json.length > 0){
for(var i = 0; i < json.length;i++){
@@ -649,7 +661,7 @@ function displayRepo(json){
if (json != null && json.length > 0){
var repoMap = {};
for (var i = 0, iLen = json.length; i < iLen; i++) {
var repo = json[i];
var repo = json[i];
var label = isZh ? repo.Label : repo.Label_en;
if (repoMap[label]) {
repoMap[label].push(repo);
@@ -670,7 +682,7 @@ function displayRepo(json){
${repo["Avatar"] ? `<img style="border-radius:100%;" class="left floated mini ui image" src="${repo["Avatar"]}">` : `<img style="border-radius:100%;" class="left floated mini ui image" avatar="${repo["OwnerName"]}">`}
<a class="header nowrap" style="color:rgb(50, 145, 248);font-size:14px;" href="/${repo["OwnerName"]}/${repo["Name"]}" title="${repo["Alias"]}">${repo["Alias"]}</a>
<div class="description nowrap-2" style="rgba(136,136,136,1);;font-size:12px;" title="${repo["Description"]}">${repo["Description"]}</div>
<a href="/${repo["OwnerName"]}/${repo["Name"]}" style="height:100%;width:100%;position:absolute;left:0;top:0"></a>`;
<a href="/${repo["OwnerName"]}/${repo["Name"]}" style="height:100%;width:100%;position:absolute;left:0;top:0"></a>`;
html += `
</div>
</div>`;
@@ -718,7 +730,7 @@ function displayOrg(json){
swiperOrg.updateSlides();
}

function displayDataset(data) {
function displayDataset(data) {
var homeDatasetEl = document.getElementById("home_dataset");
if (!homeDatasetEl) return;
var html = '';
@@ -757,7 +769,7 @@ function displayUserExp(data) {
for (var i = 0, iLen = data.length; i < iLen; i++) {
var dataI = data[i];
html += `<div class="swiper-slide">
<div class="ui fluid user-card">
<div class="ui fluid user-card">
<div><div class="content img-c">
<a href="/${dataI.name}">
<div class="img" style="width:60px;height:60px;background-image:url('${dataI.avatar}')"></div>
@@ -765,7 +777,7 @@ function displayUserExp(data) {
</div></div>
<div><div class="content label" title="${dataI.fullname || dataI.name}">${dataI.fullname || dataI.name}</div></div>
<div><div class="content descr" title="${dataI.desc}">${dataI.desc}</div></div>
</div>
</div>
</div>`
}
homeUserExpEl.innerHTML = html;
@@ -907,8 +919,8 @@ function initHomeTopBanner() {
if (banner.data) {
hmPageC.append($(banner.data));
hmPageSlidePaginationC.append('<div class="_hm-slide-pagination-item"></div>');
}
}
}
}
startSlide();
}



+ 293
- 23
routers/ai_task/ai_task.go View File

@@ -1,8 +1,6 @@
package ai_task

import (
"net/http"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
@@ -10,8 +8,12 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/routers/common"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/schedule"
"code.gitea.io/gitea/services/ai_task_service/task"
"net/http"
"strings"
)

func CreateAITask(ctx *context.Context, form entity.CreateReq) {
@@ -68,21 +70,49 @@ func RestartAITask(ctx *context.Context) {

func GetAITaskLog(ctx *context.Context) {
id := ctx.QueryInt64("id")
t, err := task.GetAITaskTemplateByCloudbrainId(id)
baseLine := ctx.QueryInt64("base_line")
lines := ctx.QueryInt64("lines")
order := ctx.Query("order")
nodeId := ctx.QueryInt("node_id")
logFileName := ctx.Query("file_name")
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id)
if bizErr != nil {
log.Error("GetAITaskLog GetCloudbrainByCloudbrainID err.%v", bizErr)
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
return
}
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrain)
if err != nil {
log.Error("param error")
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
t.GetLog(id)
ctx.JSON(http.StatusOK, response.OuterSuccess())
res, err := t.GetLog(entity.QueryLogOpts{
CloudbrainId: id,
BaseLine: baseLine,
Lines: lines,
Order: entity.Direction(order),
NodeId: nodeId,
LogFileName: logFileName,
})
if err != nil {
log.Error("GetAITaskLog error.%v", err)
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
if res.Content != "" && cloudbrain.IsUserHasRight(ctx.User) {
res.CanLogDownload = true
}
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(res))
}

func GetAITaskInfo(ctx *context.Context) {
func DownloadAITaskLog(ctx *context.Context) {
id := ctx.QueryInt64("id")
nodeId := ctx.QueryInt("node_id")
logFileName := ctx.Query("file_name")
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id)
if bizErr != nil {
log.Error("GetAITaskInfo GetCloudbrainByCloudbrainID err.%v", bizErr)
log.Error("DownloadAITaskLog GetCloudbrainByCloudbrainID err.%v", bizErr)
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
return
}
@@ -92,36 +122,152 @@ func GetAITaskInfo(ctx *context.Context) {
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
resultTask, err := t.Query(id)
res, err := t.GetLogDownloadInfo(entity.GetLogDownloadInfoReq{
CloudbrainId: id,
NodeId: nodeId,
LogFileName: logFileName,
})
if err != nil {
log.Error("Query error.id=%d err=%v", id, err)
log.Error("DownloadAITaskLog error.%v", err)
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
//国际化
resultTask.Tr(ctx.Language())
//根据权限去掉数据集和模型信息
var operatorId int64
if ctx.User != nil {
operatorId = ctx.User.ID

if res == nil || res.IsEmpty() {
log.Error("DownloadAITaskLog error.%v", err)
ctx.JSON(http.StatusNotFound, "")
return
}

tmpErr := common.WriteDownloadContent2Resp(ctx, res)
if tmpErr != nil {
log.Error("DownloadAITaskLog error.%v", tmpErr)
ctx.JSON(http.StatusOK, response.OuterResponseError(tmpErr))
return
}

}

// DownloadOutputFile streams a single output file of an AI task to the client.
//
// Query parameters: "id" (cloudbrain task ID), "file_name" and "parent_dir".
// Lookup and business errors are answered with HTTP 200 and an error payload;
// an empty download result is answered with HTTP 404.
func DownloadOutputFile(ctx *context.Context) {
	id := ctx.QueryInt64("id")
	fileName := ctx.Query("file_name")
	parentDir := ctx.Query("parent_dir")
	job, bizErr := models.GetCloudbrainByCloudbrainID(id)
	if bizErr != nil {
		log.Error("DownloadOutputFile GetCloudbrainByCloudbrainID err.%v", bizErr)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
		return
	}
	t, err := task.GetAITaskTemplateFromCloudbrain(job)
	if err != nil {
		log.Error("param error")
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}
	res, err := t.GetSingleOutputDownloadInfo(entity.GetOutputDownloadInfoReq{
		CloudbrainId: id,
		FileName:     fileName,
		ParentDir:    parentDir,
	})
	if err != nil {
		log.Error("DownloadOutputFile error.%v", err)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}

	if res == nil || res.IsEmpty() {
		// err is nil here, so log the request details instead of the error.
		log.Error("DownloadOutputFile error. download info is empty. id=%d file_name=%s parent_dir=%s", id, fileName, parentDir)
		ctx.JSON(http.StatusNotFound, "")
		return
	}

	if tmpErr := common.WriteDownloadContent2Resp(ctx, res); tmpErr != nil {
		log.Error("DownloadOutputFile error.%v", tmpErr)
		ctx.JSON(http.StatusOK, response.OuterResponseError(tmpErr))
	}
}

// DownloadAllOutputFile streams the complete output of an AI task to the client.
//
// Query parameter: "id" (cloudbrain task ID). Lookup and business errors are
// answered with HTTP 200 and an error payload; an empty download result is
// answered with HTTP 404.
func DownloadAllOutputFile(ctx *context.Context) {
	id := ctx.QueryInt64("id")
	job, bizErr := models.GetCloudbrainByCloudbrainID(id)
	if bizErr != nil {
		log.Error("DownloadAllOutputFile GetCloudbrainByCloudbrainID err.%v", bizErr)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
		return
	}
	t, err := task.GetAITaskTemplateFromCloudbrain(job)
	if err != nil {
		log.Error("param error")
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}
	res, err := t.GetAllOutputDownloadInfo(entity.GetOutputDownloadInfoReq{
		CloudbrainId: id,
	})
	if err != nil {
		log.Error("GetAllOutputDownloadInfo error.%v", err)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}

	if res == nil || res.IsEmpty() {
		// err is nil here, so log the task ID instead of the error.
		log.Error("DownloadAllOutputFile error. download info is empty. id=%d", id)
		ctx.JSON(http.StatusNotFound, "")
		return
	}

	if tmpErr := common.WriteDownloadContent2Resp(ctx, res); tmpErr != nil {
		log.Error("DownloadAllOutputFile error.%v", tmpErr)
		ctx.JSON(http.StatusOK, response.OuterResponseError(tmpErr))
	}
}

func GetAITaskInfo(ctx *context.Context) {
id := ctx.QueryInt64("id")
job, bizErr := models.GetCloudbrainByCloudbrainID(id)
if bizErr != nil {
log.Error("GetAITaskInfo GetCloudbrainByCloudbrainID err.%v", bizErr)
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
return
}
t, err := task.GetAITaskTemplateFromCloudbrain(job)
if err != nil {
log.Error("param error")
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
if operatorId == 0 || cloudbrain.UserID != operatorId {
resultTask.RemoveDatasets()
resultTask.RemovePretrainModelList()
resultTask, err := t.Query(id)
if err != nil {
log.Error("Query error.id=%d err=%v", id, err)
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
//加载关联版本
earlyVersionList, bizErr := task.QueryTaskEarlyVersionList(id, operatorId)
earlyVersionList, bizErr := task.QueryTaskEarlyVersionList(id)
if bizErr != nil {
log.Error("QueryTaskEarlyVersionList err.id=%d err=%v", id, err)
ctx.JSON(http.StatusOK, response.OuterResponseError(bizErr))
return
}
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(&entity.QueryAITaskRes{
res := &entity.QueryAITaskRes{
Task: resultTask,
CanDownload: cloudbrain.CanDownloadJob(ctx, job),
EarlyVersionList: earlyVersionList,
CanCreateVersion: cloudbrain.CanUserModify(ctx.User),
}))
CanCreateVersion: job.CanUserModify(ctx.User),
}
//根据权限去掉数据集和模型信息
res.TryToRemoveDatasetAndModelInfo(ctx.User)
//国际化
res.Tr(ctx.Language())
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(res))
}

func GetAITaskBriefInfo(ctx *context.Context) {
id := ctx.QueryInt64("id")
t, err := task.GetAITaskTemplateByCloudbrainId(id)
@@ -141,7 +287,69 @@ func GetAITaskBriefInfo(ctx *context.Context) {
}

func GetAITaskOutput(ctx *context.Context) {
ctx.JSON(http.StatusOK, response.OuterSuccess())
id := ctx.QueryInt64("id")
parentDir := ctx.Query("parent_dir")
cloudbrainTask, bizErr := models.GetCloudbrainByCloudbrainID(id)
if bizErr != nil {
log.Error("GetAITaskOutput GetCloudbrainByCloudbrainID err.%v", bizErr)
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
return
}
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrainTask)
if err != nil {
log.Error("param error")
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
res, err := t.GetOutput(id, parentDir)
if err != nil {
log.Error("GetOutput error.%v", err)
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
return
}
res.CanReschedule = cloudbrain.CanDeleteJob(ctx, cloudbrainTask)
res.CanDownload = cloudbrain.CanDownloadJob(ctx, cloudbrainTask)

m := map[string]interface{}{"output": res}
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m))
}

// GetAllAITaskOutput returns the output listing of an AI task as JSON under the
// "output" key.
//
// Query parameters: "id" (cloudbrain task ID) and "suffix", a "|"-separated
// list of suffixes passed to the task template; empty entries are dropped.
func GetAllAITaskOutput(ctx *context.Context) {
	id := ctx.QueryInt64("id")
	// Splitting an empty string yields a single empty entry, which is filtered
	// out below, so suffix stays nil when no suffix was supplied.
	var suffix []string
	for _, s := range strings.Split(ctx.Query("suffix"), "|") {
		if s != "" {
			suffix = append(suffix, s)
		}
	}
	cloudbrainTask, bizErr := models.GetCloudbrainByCloudbrainID(id)
	if bizErr != nil {
		log.Error("GetAllAITaskOutput GetCloudbrainByCloudbrainID err.%v", bizErr)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx))
		return
	}
	t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrainTask)
	if err != nil {
		log.Error("param error")
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}
	res, err := t.GetAllOutput(entity.GetAllOutputReq{
		CloudbrainId: cloudbrainTask.ID,
		Suffix:       suffix,
	})
	if err != nil {
		log.Error("GetAllOutput error.%v", err)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}

	m := map[string]interface{}{"output": res}
	ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m))
}

func GetNotebookUrl(ctx *context.Context) {
@@ -164,6 +372,25 @@ func GetNotebookUrl(ctx *context.Context) {
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m))
}

// GetNodeInfo answers with the node information of the AI task identified by
// the "id" query parameter, wrapped as JSON under the "nodes" key. Failures are
// reported as a translated business error with HTTP 200.
func GetNodeInfo(ctx *context.Context) {
	taskID := ctx.QueryInt64("id")

	tpl, err := task.GetAITaskTemplateByCloudbrainId(taskID)
	if err != nil {
		log.Error("param error")
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}

	nodes, err := tpl.GetNodeInfo(taskID)
	if err != nil {
		log.Error("GetNodeInfo error.%v", err)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}

	payload := map[string]interface{}{"nodes": nodes}
	ctx.JSON(http.StatusOK, response.OuterSuccessWithData(payload))
}

func GetCreationRequiredInfo(ctx *context.Context) {
jobType := ctx.Query("job_type")
var isOnlineType bool
@@ -222,6 +449,7 @@ func GetAITaskList(ctx *context.Context) {
return
}
result.CanCreateTask = cloudbrain.CanCreateOrDebugJob(ctx)
result.IsRepoEmpty = ctx.Repo.Repository.IsEmpty
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(result))
}

@@ -242,6 +470,48 @@ func GetAITaskOperationProfile(ctx *context.Context) {
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(r))
}

// GetAITaskResourceUsage responds with the resource usage of an AI task.
//
// Query parameters:
//   - id:        cloudbrain task ID.
//   - node_id:   forwarded as GetResourceUsageOpts.NodeId.
//   - file_name: forwarded as GetResourceUsageOpts.LogFileName.
//
// Every outcome is written with HTTP 200; failures are wrapped by
// response.OuterTrBizError.
func GetAITaskResourceUsage(ctx *context.Context) {
	id := ctx.QueryInt64("id")
	nodeId := ctx.QueryInt("node_id")
	logFileName := ctx.Query("file_name")
	t, err := task.GetAITaskTemplateByCloudbrainId(id)
	if err != nil {
		// Include the task id and the underlying error so the failure can be traced.
		log.Error("GetAITaskResourceUsage GetAITaskTemplateByCloudbrainId err.id=%d err=%v", id, err)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}
	r, err := t.GetResourceUsage(entity.GetResourceUsageOpts{
		CloudbrainId: id,
		NodeId:       nodeId,
		LogFileName:  logFileName,
	})
	if err != nil {
		// The label now names the call that actually failed.
		log.Error("GetResourceUsage error.%v", err)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx))
		return
	}
	ctx.JSON(http.StatusOK, response.OuterSuccessWithData(r))
}

// RetryModelSchedule re-triggers model migration for the cloudbrain task named
// by the "id" query parameter.
//
// A non-positive id or a failed task lookup is answered with
// response.PARAM_ERROR; a failed migration is answered with the migration
// error. Every outcome is written with HTTP 200.
func RetryModelSchedule(ctx *context.APIContext) {
	cloudbrainID := ctx.QueryInt64("id")
	if cloudbrainID <= 0 {
		ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx))
		return
	}

	cloudbrainTask, lookupErr := models.GetCloudbrainByCloudbrainID(cloudbrainID)
	if lookupErr != nil {
		ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx))
		return
	}

	if migrateErr := schedule.RetryModelMigrate(cloudbrainTask); migrateErr != nil {
		ctx.JSON(http.StatusOK, response.OuterResponseError(migrateErr))
		return
	}
	ctx.JSON(http.StatusOK, response.OuterSuccess())
}

func handCreateReq(req *entity.CreateReq) {
req.JobName = util.ConvertDisplayJobNameToJobName(req.DisplayJobName)
if req.WorkServerNumber == 0 {


+ 10
- 2
routers/api/v1/api.go View File

@@ -651,16 +651,24 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/stop", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.StopAITask)
m.Post("/del", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.DelAITask)
m.Post("/restart", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrAITaskCreator(), ai_task.RestartAITask)
m.Get("/log", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskLog)
m.Get("/output", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOutput)
m.Get("/debug_url", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNotebookUrl)
m.Get("/creation/required", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetCreationRequiredInfo)
m.Post("/output/reschedule", reqRepoWriter(models.UnitTypeCloudBrain), ai_task.RetryModelSchedule)

}, reqToken(), context.RepoRef())
m.Group("/ai_task", func() {
m.Get("", reqRepoReader(models.UnitTypeCloudBrain), ai_task.GetAITaskInfo)
m.Get("/brief", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskBriefInfo)
m.Get("/list", reqRepoReader(models.UnitTypeCloudBrain), ai_task.GetAITaskList)
m.Get("/operation_profile", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOperationProfile)
m.Get("/log", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskLog)
m.Get("/log/download", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.DownloadAITaskLog)
m.Get("/node_info", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNodeInfo)
m.Get("/output", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOutput)
m.Get("/output/download", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.DownloadOutputFile)
m.Get("/output/download/all", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.DownloadAllOutputFile)
m.Get("/output/all", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAllAITaskOutput)
m.Get("/resource_usage", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskResourceUsage)
})
}, repoAssignment())
// Miscellaneous


+ 10
- 0
routers/api/v1/repo/cloudbrain.go View File

@@ -46,6 +46,7 @@ import (
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/storage"
routerRepo "code.gitea.io/gitea/routers/repo"
ai_task "code.gitea.io/gitea/services/ai_task_service/task"
)

func CloudBrainShow(ctx *context.APIContext) {
@@ -92,6 +93,15 @@ func CloudBrainShow(ctx *context.APIContext) {
}
func GeneralCloudBrainJobStop(ctx *context.APIContext) {
task := ctx.Cloudbrain
// Tasks flagged as new AI tasks are stopped through the ai_task service.
if task.IsNewAITask() {
	_, bizErr := ai_task.StopCloudbrain(task)
	if bizErr != nil {
		ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.Stopped_failed")))
		return
	}
	ctx.JSON(http.StatusOK, models.BaseOKMessageApi)
	// The response has been written; return so the legacy stop logic below
	// does not run and write a second response.
	return
}

if task.IsTerminal() {
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("cloudbrain.Already_stopped"))
return


+ 79
- 8
routers/api/v1/repo/modelarts.go View File

@@ -7,6 +7,7 @@ package repo

import (
"encoding/json"
"errors"
"net/http"
"path"
"sort"
@@ -35,6 +36,7 @@ import (
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
routerRepo "code.gitea.io/gitea/routers/repo"
ai_task "code.gitea.io/gitea/services/ai_task_service/task"
cloudbrainService "code.gitea.io/gitea/services/cloudbrain"
)

@@ -120,12 +122,35 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {

jobID := ctx.Params(":jobid")
versionName := ctx.Query("version_name")
job, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
var job *models.Cloudbrain
id := ctx.QueryInt64("id")
if id > 0 {
job, err = models.GetCloudbrainByCloudbrainID(id)
} else {
job, err = models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
}
if err != nil {
ctx.NotFound(err)
return
}

if job.IsNewAITask() {
var bizErr *response.BizError
job, bizErr = ai_task.UpdateCloudbrain(job)
if bizErr != nil {
log.Error("UpdateCloudbrain err.job.DisplayJobName = %s err=%v", job.DisplayJobName, err)
ctx.NotFound(err)
return
}
aiCenterName = cloudbrainService.GetAiCenterShow(job.AiCenter, ctx.Context)
ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
"JobStatus": job.Status,
"JobDuration": job.TrainJobDuration,
"AiCenter": aiCenterName,
"StartTime": job.StartTime,
})
return
}
if job.Type == models.TypeCloudBrainOne {
aiCenterName = routerRepo.GetAiCenterNameByCode(models.AICenterOfCloudBrainOne, ctx.Language())
job, err = cloudbrainTask.SyncCloudBrainOneStatus(job)
@@ -205,7 +230,12 @@ func GetModelScheduleStatus(ctx *context.APIContext) {

func RetryModelSchedule(ctx *context.APIContext) {
jobID := ctx.Params(":jobid")
err := schedule.RetryModelMigrate(jobID)
job, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
	log.Error("RetryModelMigrate GetCloudbrainByJobID err.jobId=%s err=%v", jobID, err)
	ctx.JSON(http.StatusOK, response.OuterResponseError(errors.New("jobId not correct")))
	// Stop here: the lookup failed, so job must not be passed on to
	// RetryModelMigrate and no second response may be written.
	return
}
err = schedule.RetryModelMigrate(job)
if err != nil {
ctx.JSON(http.StatusOK, response.OuterResponseError(err))
return
@@ -362,17 +392,41 @@ func trainJobGetLogContent(jobID string, versionID int64, baseLine string, order

func DelTrainJobVersion(ctx *context.APIContext) {
var (
err error
err error
task *models.Cloudbrain
)

var jobID = ctx.Params(":jobid")
var versionName = ctx.Query("version_name")
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
var id = ctx.QueryInt64("id")
if id > 0 {
task, err = models.GetCloudbrainByCloudbrainID(id)
} else {
task, err = models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
}
if err != nil {
	// Log the requested job id: task must not be dereferenced after a failed
	// lookup (it is expected to be nil here).
	log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
	ctx.NotFound(err)
	return
}
// Tasks flagged as new AI tasks are deleted through the ai_task service and
// answered directly, bypassing the legacy delete logic below.
if task.IsNewAITask() {
	bizErr := ai_task.DelCloudbrain(task)
	if bizErr != nil {
		// Two verbs for two arguments; the former third verb had no operand.
		log.Error("DelCloudbrain(%s) failed:%v", task.JobName, bizErr)
		ctx.JSON(http.StatusOK, map[string]interface{}{
			"Message":  ctx.Tr(bizErr.TrCode),
			"StatusOK": 1,
		})
		return
	}

	ctx.JSON(http.StatusOK, map[string]interface{}{
		"JobID":       task.JobID,
		"VersionName": task.VersionName,
		"StatusOK":    0,
	})
	return
}

if !task.IsTerminal() {
log.Error("the job(%s) version has not been stopped", task.JobName)
@@ -444,16 +498,33 @@ func DelTrainJobVersion(ctx *context.APIContext) {

func StopTrainJobVersion(ctx *context.APIContext) {
var (
err error
err error
task *models.Cloudbrain
)
var jobID = ctx.Params(":jobid")
var versionName = ctx.Query("version_name")
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
var id = ctx.QueryInt64("id")
if id > 0 {
task, err = models.GetCloudbrainByCloudbrainID(id)
} else {
task, err = models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
}
if err != nil {
	// Log the requested job id: task must not be dereferenced after a failed
	// lookup (it is expected to be nil here).
	log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
	return
}

// Tasks flagged as new AI tasks are stopped through the ai_task service.
if task.IsNewAITask() {
	_, bizErr := ai_task.StopCloudbrain(task)
	if bizErr != nil {
		// Two verbs for two arguments; the former third verb had no operand.
		log.Error("StopCloudbrain(%s) failed:%v", task.JobName, bizErr)
		return
	}
	ctx.JSON(http.StatusOK, map[string]interface{}{
		"JobID":       task.JobID,
		"VersionName": task.VersionName,
		"StatusOK":    0,
	})
	// The response has been written; return so the ModelArts stop call below
	// is not issued for a task that was already stopped.
	return
}
_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
if err != nil {
log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())


+ 62
- 0
routers/common/download.go View File

@@ -0,0 +1,62 @@
package common

import (
"archive/zip"
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"io"
"net/http"
"net/url"
)

// WriteDownloadContent2Resp delivers the download described by res to the client.
//
// When res.DownloadUrl is set the client is redirected to it (307) and nothing
// is streamed. Otherwise the content of res.Readers is written to the response
// as an attachment named res.ResultFileName:
//   - entity.FileTypeTXT: the readers are concatenated in order;
//   - entity.FileTypeZIP: each reader becomes one archive entry named f.Name.
//
// Any other ResultType writes only the headers. res is always closed before
// returning.
//
// The first read, write, or archive error is logged and returned. Part of the
// body may already have been sent to the client by then.
func WriteDownloadContent2Resp(ctx *context.Context, res *entity.FileDownloadInfo) error {
	defer res.Close()
	resp := ctx.Resp

	// Prefer redirecting to the download link when one is available.
	if res.DownloadUrl != "" {
		ctx.Resp.Header().Set("Cache-Control", "max-age=0")
		http.Redirect(ctx.Resp, ctx.Req.Request, res.DownloadUrl, http.StatusTemporaryRedirect)
		return nil
	}

	// No download link: stream the file content directly.
	resp.Header().Set("Content-Disposition", "attachment; filename="+url.QueryEscape(res.ResultFileName))
	resp.Header().Set("Content-Type", "application/octet-stream")

	switch res.ResultType {
	case entity.FileTypeTXT:
		for _, f := range res.Readers {
			if _, err := io.Copy(resp, f.Reader); err != nil {
				log.Error("WriteDownloadContent2Resp copy file error.%v", err)
				return err
			}
		}
	case entity.FileTypeZIP:
		w := zip.NewWriter(resp)
		for _, f := range res.Readers {
			fDest, err := w.Create(f.Name)
			if err != nil {
				log.Error("WriteDownloadContent2Resp create zip entry error.%v", err)
				// Best-effort finalisation; the Create error is the one reported.
				w.Close()
				return err
			}
			// io.Copy treats io.EOF as normal completion and surfaces any
			// other read or write failure.
			if _, err := io.Copy(fDest, f.Reader); err != nil {
				log.Error("WriteDownloadContent2Resp write zip entry error.%v", err)
				// Best-effort finalisation; the copy error is the one reported.
				w.Close()
				return err
			}
		}
		// Close writes the central directory; without it the archive is invalid.
		if err := w.Close(); err != nil {
			log.Error("WriteDownloadContent2Resp close zip error.%v", err)
			return err
		}
	}
	return nil
}

+ 21
- 17
routers/repo/ai_model_manage.go View File

@@ -47,17 +47,7 @@ const (
MODEL_ONLINE_TYPE = 0
)

func saveModelByParameters(jobId string, versionName string, name string, version string, label string, description string, engine int, ctx *context.Context) (string, error) {
aiTask, err := models.GetCloudbrainByJobIDAndVersionName(jobId, versionName)
if err != nil {
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId)
if err != nil {
log.Info("query task error." + err.Error())
return "", err
} else {
log.Info("query gpu train task.")
}
}
func saveModelByParameters(aiTask *models.Cloudbrain, name string, version string, label string, description string, engine int, ctx *context.Context) (string, error) {
uuid := uuid.NewV4()
id := uuid.String()
modelPath := id
@@ -406,6 +396,25 @@ func SaveModel(ctx *context.Context) {
re := map[string]string{
"code": "-1",
}

var aiTask *models.Cloudbrain
var err error
//云脑重构:适配用id的方式请求
cloudbrainId := ctx.QueryInt64("cloudbrain_id")
if cloudbrainId > 0 {
aiTask, err = models.GetCloudbrainByCloudbrainID(cloudbrainId)
} else {
aiTask, err = models.GetCloudbrainByJobIDAndVersionName(JobId, VersionName)
if err != nil {
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, JobId)
}
}
if err != nil {
	log.Error("save model error." + err.Error())
	re["msg"] = err.Error()
	// Send the failure payload like the other early exits of this handler;
	// returning without it leaves the client with no response body.
	ctx.JSON(200, re)
	return
}

isPrivate := ctx.QueryBool("isPrivate")
if ctx.Repo.Repository.IsPrivate {
if !isPrivate {
@@ -414,11 +423,6 @@ func SaveModel(ctx *context.Context) {
return
}
}
if JobId == "" || VersionName == "" {
re["msg"] = "JobId or VersionName is null."
ctx.JSON(200, re)
return
}
if modelSelectedFile == "" {
re["msg"] = "Not selected model file."
ctx.JSON(200, re)
@@ -429,7 +433,7 @@ func SaveModel(ctx *context.Context) {
ctx.JSON(200, re)
return
}
id, err := saveModelByParameters(JobId, VersionName, name, version, label, description, engine, ctx)
id, err := saveModelByParameters(aiTask, name, version, label, description, engine, ctx)
if err != nil {
log.Info("save model error." + err.Error())
re["msg"] = err.Error()


+ 22
- 6
routers/repo/cloudbrain.go View File

@@ -872,7 +872,9 @@ func CloudBrainShow(ctx *context.Context) {
}

func CloudBrainTrainJobShow(ctx *context.Context) {
cloudBrainShow(ctx, tplCloudBrainTrainJobShow, models.JobTypeTrain)
// cloudBrainShow(ctx, tplCloudBrainTrainJobShow, models.JobTypeTrain)
ctx.Data["PageIsCloudBrain"] = true
ctx.HTML(200, tplCloudBrainTrainJobShow)
}

func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.JobType) {
@@ -1333,7 +1335,20 @@ func CloudBrainStop(ctx *context.Context) {
var status = ""

task := ctx.Cloudbrain

for {
// Tasks flagged as new AI tasks are stopped through the ai_task service.
if task.IsNewAITask() {
	t, bizErr := ai_task.StopCloudbrain(task)
	if bizErr != nil {
		resultCode = "-1"
		errorMsg = bizErr.TrCode
		// Report the unchanged task status in status. Previously this line
		// assigned to resultCode, discarding the "-1" set just above.
		// NOTE(review): confirm no client relies on result_code carrying the
		// status string.
		status = task.Status
		break
	}
	status = t.Status
	break
}

if task.Status == string(models.JobStopped) || task.Status == string(models.JobFailed) || task.Status == string(models.JobSucceeded) {
log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"])
resultCode = "-1"
@@ -2950,6 +2965,7 @@ func BenchmarkDel(ctx *context.Context) {

func CloudBrainTrainJobNew(ctx *context.Context) {
ctx.Data["IsCreate"] = true
ctx.Data["PageIsCloudBrain"] = true
cloudBrainTrainJobCreate(ctx)
}
func CloudBrainTrainJobVersionNew(ctx *context.Context) {
@@ -2958,11 +2974,11 @@ func CloudBrainTrainJobVersionNew(ctx *context.Context) {
}

func cloudBrainTrainJobCreate(ctx *context.Context) {
err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeTrain))
if err != nil {
ctx.ServerError("get new train-job info failed", err)
return
}
// err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeTrain))
// if err != nil {
// ctx.ServerError("get new train-job info failed", err)
// return
// }
ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew)
}



+ 15
- 9
routers/repo/dataset.go View File

@@ -624,6 +624,7 @@ func ExportModelToExistDataSet(ctx *context.Context) {
}
description := ctx.Query("description")
jobId := ctx.Query("jobId")
cloudbrainId := ctx.QueryInt64("cloudbrain_id")
storeType := ctx.QueryInt("type")
versionName := ctx.Query("versionName")
dataset, err := models.GetDatasetByID(datasetId)
@@ -633,17 +634,22 @@ func ExportModelToExistDataSet(ctx *context.Context) {
ctx.JSON(200, re)
return
}
aiTask, err := models.GetCloudbrainByJobIDAndVersionName(jobId, versionName)
if err != nil {
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId)
var aiTask *models.Cloudbrain
if cloudbrainId > 0 {
aiTask, err = models.GetCloudbrainByCloudbrainID(cloudbrainId)
} else {
aiTask, err = models.GetCloudbrainByJobIDAndVersionName(jobId, versionName)
if err != nil {
log.Info("query task error." + err.Error())
re["msg"] = "Query cloudbrain task error." + err.Error()
ctx.JSON(200, re)
return
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId)
}
}
msgKey := fmt.Sprint(datasetId) + "_" + jobId + "_" + versionName
if err != nil {
log.Info("query task error." + err.Error())
re["msg"] = "Query cloudbrain task error." + err.Error()
ctx.JSON(200, re)
return
}
msgKey := fmt.Sprint(datasetId) + "_" + aiTask.JobID + "_" + aiTask.VersionName
msgMap := make(map[string]int, 0)
msgMap["##type##"] = storeType
filterFiles := strings.Split(modelSelectedFile, ";")
@@ -651,7 +657,7 @@ func ExportModelToExistDataSet(ctx *context.Context) {
msgMap[shortFile] = 0
}
setProgress(msgKey, msgMap)
go asyncToExportDataset(dataset, storeType, modelSelectedFile, aiTask, ctx.User, msgKey, msgMap, versionName, description)
go asyncToExportDataset(dataset, storeType, modelSelectedFile, aiTask, ctx.User, msgKey, msgMap, aiTask.VersionName, description)
ctx.JSON(200, map[string]string{
"code": "0",
"progressId": msgKey,


+ 75
- 67
routers/repo/grampus.go View File

@@ -31,8 +31,8 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/timeutil"
// "code.gitea.io/gitea/modules/notification"
// "code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
"github.com/unknwon/com"

@@ -1421,7 +1421,13 @@ func GrampusNotebookDel(ctx *context.Context) {

func GrampusTrainJobDel(ctx *context.Context) {
var listType = ctx.Query("listType")
if err := cloudbrainTask.DeleteGrampusJob(ctx); err != nil {
if isHandled, err := ai_task.HandleNewAITaskDelete(ctx.Cloudbrain.ID); isHandled {
if err != nil {
log.Error("DeleteJob(%s) failed:%v", ctx.Cloudbrain.JobName, err, ctx.Data["msgID"])
ctx.ServerError(err.Error(), err)
return
}
} else if err := cloudbrainTask.DeleteGrampusJob(ctx); err != nil {
log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"])
ctx.ServerError(err.Error(), err)
return
@@ -1450,89 +1456,91 @@ func GrampusNotebookShow(ctx *context.Context) {

func GrampusTrainJobShow(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true

var task *models.Cloudbrain
task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid"))
if err != nil {
log.Error("GetCloudbrainByJobID failed:" + err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
task.ContainerIp = ""
task.User, _ = models.GetUserByID(task.UserID)
if task.DeletedAt.IsZero() { //normal record
result, err := grampus.GetJob(task.JobID)
ctx.HTML(http.StatusOK, tplGrampusTrainJobShow)
/*
var task *models.Cloudbrain
task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid"))
if err != nil {
log.Error("GetJob failed:" + err.Error())
log.Error("GetCloudbrainByJobID failed:" + err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}

if result != nil {
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 {
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
task.ContainerIp = ""
task.User, _ = models.GetUserByID(task.UserID)
if task.DeletedAt.IsZero() { //normal record
result, err := grampus.GetJob(task.JobID)
if err != nil {
log.Error("GetJob failed:" + err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
oldStatus := task.Status
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning {
task.Duration = result.JobInfo.RunSec
if task.Duration < 0 {
task.Duration = 0
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)

if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
if result != nil {
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 {
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
}
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
task.EndTime = task.StartTime.Add(task.Duration)
oldStatus := task.Status
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning {
task.Duration = result.JobInfo.RunSec
if task.Duration < 0 {
task.Duration = 0
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)

if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
}
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
task.EndTime = task.StartTime.Add(task.Duration)
}
task.CorrectCreateUnix()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
}
task.CorrectCreateUnix()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed:" + err.Error())
}
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed:" + err.Error())
}
}
}

if len(task.Parameters) > 0 {
var parameters models.Parameters
err := json.Unmarshal([]byte(task.Parameters), &parameters)
if err != nil {
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err)
ctx.ServerError("system error", err)
return
}
if len(task.Parameters) > 0 {
var parameters models.Parameters
err := json.Unmarshal([]byte(task.Parameters), &parameters)
if err != nil {
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err)
ctx.ServerError("system error", err)
return
}

if len(parameters.Parameter) > 0 {
paramTemp := ""
for _, Parameter := range parameters.Parameter {
param := Parameter.Label + " = " + Parameter.Value + "; "
paramTemp = paramTemp + param
if len(parameters.Parameter) > 0 {
paramTemp := ""
for _, Parameter := range parameters.Parameter {
param := Parameter.Label + " = " + Parameter.Value + "; "
paramTemp = paramTemp + param
}
task.Parameters = paramTemp[:len(paramTemp)-2]
} else {
task.Parameters = ""
}
task.Parameters = paramTemp[:len(paramTemp)-2]
} else {
task.Parameters = ""
}
}

taskList := make([]*models.Cloudbrain, 0)
taskList = append(taskList, task)
prepareSpec4Show(ctx, task)
taskList := make([]*models.Cloudbrain, 0)
taskList = append(taskList, task)
prepareSpec4Show(ctx, task)

ctx.Data["version_list_task"] = taskList
ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false)
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, task)
ctx.Data["displayJobName"] = task.DisplayJobName
ctx.Data["canReschedule"] = cloudbrain.CanDeleteJob(ctx, task)
ctx.Data["version_list_task"] = taskList
ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false)
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, task)
ctx.Data["displayJobName"] = task.DisplayJobName
ctx.Data["canReschedule"] = cloudbrain.CanDeleteJob(ctx, task)

ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx)
ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx)

ctx.HTML(http.StatusOK, tplGrampusTrainJobShow)
ctx.HTML(http.StatusOK, tplGrampusTrainJobShow)
*/
}

func GrampusDownloadLog(ctx *context.Context) {


+ 150
- 122
routers/repo/modelarts.go View File

@@ -811,54 +811,56 @@ func NotebookDel(ctx *context.Context) {

func TrainJobIndex(ctx *context.Context) {
MustEnableModelArts(ctx)
ctx.HTML(200, tplModelArtsTrainJobIndex)
/*
repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}

listType := ctx.Query("listType")
ctx.Data["ListType"] = listType

if listType == models.AllResource {
listType = ""
}

var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
JobTypeNot: false,
JobTypes: jobTypes,
IsLatestVersion: modelarts.IsLatestVersion,
ComputeResource: listType,
Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Cloudbrain", err)
return
}

repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}

listType := ctx.Query("listType")
ctx.Data["ListType"] = listType

if listType == models.AllResource {
listType = ""
}

var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
JobTypeNot: false,
JobTypes: jobTypes,
IsLatestVersion: modelarts.IsLatestVersion,
ComputeResource: listType,
Type: models.TypeCloudBrainAll,
})
if err != nil {
ctx.ServerError("Cloudbrain", err)
return
}

for i, task := range tasks {
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
}
for i, task := range tasks {
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
}

pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
pager.SetDefaultParams(ctx)
pager.AddParam(ctx, "listType", "ListType")
ctx.Data["Page"] = pager
pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
pager.SetDefaultParams(ctx)
pager.AddParam(ctx, "listType", "ListType")
ctx.Data["Page"] = pager

ctx.Data["PageIsCloudBrain"] = true
ctx.Data["Tasks"] = tasks
ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx)
ctx.Data["RepoIsEmpty"] = repo.IsEmpty
ctx.HTML(200, tplModelArtsTrainJobIndex)
ctx.Data["PageIsCloudBrain"] = true
ctx.Data["Tasks"] = tasks
ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx)
ctx.Data["RepoIsEmpty"] = repo.IsEmpty
ctx.HTML(200, tplModelArtsTrainJobIndex)
*/
}

func TrainJobNew(ctx *context.Context) {
@@ -1968,93 +1970,119 @@ func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) err

func TrainJobShow(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true
var jobID = ctx.Params(":jobid")

repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}

var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobTypes: jobTypes,
JobID: jobID,
})

if err != nil {
log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
if len(VersionListTasks) == 0 {
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
//设置权限
canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID)
if err != nil {
ctx.ServerError("canNewJob failed", err)
return
}
ctx.Data["canNewJob"] = canNewJob
datasetList := make([][]*models.DatasetDownload, 0)
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式
for i, task := range VersionListTasks {
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
/*
var jobID = ctx.Params(":jobid")

repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}

var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobTypes: jobTypes,
JobID: jobID,
})

var parameters models.Parameters
if VersionListTasks[i].Parameters != "" {
err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), &parameters)
if err != nil {
log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err)
}
if err != nil {
log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}

if len(parameters.Parameter) > 0 {
paramTemp := ""
for _, Parameter := range parameters.Parameter {
param := Parameter.Label + " = " + Parameter.Value + "; "
paramTemp = paramTemp + param
}
VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2]
} else {
VersionListTasks[i].Parameters = ""
}
datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false))
VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
VersionListTasks[i].ContainerIp = ""
//add spec
s, err := resource.GetCloudbrainSpec(task.Cloudbrain.ID)
if len(VersionListTasks) == 0 {
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
//设置权限
canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID)
if err != nil {
log.Error("TrainJobShow GetCloudbrainSpec error:" + err.Error())
continue
ctx.ServerError("canNewJob failed", err)
return
}
VersionListTasks[i].Cloudbrain.Spec = s
}
ctx.Data["canNewJob"] = canNewJob
datasetList := make([][]*models.DatasetDownload, 0)
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式
for i, task := range VersionListTasks {

pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5)
pager.SetDefaultParams(ctx)
ctx.Data["Page"] = pager
ctx.Data["jobID"] = jobID
ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName
ctx.Data["version_list_task"] = VersionListTasks
ctx.Data["version_list_count"] = VersionListCount
ctx.Data["datasetList"] = datasetList
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, &VersionListTasks[0].Cloudbrain)
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
var parameters models.Parameters
if VersionListTasks[i].Parameters != "" {
err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), &parameters)
if err != nil {
log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err)
}
}

if len(parameters.Parameter) > 0 {
paramTemp := ""
for _, Parameter := range parameters.Parameter {
param := Parameter.Label + " = " + Parameter.Value + "; "
paramTemp = paramTemp + param
}
VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2]
} else {
VersionListTasks[i].Parameters = ""
}
datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false))
VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
VersionListTasks[i].ContainerIp = ""
//add spec
s, err := resource.GetCloudbrainSpec(task.Cloudbrain.ID)
if err != nil {
log.Error("TrainJobShow GetCloudbrainSpec error:" + err.Error())
continue
}
VersionListTasks[i].Cloudbrain.Spec = s
}

pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5)
pager.SetDefaultParams(ctx)
ctx.Data["Page"] = pager
ctx.Data["jobID"] = jobID
ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName
ctx.Data["version_list_task"] = VersionListTasks
ctx.Data["version_list_count"] = VersionListCount
ctx.Data["datasetList"] = datasetList
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, &VersionListTasks[0].Cloudbrain)
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
*/
}

func TrainJobDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
var listType = ctx.Query("listType")
var id = ctx.QueryInt64("id")
// When a cloudbrain id is supplied and it resolves to a new AI task, delete it
// through the ai_task service and redirect; otherwise fall through to the
// legacy job-id based deletion below.
if id > 0 {
	// A failed lookup is deliberately not fatal here: task stays nil and the
	// legacy path below handles the request.
	task, _ := models.GetCloudbrainByCloudbrainID(id)
	if task != nil && task.IsNewAITask() {
		bizErr := ai_task.DelCloudbrain(task)
		if bizErr != nil {
			// Two verbs for two arguments; the former third verb had no operand.
			log.Error("DelCloudbrain(%s) failed:%v", task.JobName, bizErr)
			ctx.ServerError("DelCloudbrain failed", bizErr.ToError())
			return
		}
		var isAdminPage = ctx.Query("isadminpage")
		var isHomePage = ctx.Query("ishomepage")
		// Send the user back to the page the delete was issued from.
		if ctx.IsUserSiteAdmin() && isAdminPage == "true" {
			ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains")
		} else if isHomePage == "true" {
			ctx.Redirect(setting.AppSubURL + "/cloudbrains")
		} else {
			ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
		}
		return
	}

}
repo := ctx.Repo.Repository

var jobTypes []string


+ 10
- 0
routers/response/error.go View File

@@ -1,5 +1,7 @@
package response

import "errors"

type BizError struct {
Code int
DefaultMsg string
@@ -24,6 +26,14 @@ func (e *BizError) WithParams(params ...interface{}) *BizError {
return newErr
}

// ToError converts the business error into a plain Go error. The error text
// is the TrCode when one is set; otherwise it is the DefaultMsg.
func (e *BizError) ToError() error {
	if e.TrCode != "" {
		return errors.New(e.TrCode)
	}
	return errors.New(e.DefaultMsg)
}

// NewBizError wraps a plain error in a BizError that carries the default
// error code; the error's text is used for both DefaultMsg and TrCode.
// err must be non-nil because its Error method is called.
func NewBizError(err error) *BizError {
	msg := err.Error()
	return &BizError{
		Code:       RESPONSE_CODE_ERROR_DEFAULT,
		DefaultMsg: msg,
		TrCode:     msg,
	}
}


+ 6
- 4
routers/response/response_list.go View File

@@ -9,7 +9,7 @@ var BADGES_STILL_HAS_USERS = &BizError{Code: 1005, DefaultMsg: "Please delete us

//common response
var SYSTEM_ERROR = &BizError{Code: 9009, DefaultMsg: "System error.Please try again later", TrCode: "common_error.system_error"}
var INSUFFICIENT_PERMISSION = &BizError{Code: 9003, DefaultMsg: "insufficient permissions", TrCode: "common_error.insufficient_permission"}
var INSUFFICIENT_PERMISSION = &BizError{Code: 9003, DefaultMsg: "You do not have permission to perform this operation", TrCode: "common_error.insufficient_permission"}
var PARAM_ERROR = &BizError{Code: 9001, DefaultMsg: "param error", TrCode: "common_error.param_error"}
var WECHAT_NOT_BIND = &BizError{Code: 9002, DefaultMsg: "Please scan the code and bind to wechat first", TrCode: "common_error.wechat_not_bind"}

@@ -21,16 +21,18 @@ var MULTI_TASK = &BizError{Code: 2004, DefaultMsg: "You have already a running o
var JOB_NAME_ALREADY_USED = &BizError{Code: 2005, DefaultMsg: "The job name did already exist", TrCode: "ai_task.job_name_already_used"}
var INSUFFICIENT_POINT_BALANCE = &BizError{Code: 2006, DefaultMsg: "Insufficient point balance", TrCode: "ai_task.insufficient_point_balance"}
var DATASET_NOT_EXISTS = &BizError{Code: 2007, DefaultMsg: "The part of datasets in the task does not exist or has been deleted, please create a new debug job.", TrCode: "repo.debug.manage.dataset_not_exist"}
var MODEL_NOT_EXISTS = &BizError{Code: 2008, DefaultMsg: "The model in the task does not exist or has been deleted, please create a new debug job.", TrCode: "repo.debug.manage.model_not_exist"}
var RESULT_CLEARD = &BizError{Code: 2009, DefaultMsg: "The files of the task have been cleared, can not restart any more, please create a new debug task instead.", TrCode: "cloudbrain.result_cleared"}
var MODEL_NOT_EXISTS = &BizError{Code: 2008, DefaultMsg: "The model in the task does not exist or has been deleted", TrCode: "ai_task.model_not_exist"}
var RESULT_CLEARD = &BizError{Code: 2009, DefaultMsg: "The files of the task have been cleared, can not restart or retrain any more, please create a new task instead.", TrCode: "ai_task.result_cleared"}
var CREATE_FAILED = &BizError{Code: 2010, DefaultMsg: "Create AI task failed", TrCode: "ai_task.create_failed"}
var RESTART_FAILED = &BizError{Code: 2011, DefaultMsg: "Restart AI task failed", TrCode: "ai_task.restart_failed"}
var STOP_FAILED = &BizError{Code: 2012, DefaultMsg: "Stop AI task failed", TrCode: "ai_task.stop_failed"}
var DATASET_SIZE_OVER_LIMIT = &BizError{Code: 2013, DefaultMsg: "The size of dataset exceeds limitation", TrCode: "ai_task.dataset_size_over_limit"}
var BOOT_FILE_MUST_BE_PYTHON = &BizError{Code: 2013, DefaultMsg: "The boot file must be a python file", TrCode: "ai_task.boot_file_must_python"}
var BOOT_FILE_NOT_EXIST = &BizError{Code: 2014, DefaultMsg: "The boot file not exist", TrCode: "ai_task.boot_file_not_exist"}
var BOOT_FILE_MUST_BE_PYTHON = &BizError{Code: 2015, DefaultMsg: "The boot file must be a python file", TrCode: "ai_task.boot_file_must_python"}
// NO_NODE_RIGHR previously carried the boot-file message copied from
// BOOT_FILE_MUST_BE_PYTHON. Judging by its translation key
// (repo.modelarts.no_node_right) it presumably reports that the requested
// number of compute nodes is not permitted — confirm the wording against the
// locale files. The misspelt identifier is kept so existing callers compile.
var NO_NODE_RIGHR = &BizError{Code: 2016, DefaultMsg: "You have no right to use the selected number of compute nodes", TrCode: "repo.modelarts.no_node_right"}
var DATASET_SELECT_ERROR = &BizError{Code: 2017, DefaultMsg: "Dataset select error: the count exceed the limit or has same name", TrCode: "cloudbrain.error.dataset_select"}
var PARTIAL_DATASETS_NOT_AVAILABLE = &BizError{Code: 2018, DefaultMsg: "There are non-existent or deleted files in the selected dataset file, please select again", TrCode: "cloudbrain.error.partial_datasets_not_available"}
var LOAD_CODE_FAILED = &BizError{Code: 2019, DefaultMsg: "Fail to load code, please check if the right branch is selected.", TrCode: "cloudbrain.load_code_failed"}
var BRANCH_NOT_EXISTS = &BizError{Code: 2020, DefaultMsg: "The branch does not exist", TrCode: "ai_task.branch_not_exists"}
var MODEL_NUM_OVER_LIMIT = &BizError{Code: 2021, DefaultMsg: "The number of models exceeds the limit of 30", TrCode: "repo.debug.manage.model_num_over_limit"}
var DATASET_NUMBER_OVER_LIMIT = &BizError{Code: 2022, DefaultMsg: "The dataset count exceed the limit", TrCode: "ai_task.dataset_number_over_limit"}

+ 438
- 92
services/ai_task_service/cluster/c2net.go View File

@@ -1,19 +1,26 @@
package cluster

import (
"errors"
"fmt"
"strings"
"time"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/manager/client/grampus"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
model_grampus "code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/schedule"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask"
"errors"
"fmt"
"io/ioutil"
"path"
"strings"
"time"
)

type C2NetClusterAdapter struct {
@@ -51,7 +58,7 @@ func (c C2NetClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBookTaskRequ
return convertGrampus2NoteBookRes(jobResult), nil
}

func (c C2NetClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
func (c C2NetClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
processType := req.ComputeSource.FullName
images, err := grampus.GetImages(processType, string(req.JobType))
if err != nil {
@@ -68,6 +75,10 @@ func (c C2NetClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.Cluster
return r, false, nil
}

// GetTrainImages returns the images for train jobs. It forwards the request
// unchanged to GetNotebookImages and returns that method's results as-is.
func (c C2NetClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
return c.GetNotebookImages(req)
}

func ConvertGrampusImageToStandard(image models.GrampusImage) entity.ClusterImage {
return entity.ClusterImage{
ImageId: image.ID,
@@ -88,8 +99,6 @@ func convertNoteBookReq2Grampus(req entity.CreateNoteBookTaskRequest) models.Cre
if models.DCU == req.Tasks[0].Spec.ComputeResource {
command = "cp -r /code /tmp;cp -r /dataset /tmp;cp -r /pretrainmodel /tmp;"
}
//command := fmt.Sprintf(commandGpuDebug, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, codePath)
// command := "bash && cd /code && unzip master.zip && cd test-export-data && uvicorn train:app --host 0.0.0.0 --port $OCTOPUS_NOTEBOOK_PORT"
if models.NPU == req.Tasks[0].Spec.ComputeResource {
command = ""
}
@@ -163,23 +172,18 @@ func getCopyCmd(datasetName, repoName, bootfilepath string) string {
}

func convertNoteBookTask2Grampus(t entity.NoteBookTask, command string) models.GrampusNotebookTask {

code := models.GrampusDataset{}
codeArray := convertContainerArray2Grampus(t.Code)
codeArray := convertContainerArray2GrampusArray(t.Code)
if codeArray != nil && len(codeArray) > 0 {
code = codeArray[0]
}
output := models.GrampusDataset{}
outputArray := convertContainerArray2Grampus(t.OutPut)
if outputArray != nil && len(outputArray) > 0 {
output = outputArray[0]
}
return models.GrampusNotebookTask{
Name: t.Name,
ResourceSpecId: t.Spec.SourceSpecId,
ImageId: t.ImageId,
ImageUrl: t.ImageUrl,
Datasets: convertContainerArray2Grampus(t.Datasets),
OutPut: output,
Datasets: convertContainerArray2GrampusArray(t.Datasets),
Code: code,
AutoStopDuration: t.AutoStopDuration,
Capacity: t.Capacity,
@@ -188,7 +192,7 @@ func convertNoteBookTask2Grampus(t entity.NoteBookTask, command string) models.G
}
}

func convertContainerArray2Grampus(containerDatas []entity.ContainerData) []models.GrampusDataset {
func convertContainerArray2GrampusArray(containerDatas []entity.ContainerData) []models.GrampusDataset {
res := make([]models.GrampusDataset, len(containerDatas))
for i := 0; i < len(containerDatas); i++ {
d := containerDatas[i]
@@ -197,6 +201,14 @@ func convertContainerArray2Grampus(containerDatas []entity.ContainerData) []mode
return res
}

// convertContainerArray2Grampus converts only the first element of
// containerDatas into a models.GrampusDataset. A nil or empty slice yields
// the zero-value GrampusDataset.
func convertContainerArray2Grampus(containerDatas []entity.ContainerData) models.GrampusDataset {
	if len(containerDatas) == 0 {
		return models.GrampusDataset{}
	}
	return convertContainer2Grampus(containerDatas[0])
}

func convertContainer2Grampus(d entity.ContainerData) models.GrampusDataset {
return models.GrampusDataset{
Name: d.Name,
@@ -206,6 +218,7 @@ func convertContainer2Grampus(d entity.ContainerData) models.GrampusDataset {
ContainerPath: d.ContainerPath,
ReadOnly: d.ReadOnly,
GetBackEndpoint: d.GetBackEndpoint,
Size: d.Size,
}
}

@@ -248,24 +261,24 @@ func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartRespo
}
}

func (c C2NetClusterAdapter) DeleteNoteBook(string) error {
func (c C2NetClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error {
return nil
}

func (c C2NetClusterAdapter) StopNoteBook(jobId string) error {
_, err := grampus.StopJob(jobId, string(models.JobTypeDebug))
func (c C2NetClusterAdapter) StopNoteBook(opts entity.JobIdAndVersionId) error {
_, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug))
if err != nil {
log.Error("StopNoteBook(%s) failed:%v", jobId, err)
log.Error("StopNoteBook(%s) failed:%v", opts, err)
return err
}
return nil
}

func (c C2NetClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) {
if jobId == "" {
func (c C2NetClusterAdapter) QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
if opts.JobID == "" {
return nil, errors.New("jobID is empty")
}
result, err := grampus.GetNotebookJob(jobId)
result, err := grampus.GetNotebookJob(opts.JobID)
if err != nil {
return nil, err
}
@@ -297,7 +310,7 @@ func (c C2NetClusterAdapter) GetNoteBookLog(jobId string) (*entity.ClusterLog, e
}

func (c C2NetClusterAdapter) GetNoteBookUrl(jobId string) (string, error) {
res, err := c.QueryNoteBook(jobId)
res, err := c.QueryNoteBook(entity.JobIdAndVersionId{JobID: jobId})
if err != nil {
return "", err
}
@@ -367,67 +380,148 @@ func convertTrainReq2Grampus(req entity.CreateTrainTaskRequest) models.CreateGra
}

func generateGrampusTrainCommand(req entity.CreateTrainTaskRequest) string {
var command string
t := req.Tasks[0]
containerConfig := req.TaskConfig
computeResource := t.Spec.ComputeResource

var CommandPrepareScriptNpu = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;"
//todo 现状:NPU和GPU的目录不一致,原因?
var workDir = "/tmp/"
if computeResource == models.NPU {
workDir = "/cache/"
}
command += "pwd;cd " + workDir + ";" + CommandPrepareScriptNpu

if computeResource == models.GPU || computeResource == models.GCU {
command += "cd " + workDir + "code;echo \"start unzip code\";unzip -q master.zip;"
command += "cd " + workDir + "dataset;echo \"start to unzip dataset\";"
var unZipDatasetCommand string
for _, d := range t.Datasets {
if strings.HasSuffix(d.Name, ".tar.gz") {
unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + d.Name + "';"
} else {
unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + d.Name + "' -d './" + strings.TrimSuffix(d.Name, ".zip") + "';"
}
unZipDatasetCommand += "rm -f '" + d.Name + "';"
var codePath = containerConfig.GetContainerPath(entity.ContainerCode)
var modelPath = containerConfig.GetContainerPath(entity.ContainerPreTrainModel)
var datasetPath = containerConfig.GetContainerPath(entity.ContainerDataset)
var outputPath = containerConfig.GetContainerPath(entity.ContainerOutPutPath)

var modelFilePath = ""
if t.PreTrainModel != nil && len(t.PreTrainModel) > 0 {
modelFilePath = t.PreTrainModel[0].ContainerPath
}
builder := &entity.CommandBuilder{}
builder.
//mkdir dirs
Add(buildMkdirCommand(codePath, modelPath, datasetPath, outputPath)).
//unzip code
Add(buildUnzipCodeCommand(codePath, t.Code[0].ContainerPath, computeResource)).
//unzip dataset
Add(buildUnzipDatasetCommand(t.Datasets, datasetPath, computeResource)).
//export
Add(buildExportCommand(req.Name, computeResource)).
//exec code
Add(buildExecCodeCommand(path.Join(codePath, strings.ToLower(t.RepoName)), modelFilePath, t.BootFile, computeResource, req.Name, t.Params))

return builder.ToString()
}

// buildMkdirCommand returns a command builder holding one `mkdir -p <dir>`
// command per entry in dirs, in the order given. With no dirs the builder is
// returned empty.
func buildMkdirCommand(dirs ...string) *entity.CommandBuilder {
	mkdirs := &entity.CommandBuilder{}
	for i := range dirs {
		mkdirs.Next(entity.NewCommand("mkdir", "-p", dirs[i]))
	}
	return mkdirs
}

// buildUnzipCodeCommand builds the commands that unpack the code archive.
//
// For NPU compute resources an empty builder is returned, so no unzip step is
// emitted. Otherwise the commands change into codeConfigPath, quietly unzip
// codeFilePath there and list the directory.
//
// The former trailing `ls -l mnist_pytorchexample_gpu` was removed: it was a
// debugging leftover that hard-coded the directory name of one repository.
func buildUnzipCodeCommand(codeConfigPath, codeFilePath, computeSource string) *entity.CommandBuilder {
	builder := &entity.CommandBuilder{}
	if computeSource == models.NPU {
		return builder
	}
	builder.
		Next(entity.NewCommand("echo", "'start to unzip code'")).
		Next(entity.NewCommand("cd", codeConfigPath)).
		Next(entity.NewCommand("unzip", "-q", codeFilePath)).
		Next(entity.NewCommand("echo", "'unzip code finished'")).
		Next(entity.NewCommand("ls", "-l"))
	return builder
}
// buildUnzipDatasetCommand builds the commands that unpack the dataset
// archives inside datasetPath.
//
// An empty builder is returned for NPU compute resources and when datasets is
// empty. The empty case used to return nil; it now returns an empty builder
// so both "nothing to do" paths hand the caller the same kind of value.
//
// A single dataset is unpacked directly into datasetPath (tar archives with
// --strip-components=1). With several datasets, each zip is unpacked into a
// sub-directory named after the archive without its ".zip" suffix, and tar
// archives are extracted in place. Archive names are always wrapped in single
// quotes; the multi-dataset tar branch previously passed the bare name.
func buildUnzipDatasetCommand(datasets []entity.ContainerData, datasetPath, computeSource string) *entity.CommandBuilder {
	builder := &entity.CommandBuilder{}
	if computeSource == models.NPU || len(datasets) == 0 {
		return builder
	}
	builder.Next(entity.NewCommand("cd", datasetPath)).
		Next(entity.NewCommand("echo", "'start to unzip datasets'"))

	if len(datasets) == 1 {
		// Single dataset: extract straight into the dataset directory.
		name := datasets[0].Name
		if strings.HasSuffix(name, ".tar.gz") {
			builder.Next(entity.NewCommand("tar", "--strip-components=1", "-zxvf", "'"+name+"'"))
		} else {
			builder.Next(entity.NewCommand("unzip", "-q", "'"+name+"'"))
		}
	} else {
		// Multiple datasets: keep each zip in its own sub-directory.
		for _, dataset := range datasets {
			name := dataset.Name
			if strings.HasSuffix(name, ".tar.gz") {
				builder.Next(entity.NewCommand("tar", "-zxvf", "'"+name+"'"))
			} else {
				builder.Next(entity.NewCommand("unzip", "-q", "'"+name+"'", "-d", "'./"+strings.TrimSuffix(name, ".zip")+"'"))
			}
		}
	}

	builder.Next(entity.NewCommand("ls", "-l"))
	builder.Next(entity.NewCommand("echo", "'unzip datasets finished'"))
	return builder
}

//exec code
var parameters = t.Params.Parameter
var paramCode string
func buildExportCommand(jobName, computeResource string) *entity.CommandBuilder {
builder := &entity.CommandBuilder{}

if len(parameters) > 0 {
for _, parameter := range parameters {
//todo value需要单引号,再统一一下
paramCode += " --" + parameter.Label + "=" + parameter.Value
}
if computeResource == models.NPU {
outputRemotePath := setting.CodePathPrefix + jobName + modelarts.OutputPath
builder.Next(entity.NewCommand("export", "bucket="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath))
} else {
outputRemotePath := setting.CBCodePathPrefix + jobName + cloudbrain.ModelMountPath + "/"
builder.Next(entity.NewCommand("export", "env="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath))
}
return builder
}

func buildExecCodeCommand(codeDirPath, modelFilePath, bootFile, computeResource, jobName string, params models.Parameters) *entity.CommandBuilder {
builder := &entity.CommandBuilder{}
builder.Next(entity.NewCommand("echo", "'start to exec code'"))

var commandCode string
modelRemoteObsUrl := getNpuModelRemoteObsUrl(t.Name)
if t.Spec.ComputeResource == models.NPU {
paramCode += " --model_url=" + modelRemoteObsUrl
commandCode = "source /home/ma-user/.bashrc;python /home/ma-user/davinci/train/davincirun.py python /home/ma-user/openi.py " + paramCode + ";"
} else if t.Spec.ComputeResource == models.GPU || t.Spec.ComputeResource == models.GCU {
if len(t.Models) > 0 {
paramCode += " --ckpt_url" + "=" + workDir + "pretrainmodel/" + t.Models[0].Name
var paramCode string
for _, param := range params.Parameter {
paramCode += " --'" + param.Label + "'='" + param.Value + "'"
}
if computeResource == models.NPU {
modelRemoteObsUrl := getNpuModelRemoteObsUrl(jobName)
builder.Next(entity.NewCommand("source", "/home/ma-user/.bashrc")).
Next(entity.NewCommand("python", "/home/ma-user/davinci/train/davincirun.py", "python", "/home/ma-user/openi.py", paramCode, "--model_url="+modelRemoteObsUrl))
} else if computeResource == models.GCU {
builder.Next(entity.NewCommand("cd", codeDirPath))
if modelFilePath != "" {
builder.Next(entity.NewCommand("python3", bootFile, paramCode, "--ckpt_url='"+modelFilePath+"'"))
} else {
builder.Next(entity.NewCommand("python3", bootFile, paramCode))
}
} else {
builder.Next(entity.NewCommand("cd", codeDirPath))
if modelFilePath != "" {
builder.Next(entity.NewCommand("python", bootFile, paramCode, "--ckpt_url='"+modelFilePath+"'"))
} else {
builder.Next(entity.NewCommand("python", bootFile, paramCode))
}
commandCode = "cd " + workDir + "code/" + strings.ToLower(t.Code.Name) + ";python " + t.BootFile + paramCode + ";"
}

command += commandCode
builder.Next(entity.NewCommand("result=$?"))
builder.Next(entity.NewCommand("bash", "-c", "\"[[ $result -eq 0 ]] && exit 0 || exit -1\""))
return builder
}

//get exec result
commandGetRes := "result=$?;"
command += commandGetRes
func buildParamCommand(outputRemotePath, computeResource string) *entity.CommandBuilder {
builder := &entity.CommandBuilder{}
builder.Next(entity.NewCommand("echo", "'start to exec code'"))

//check exec result
commandCheckRes := "bash -c \"[[ $result -eq 0 ]] && exit 0 || exit -1\""
command += commandCheckRes
if computeResource == models.NPU {
builder.Next(entity.NewCommand("export", "bucket="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath))
} else {
builder.Next(entity.NewCommand("export", "env="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath))
}

return command
return builder
}

var BucketRemote = "grampus"
@@ -443,18 +537,19 @@ func getNpuModelObjectKey(jobName string) string {

func convertTrainTask2Grampus(t entity.TrainTask, command string) models.GrampusTasks {
return models.GrampusTasks{
Name: t.Name,
ResourceSpecId: t.ResourceSpecId,
ImageId: t.ImageId,
ImageUrl: t.ImageUrl,
Datasets: convertContainerArray2Grampus(t.Datasets),
Code: convertContainer2Grampus(t.Code),
Command: command,
CenterID: t.CenterID,
ReplicaNum: 1,
Models: convertContainerArray2Grampus(t.Models),
BootFile: t.BootFile,
OutPut: convertContainer2Grampus(t.OutPut),
Name: t.Name,
ResourceSpecId: t.ResourceSpecId,
ImageId: t.ImageId,
ImageUrl: t.ImageUrl,
Datasets: convertContainerArray2GrampusArray(t.Datasets),
Code: convertContainerArray2Grampus(t.Code),
Command: command,
CenterID: t.CenterID,
ReplicaNum: 1,
Models: convertContainerArray2GrampusArray(t.PreTrainModel),
BootFile: t.BootFile,
OutPut: convertContainerArray2Grampus(t.OutPut),
WorkServerNumber: t.WorkServerNumber,
}
}

@@ -474,19 +569,270 @@ func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *entity.Creat
}
}

func (c C2NetClusterAdapter) DeleteTrainJob(string) error {
func (c C2NetClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error {
return nil
}
func (c C2NetClusterAdapter) StopTrainJob(string) error {
// StopTrainJob asks Grampus to stop the job identified by opts.JobID and
// returns the error from that call, if any.
//
// The failure log now names StopTrainJob (it used to say StopNoteBook) and
// prints opts with %+v because it is a struct, not a string.
func (c C2NetClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error {
	// NOTE(review): the job type passed here is JobTypeDebug, the same value
	// StopNoteBook passes — confirm whether train jobs need a different type.
	if _, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug)); err != nil {
		log.Error("StopTrainJob(%+v) failed:%v", opts, err)
		return err
	}
	return nil
}
func (c C2NetClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) {
return nil, nil
// QueryTrainJob fetches the job identified by opts.JobID from Grampus and
// converts its JobInfo into a QueryTaskResponse.
//
// An empty job id is an error. When Grampus returns no result and no error,
// the method returns (nil, nil).
func (c C2NetClusterAdapter) QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
	jobID := opts.JobID
	if jobID == "" {
		return nil, errors.New("jobID is empty")
	}
	jobResult, err := grampus.GetJob(jobID)
	switch {
	case err != nil:
		return nil, err
	case jobResult == nil:
		return nil, nil
	}
	return convertGrampusTrainJobResponse(jobResult.JobInfo), nil
}
func (c C2NetClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) {
return nil, nil

// QueryTrainJobByJobName lists Grampus jobs by name and returns the converted
// entries whose Name equals jobName exactly. The returned slice is never nil
// on success; it is empty when nothing matches or the listing is nil.
func (c C2NetClusterAdapter) QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) {
	jobList, err := grampus.GetJobListByJobName(jobName)
	if err != nil {
		return nil, err
	}
	matched := make([]*entity.QueryTaskResponse, 0)
	if jobList == nil {
		return matched, nil
	}
	for _, info := range jobList.JobInfos {
		// The listing may contain other names; keep exact matches only.
		if info.Name != jobName {
			continue
		}
		matched = append(matched, entity.ConvertGrampusTrainResponse(info))
	}
	return matched, nil
}

func (c C2NetClusterAdapter) GetTrainLog(jobId string) (*entity.ClusterLog, error) {
return nil, nil
// convertGrampusTrainJobResponse maps a Grampus job description to a
// QueryTaskResponse. It returns nil when the job has no tasks. The center id
// and name are taken from the first entry of the first task's CenterID and
// CenterName lists and stay empty when those lists are empty.
func convertGrampusTrainJobResponse(job models.GrampusJobInfo) *entity.QueryTaskResponse {
	if len(job.Tasks) == 0 {
		return nil
	}
	firstTask := job.Tasks[0]
	resp := &entity.QueryTaskResponse{
		StartedAt:   timeutil.TimeStamp(job.StartedAt),
		CompletedAt: timeutil.TimeStamp(job.CompletedAt),
		Status:      job.Status,
		JobId:       job.JobID,
	}
	if len(firstTask.CenterID) > 0 {
		resp.CenterId = firstTask.CenterID[0]
	}
	if len(firstTask.CenterName) > 0 {
		resp.CenterName = firstTask.CenterName[0]
	}
	return resp
}

// GetLog returns the train job log followed by a newline and the job's exit
// diagnostics.
//
// When opts.WorkServerNum is greater than 1 the log of node opts.NodeId is
// requested, and NodeId must lie in [0, WorkServerNum-1]; otherwise an error
// is returned before any remote call is made. For single-node jobs the log is
// requested without a node id.
//
// Fetching the log is best effort: if it fails, the error is logged and the
// result contains only the exit diagnostics, with a nil error.
func (c C2NetClusterAdapter) GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) {
	multiNode := opts.WorkServerNum > 1
	// Validate first so bad parameters do not cost a remote call.
	if multiNode && (opts.NodeId < 0 || opts.NodeId > opts.WorkServerNum-1) {
		return nil, errors.New("query parameter is wrong")
	}

	exitDiagnostics := getGrampusExitDiagnostics(opts.JobId)

	var content string
	var err error
	if multiNode {
		content, err = grampus.GetTrainJobLog(opts.JobId, opts.NodeId)
	} else {
		content, err = grampus.GetTrainJobLog(opts.JobId)
	}
	if err != nil {
		log.Error("GetLog err.opts=%+v,err=%v", opts, err)
		content = ""
	}
	return &entity.ClusterLog{
		Content: content + "\n" + exitDiagnostics,
	}, nil
}

// getGrampusExitDiagnostics returns the ExitDiagnostics text of the Grampus
// job with the given id. It returns an empty string when the lookup fails
// (the error is logged) or when no job is returned.
func getGrampusExitDiagnostics(jobId string) string {
	job, err := grampus.GetJob(jobId)
	switch {
	case err != nil:
		log.Error("GetJob(%s) failed:%v", jobId, err)
		return ""
	case job == nil:
		return ""
	default:
		return job.ExitDiagnostics
	}
}

// GetLogDownloadInfo fetches the job log through GetLog and wraps its content
// as a downloadable text file. The file is named "<JobName>-log.txt", or
// "<JobName>-<NodeId>-log.txt" when opts.WorkServerNum is greater than 1.
func (c C2NetClusterAdapter) GetLogDownloadInfo(opts entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	logOpts := entity.ClusterLogOpts{
		JobId:         opts.JobId,
		NodeId:        opts.NodeId,
		WorkServerNum: opts.WorkServerNum,
	}
	clusterLog, err := c.GetLog(logOpts)
	if err != nil {
		log.Error("error occurs when attempting to get log content.opts=%+v err=%v", opts, err)
		return nil, err
	}

	var fileName string
	if opts.WorkServerNum > 1 {
		fileName = opts.JobName + "-" + fmt.Sprint(opts.NodeId) + "-log.txt"
	} else {
		fileName = opts.JobName + "-log.txt"
	}

	logReader := entity.FileReader{Reader: ioutil.NopCloser(strings.NewReader(clusterLog.Content))}
	return &entity.FileDownloadInfo{
		Readers:        []entity.FileReader{logReader},
		ResultType:     entity.FileTypeTXT,
		ResultFileName: fileName,
	}, nil
}

// GetSingleOutputDownloadInfo returns download information for one output
// file: a signed download URL for opts.Path, obtained from the storage helper
// selected by opts.StorageType.
func (c C2NetClusterAdapter) GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	signedURL, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetSignedDownloadUrl(opts.Path)
	if err != nil {
		log.Error("GetSignedDownloadUrl err.opts=%+v,err =%v", opts, err)
		return nil, err
	}
	info := &entity.FileDownloadInfo{DownloadUrl: signedURL}
	return info, nil
}

// GetAllOutputDownloadInfo returns download information for all outputs. It
// forwards opts to the package-level GetAllOutputDownloadInfo function and
// returns its results unchanged.
func (c C2NetClusterAdapter) GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
return GetAllOutputDownloadInfo(opts)
}

// GetNodeInfo returns one AITaskNodeInfo per work server, with IDs running
// from 0 to the node count minus one. A WorkServerNum below 1 is treated as a
// single node. The returned error is always nil.
func (c C2NetClusterAdapter) GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) {
	nodeCount := opts.WorkServerNum
	if nodeCount < 1 {
		nodeCount = 1
	}

	nodes := make([]entity.AITaskNodeInfo, 0, nodeCount)
	for id := 0; id < nodeCount; id++ {
		nodes = append(nodes, entity.AITaskNodeInfo{ID: id})
	}
	return nodes, nil
}

// GetResourceUsage fetches the Grampus metrics of a job and converts them to
// the standard ResourceUsage form.
//
// Time window: for NPU both bounds are sent as 0. For other compute sources a
// zero start time defaults to 30 minutes ago and a zero end time to now.
//
// Node selection: when opts.WorkServerNumber is at most 1 the metrics are
// requested without a node id. Otherwise opts.NodeId must lie in
// [0, WorkServerNumber-1]; if not, PARAM_ERROR is returned.
//
// The multi-node request now uses the same normalized time window as the
// single-node request. It previously sent the raw opts.StartTime and
// opts.EndTime, which skipped both the NPU reset and the defaults.
func (c C2NetClusterAdapter) GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) {
	startTime, endTime := opts.StartTime, opts.EndTime
	if opts.ComputeSource == models.NPU {
		startTime, endTime = 0, 0
	} else {
		now := time.Now().Unix()
		if startTime == 0 {
			startTime = now - 30*60
		}
		if endTime == 0 {
			endTime = now
		}
	}

	var (
		result models.NewModelArtsMetricStatisticResult
		err    error
	)
	if opts.WorkServerNumber <= 1 {
		result, err = grampus.GetGrampusMetrics(opts.JobId, startTime, endTime)
	} else {
		if opts.NodeId < 0 || opts.NodeId > opts.WorkServerNumber-1 {
			return nil, response.PARAM_ERROR.ToError()
		}
		result, err = grampus.GetGrampusMetrics(opts.JobId, startTime, endTime, opts.NodeId)
	}
	if err != nil {
		log.Error("GetGrampusMetrics error. opts=%+v err= %v", opts, err)
		return nil, err
	}
	return transferGrampusMetrics2Standard(result), nil
}

// transferGrampusMetrics2Standard copies each metric's name and value from
// the Grampus result into a ResourceUsage. Interval is always set to 0 and
// MetricsInfo is a non-nil (possibly empty) slice.
func transferGrampusMetrics2Standard(result models.NewModelArtsMetricStatisticResult) *entity.ResourceUsage {
	metrics := make([]entity.MetricsInfo, 0)
	for _, info := range result.MetricsInfo {
		metrics = append(metrics, entity.MetricsInfo{
			Name:  info.Metric,
			Value: info.Value,
		})
	}
	usage := &entity.ResourceUsage{
		Interval:    0,
		MetricsInfo: metrics,
	}
	return usage
}

// GetTrainJobOperationProfile builds the operation profile of a train job
// from its Grampus job events. If the job can also be fetched and has
// non-empty exit diagnostics, they are appended as a final event with reason
// "Exit". A failure of that second lookup is ignored.
func (c C2NetClusterAdapter) GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) {
	if jobId == "" {
		log.Error("jobid is empty")
		return nil, errors.New("jobid is empty")
	}
	eventsResult, err := grampus.GetTrainJobEvents(jobId)
	if err != nil {
		log.Error("GetTrainJobEvents failed:%v", err)
		return nil, err
	}
	profile := parseC2NetEventsToOperationProfile(eventsResult.JobEvents)

	// Exit diagnostics are optional extra information.
	job, err := grampus.GetJob(jobId)
	if err != nil || job == nil || job.ExitDiagnostics == "" {
		return profile, nil
	}
	profile.Events = append(profile.Events, entity.ProfileEvent{
		Message: job.ExitDiagnostics,
		Reason:  "Exit",
	})
	return profile, nil
}

// GetOutput lists the objects directly under opts.ParentDir, joined onto
// opts.ObjectKeyPrefix, for a job's output.
//
// The model schedule status is checked first. Unless it equals
// models.ModelMigrateSuccess, the result carries that status and an empty
// file list and storage is not queried.
func (c C2NetClusterAdapter) GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) {
	status, err := schedule.GetModelScheduleStatus(opts.JobId)
	if err != nil {
		log.Error("GetModelScheduleStatus(%s) failed:%v", opts.JobId, err)
		return nil, err
	}

	output := &entity.ClusterAITaskOutput{
		Status: status,
		Path:   opts.ParentDir,
	}
	if status != models.ModelMigrateSuccess {
		output.FileList = []storage.FileInfo{}
		return output, nil
	}

	dir := path.Join(opts.ObjectKeyPrefix, opts.ParentDir)
	files, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetOneLevelObjectsUnderDir(dir)
	if err != nil {
		log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err)
		return nil, err
	}
	output.FileList = files
	return output, nil
}

// GetAllOutput lists every object under opts.ParentDir, joined onto
// opts.ObjectKeyPrefix, for a job's output.
//
// The model schedule status is checked first. Unless it equals
// models.ModelMigrateSuccess, an empty file list is returned and storage is
// not queried.
//
// The failure log now names GetAllObjectsUnderDir, the call that is actually
// made; it used to name GetOneLevelObjectsUnderDir.
func (c C2NetClusterAdapter) GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) {
	status, err := schedule.GetModelScheduleStatus(opts.JobId)
	if err != nil {
		log.Error("GetModelScheduleStatus(%s) failed:%v", opts.JobId, err)
		return nil, err
	}
	if status != models.ModelMigrateSuccess {
		return &entity.AllAITaskOutput{FileList: []storage.FileInfo{}}, nil
	}

	helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)
	fileList, err := helper.GetAllObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir))
	if err != nil {
		log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err)
		return nil, err
	}
	return &entity.AllAITaskOutput{FileList: fileList}, nil
}

+ 319
- 23
services/ai_task_service/cluster/cloudbrain_one.go View File

@@ -2,14 +2,18 @@ package cluster

import "C"
import (
"encoding/json"
"errors"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"path"
"strings"
)

type CloudbrainOneClusterAdapter struct {
@@ -33,10 +37,14 @@ func (c CloudbrainOneClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBook
return nil, nil
}

func (c CloudbrainOneClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
func (c CloudbrainOneClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
return nil, true, nil
}

// GetTrainImages returns the images for train jobs. It forwards the request
// unchanged to GetNotebookImages and returns that method's results as-is.
func (c CloudbrainOneClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
return c.GetNotebookImages(req)
}

var SubTaskName = "task1"

func convertNoteBookReq2CloudbrainOne(req entity.CreateNoteBookTaskRequest) models.CreateJobParams {
@@ -100,25 +108,25 @@ func (c CloudbrainOneClusterAdapter) RestartNoteBook(string) (*entity.RestartNot

return nil, nil
}
func (c CloudbrainOneClusterAdapter) DeleteNoteBook(string) error {
func (c CloudbrainOneClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error {
return nil
}

func (c CloudbrainOneClusterAdapter) StopNoteBook(jobId string) error {
err := cloudbrain.StopJob(jobId)
func (c CloudbrainOneClusterAdapter) StopNoteBook(opts entity.JobIdAndVersionId) error {
err := cloudbrain.StopJob(opts.JobID)
if err != nil {
log.Error("StopNoteBook(%s) failed:%v", jobId, err)
log.Error("StopNoteBook(%s) failed:%v", opts, err)
return err
}
return nil
}

func (c CloudbrainOneClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) {
if jobId == "" {
func (c CloudbrainOneClusterAdapter) QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
if opts.JobID == "" {
log.Error("jobid is empty")
return nil, errors.New("jobid is empty")
}
jobResult, err := cloudbrain.GetJob(jobId)
jobResult, err := cloudbrain.GetJob(opts.JobID)
if err != nil {
log.Error("QueryNoteBook failed:%v", err)
return nil, err
@@ -178,7 +186,7 @@ func (c CloudbrainOneClusterAdapter) GetNoteBookOperationProfile(jobId string) (
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
ExitDiagnostics := taskRes.TaskStatuses[0].ExitDiagnostics

return parseDiagnosticsToOperationProfile(result.JobStatus.AppExitDiagnostics,ExitDiagnostics), nil
return parseDiagnosticsToOperationProfile(result.JobStatus.AppExitDiagnostics, ExitDiagnostics), nil
}

func parseDiagnosticsToOperationProfile(appExitDiagnostics string, exitDiagnostics string) *entity.OperationProfile {
@@ -216,30 +224,318 @@ func parseDiagnosticsToOperationProfile(appExitDiagnostics string, exitDiagnosti
Action: e.Action,
})
}
if exitDiagnostics != ""{
if exitDiagnostics != "" {
events = append(events, entity.ProfileEvent{
Message: exitDiagnostics,
Reason: "Error",
Reason: "Error",
})
}
return &entity.OperationProfile{Events: events}
}

func (c CloudbrainOneClusterAdapter) CreateTrainJob(entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) {
return nil, nil
// CreateTrainJob submits a train job under req.Name, using the parameters
// converted from req, and returns the converted creation result.
//
// The failure log now names CreateTrainJob; it used to say CreateNoteBook.
func (c CloudbrainOneClusterAdapter) CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) {
	jobResult, err := cloudbrain.CreateJob(req.Name, convertTrainJobReq2CloudbrainOne(req))
	if err != nil {
		log.Error("CreateTrainJob failed: %v", err)
		return nil, err
	}
	return convertCloudbrainOne2TrainJobRes(jobResult), nil
}
func (c CloudbrainOneClusterAdapter) DeleteTrainJob(string) error {

// convertTrainJobReq2CloudbrainOne turns a train task request into the job
// creation parameters. Only the first task in req.Tasks is used, so req.Tasks
// must not be empty. The job gets a single task role named SubTaskName whose
// resources come from the task's spec and whose command comes from
// getTrainJobCommand.
func convertTrainJobReq2CloudbrainOne(req entity.CreateTrainTaskRequest) models.CreateJobParams {
	trainCommand := getTrainJobCommand(req)
	task := req.Tasks[0]
	spec := task.Spec

	role := models.TaskRole{
		Name:                  SubTaskName,
		TaskNumber:            1,
		MinSucceededTaskCount: 1,
		MinFailedTaskCount:    1,
		CPUNumber:             spec.CpuCores,
		GPUNumber:             spec.AccCardsNum,
		// The spec holds GiB values; the role fields are named in MB.
		MemoryMB:     int(spec.MemGiB * 1024),
		ShmMB:        int(spec.ShareMemGiB * 1024),
		Command:      trainCommand,
		NeedIBDevice: false,
		IsMainRole:   false,
		UseNNI:       false,
	}

	return models.CreateJobParams{
		JobName:    task.Name,
		RetryCount: 1,
		GpuType:    spec.QueueCode,
		Image:      task.ImageUrl,
		TaskRoles:  []models.TaskRole{role},
		Volumes:    convertContainerDataArray2Volume(task.Code, task.Datasets, task.PreTrainModel, task.OutPut),
	}
}

// getTrainJobCommand builds the shell command that runs the training script
// of the first task in req.Tasks:
//
//	python -u /code/<bootFile> --'<label>'='<value>'... [--ckpt_url='/pretrainmodel/<name>'] > <logPath>/<DisplayJobName>-<LogFile>
//
// Parameter labels, parameter values and the model name are wrapped in single
// quotes. Single quotes inside them are now escaped as '\'' so a value cannot
// end the quoting early; input without single quotes produces exactly the
// same command as before.
//
// The log directory is the container path of the first LogPath entry, or
// cloudbrain.ModelMountPath when no log path is given.
func getTrainJobCommand(req entity.CreateTrainTaskRequest) string {
	form := req.Tasks[0]
	bootFile := strings.TrimSpace(form.BootFile)

	// quote wraps s in single quotes for the shell, escaping embedded quotes.
	quote := func(s string) string {
		return "'" + strings.Replace(s, "'", `'\''`, -1) + "'"
	}

	var param string
	for _, parameter := range form.Params.Parameter {
		param += " --" + quote(parameter.Label) + "=" + quote(parameter.Value)
	}

	// GPU training does not support multiple models yet, so only the first
	// pre-trained model is passed to the script.
	if len(form.PreTrainModel) > 0 {
		param += " --ckpt_url=" + quote("/pretrainmodel/"+form.PreTrainModel[0].Name)
	}

	logPath := cloudbrain.ModelMountPath
	if len(form.LogPath) > 0 {
		logPath = form.LogPath[0].ContainerPath
	}

	// NOTE(review): bootFile, logPath and DisplayJobName go into the command
	// unquoted — confirm they are validated before reaching this point.
	return "python -u /code/" + bootFile + param + " > " + logPath + "/" + req.DisplayJobName + "-" + cloudbrain.LogFile
}

// convertCloudbrainOne2TrainJobRes converts a CloudbrainOne create-job result
// into the standard create-train-task response with status JobWaiting.
//
// The job id is read from the "jobId" entry of the payload. A nil result, or a
// payload whose "jobId" is missing or not a string, yields an empty JobID and
// an error log entry instead of a panic from an unchecked type assertion.
func convertCloudbrainOne2TrainJobRes(res *models.CreateJobResult) *entity.CreateTrainTaskResponse {
	var jobID string
	if res == nil {
		log.Error("CloudbrainOne create job returned a nil result")
	} else {
		id, ok := res.Payload["jobId"].(string)
		if !ok {
			log.Error("CloudbrainOne create job response has no string jobId. payload=%+v", res.Payload)
		}
		jobID = id
	}
	return &entity.CreateTrainTaskResponse{
		JobID:  jobID,
		Status: string(models.JobWaiting),
	}
}

// DeleteTrainJob is a no-op for this adapter: the argument is ignored and nil
// is always returned.
func (c CloudbrainOneClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error {
return nil
}
func (c CloudbrainOneClusterAdapter) StopTrainJob(string) error {
// StopTrainJob asks the CloudbrainOne cluster to stop the job identified by
// opts.JobID. The version id is not used by this adapter.
func (c CloudbrainOneClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error {
	if err := cloudbrain.StopJob(opts.JobID); err != nil {
		// The original logged "StopNoteBook" and passed the whole opts struct
		// to a %s verb; log the job id under the correct operation name.
		log.Error("StopTrainJob(%s) failed:%v", opts.JobID, err)
		return err
	}
	return nil
}
func (c CloudbrainOneClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) {
return nil, nil
// QueryTrainJob returns the state of a training job by delegating to
// QueryNoteBook with the same options.
func (c CloudbrainOneClusterAdapter) QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
return c.QueryNoteBook(opts)
}
func (c CloudbrainOneClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) {
return nil, nil

// QueryTrainJobByJobName looks up jobs on the cluster by name and returns the
// ones whose name equals jobName exactly. The result is an empty, non-nil
// slice when nothing matches.
func (c CloudbrainOneClusterAdapter) QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) {
	listResult, err := cloudbrain.GetJobListByName(jobName)
	if err != nil {
		log.Error("GetJobListByName failed:%v", err)
		return nil, err
	}

	payload, err := models.ConvertToJobListResultPayload(listResult.Payload)
	if err != nil {
		log.Error("ConvertToJobListResultPayload failed:%v", err)
		return nil, err
	}

	matched := make([]*entity.QueryTaskResponse, 0)
	for _, job := range payload.Jobs {
		// Only exact name matches are kept.
		if job.Name != jobName {
			continue
		}
		matched = append(matched, entity.ConvertCloudbrainOneQueryNotebookByNameResponse(job))
	}
	return matched, nil
}

// GetLog returns a window of the job's log file read from storage.
//
// It returns (nil, nil) when opts.Lines is not positive or no object key
// prefix is given. When no log file is found under the prefix, the job's exit
// diagnostics are returned as the content with zero lines. Otherwise the first
// matching file is read between the start and end lines computed by
// findStartAndEnd.
func (c CloudbrainOneClusterAdapter) GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) {
if opts.Lines <= 0 || opts.ObjectKeyPrefix == "" {
return nil, nil
}
// Fetch the job's exit information.
existStr := getCloudbrainOneExitDiagnostics(opts.JobId)

helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)

// Look for the log file.
files := getLogFilesInStorage(helper, opts.ObjectKeyPrefix, "log.txt")
if len(files) == 0 {
// No matching file was found; answer with the exit diagnostics only.
startLine, endLine, lines := handleOverLines(opts)
return &entity.ClusterLog{
Content: existStr,
StartLine: fmt.Sprint(startLine),
EndLine: fmt.Sprint(endLine),
Lines: lines,
}, nil
}

// Use the first file by default.
file := files[0]

// Compute the start and end lines.
startLine, endLine := findStartAndEnd(opts, file.RelativePath, helper)

// Read the log content.
result, realEndLine, contentLines := getLogInStorage(startLine, endLine, helper, file.RelativePath)

// Handle the case where the top or bottom of the file has been reached.
if contentLines == 0 {
startLine, realEndLine, contentLines = handleOverLines(opts)
}

return &entity.ClusterLog{
Content: result,
StartLine: fmt.Sprint(startLine),
EndLine: fmt.Sprint(realEndLine),
Lines: contentLines,
}, nil
}

// GetLogDownloadInfo describes how to download the job's log.
//
// When a log file exists under the object key prefix, a reader over the first
// matching file is returned. When none exists, the job's exit diagnostics (if
// any) are served as "exit.log.txt"; with neither, the result is (nil, nil).
func (c CloudbrainOneClusterAdapter) GetLogDownloadInfo(opts entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
// Fetch the job's exit information.
existStr := getCloudbrainOneExitDiagnostics(opts.JobId)

helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)

// Look for the log file.
files := getLogFilesInStorage(helper, opts.ObjectKeyPrefix, "log.txt")
if len(files) == 0 {
// No matching file was found.
if existStr != "" {
return &entity.FileDownloadInfo{
ResultType: entity.FileTypeTXT,
ResultFileName: "exit.log.txt",
Readers: []entity.FileReader{{
Reader: ioutil.NopCloser(strings.NewReader(existStr)),
}},
}, nil
}

return nil, nil
}

// Use the first file by default.
file := files[0]

// Open a reader over the log; it is handed to the caller inside the result.
reader, err := helper.OpenFile(file.RelativePath)
if err != nil {
log.Error("GetLogDownloadInfo OpenFile err.opts=%+v,err =%v", opts, err)
return nil, err
}

return &entity.FileDownloadInfo{
ResultType: entity.FileTypeTXT,
ResultFileName: file.FileName,
Readers: []entity.FileReader{{
Reader: reader,
}},
}, nil
}

// GetSingleOutputDownloadInfo returns a signed download URL for the single
// output object at opts.Path in the storage selected by opts.StorageType.
func (c CloudbrainOneClusterAdapter) GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	signedURL, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetSignedDownloadUrl(opts.Path)
	if err != nil {
		log.Error("GetSignedDownloadUrl err.opts=%+v,err =%v", opts, err)
		return nil, err
	}
	info := &entity.FileDownloadInfo{DownloadUrl: signedURL}
	return info, nil
}
func (c CloudbrainOneClusterAdapter) GetTrainLog(string) (*entity.ClusterLog, error) {

// GetAllOutputDownloadInfo delegates to the package-level
// GetAllOutputDownloadInfo with the same options.
func (c CloudbrainOneClusterAdapter) GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
return GetAllOutputDownloadInfo(opts)
}

// GetNodeInfo provides no node information for this adapter: it always
// returns (nil, nil).
func (c CloudbrainOneClusterAdapter) GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) {
return nil, nil
}

// GetResourceUsage provides no metrics for this adapter: it always returns a
// usage record with a zero interval and an empty, non-nil metrics list.
func (c CloudbrainOneClusterAdapter) GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) {
return &entity.ResourceUsage{
Interval: 0,
MetricsInfo: []entity.MetricsInfo{},
}, nil
}

// getLogInStorage opens the object at path through helper and returns the
// lines from startLine to endLine as read by GetLocalLog: the content, the
// last line actually read, and the number of lines read.
//
// A failure to open the object is logged and reported as empty content with
// zero counts; the error itself is not returned to the caller.
func getLogInStorage(startLine, endLine int64, helper storage_helper.StorageHelper, path string) (content string, realEndLine int64, total int64) {
	log.Info("getLogInStorage path=%s", path)
	reader, err := helper.OpenFile(path)
	if err != nil {
		// Fixed the "elper.OpenFile" typo and raised the level to Error so
		// the swallowed failure is visible.
		log.Error("helper.OpenFile error,path=%s err=%v", path, err)
		return "", 0, 0
	}
	defer reader.Close()

	return GetLocalLog(reader, startLine, endLine)
}

// handleOverLines computes the line window to report when no log content is
// available for the request. The third result is always 0.
//
// For direction DOWN the window ends at opts.BaseLine and starts opts.Lines-1
// lines earlier, clamped to line 1. For any other direction the window is
// lines 1 through opts.Lines.
func handleOverLines(opts entity.ClusterLogOpts) (int64, int64, int64) {
	if opts.Direction != entity.DOWN {
		return 1, opts.Lines, 0
	}

	first := opts.BaseLine + 1 - opts.Lines
	if first < 1 {
		first = 1
	}
	return first, opts.BaseLine, 0
}

// getCloudbrainOneExitDiagnostics returns the exit diagnostics of the first
// task status of the job's sub task, or "" when they cannot be obtained.
//
// This is a best-effort lookup: any failure (job not found, payload that
// cannot be converted, missing sub task role, no task statuses) results in an
// empty string rather than an error. Each step is checked so that an
// unexpected payload cannot cause a nil dereference, a failed type assertion,
// or an out-of-range index.
func getCloudbrainOneExitDiagnostics(jobId string) string {
	// The error is deliberately not treated as fatal: only a missing result
	// ends the lookup here; an unusable payload is caught by the checks below.
	jobResult, _ := cloudbrain.GetJob(jobId)
	if jobResult == nil {
		return ""
	}

	jobRes, err := models.ConvertToJobResultPayload(jobResult.Payload)
	if err != nil {
		log.Error("ConvertToJobResultPayload failed. jobId=%s err=%v", jobId, err)
		return ""
	}

	role, ok := jobRes.TaskRoles[cloudbrain.SubTaskName].(map[string]interface{})
	if !ok {
		return ""
	}

	taskRes, err := models.ConvertToTaskPod(role)
	if err != nil {
		log.Error("ConvertToTaskPod failed. jobId=%s err=%v", jobId, err)
		return ""
	}
	if len(taskRes.TaskStatuses) == 0 {
		return ""
	}
	return taskRes.TaskStatuses[0].ExitDiagnostics
}

// findStartAndEnd computes the inclusive line window to read, relative to
// opts.BaseLine and in the requested direction.
//
// For direction UP the window ends just before the base line (or at the last
// line of the file when the base line is 0) and spans up to opts.Lines lines,
// with the start clamped to line 1. For any other direction the window starts
// right after the base line and spans opts.Lines lines.
func findStartAndEnd(opts entity.ClusterLogOpts, filePath string, helper storage_helper.StorageHelper) (startLine int64, endLine int64) {
	if opts.Direction != entity.UP {
		startLine = opts.BaseLine + 1
		endLine = startLine + opts.Lines - 1
		return startLine, endLine
	}

	endLine = opts.BaseLine - 1
	if opts.BaseLine == 0 {
		// No base line given: start from the end of the file.
		endLine = getAllLineFromFile(helper, filePath)
	}
	startLine = endLine - opts.Lines + 1
	if startLine <= 0 {
		startLine = 1
	}
	return startLine, endLine
}

// GetTrainJobOperationProfile returns the operation profile of a training job
// by delegating to GetNoteBookOperationProfile with the same job id.
func (c CloudbrainOneClusterAdapter) GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) {
return c.GetNoteBookOperationProfile(jobId)
}

// GetOutput lists the objects one level below opts.ParentDir (resolved under
// opts.ObjectKeyPrefix) in the storage selected by opts.StorageType. The
// returned output always carries status ModelMigrateSuccess.
func (c CloudbrainOneClusterAdapter) GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) {
	dir := path.Join(opts.ObjectKeyPrefix, opts.ParentDir)
	files, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetOneLevelObjectsUnderDir(dir)
	if err != nil {
		log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err)
		return nil, err
	}

	output := &entity.ClusterAITaskOutput{
		Status:   models.ModelMigrateSuccess,
		Path:     opts.ParentDir,
		FileList: files,
	}
	return output, nil
}

// GetAllOutput lists every object under opts.ParentDir (resolved under
// opts.ObjectKeyPrefix) in the storage selected by opts.StorageType.
func (c CloudbrainOneClusterAdapter) GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) {
	dir := path.Join(opts.ObjectKeyPrefix, opts.ParentDir)
	files, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetAllObjectsUnderDir(dir)
	if err != nil {
		log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err)
		return nil, err
	}
	all := &entity.AllAITaskOutput{FileList: files}
	return all, nil
}

+ 592
- 29
services/ai_task_service/cluster/cloudbrain_two.go View File

@@ -2,16 +2,22 @@ package cluster

import "C"
import (
"encoding/json"
"fmt"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/manager/client/cloudbrain_two"
"code.gitea.io/gitea/manager/client/cloudbrain_two_cd"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"encoding/json"
"errors"
"fmt"
"io"
"path"
"strconv"
"strings"
)

type CloudbrainTwoClusterAdapter struct {
@@ -73,21 +79,59 @@ func (c CloudbrainTwoClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBook
return nil, nil
}

var cloudbrainTwoImages []entity.ClusterImage
var cloudbrainTwoNotebookImages []entity.ClusterImage

func (c CloudbrainTwoClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
if cloudbrainTwoImages == nil || len(cloudbrainTwoImages) == 0 {
func (c CloudbrainTwoClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
if cloudbrainTwoNotebookImages == nil || len(cloudbrainTwoNotebookImages) == 0 {
images := setting.StImageInfos.ImageInfo
cloudbrainTwoImages = make([]entity.ClusterImage, len(images))
cloudbrainTwoNotebookImages = make([]entity.ClusterImage, len(images))
for i := 0; i < len(images); i++ {
cloudbrainTwoImages[i] = entity.ClusterImage{
cloudbrainTwoNotebookImages[i] = entity.ClusterImage{
ImageId: images[i].Id,
ImageName: images[i].Value,
}
}
}

return cloudbrainTwoImages, false, nil
return cloudbrainTwoNotebookImages, false, nil
}

var cloudbrainTwoTrainImages []entity.ClusterImage

// GetTrainImages returns the training images (engine versions) configured in
// setting.EngineVersions. The parsed list is kept in the package-level
// cloudbrainTwoTrainImages and reused while it is non-empty.
//
// The second result is always false and the error is always nil: when the
// setting cannot be parsed, the failure is logged and the currently cached
// (empty) list is returned.
func (c CloudbrainTwoClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
	if len(cloudbrainTwoTrainImages) > 0 {
		return cloudbrainTwoTrainImages, false, nil
	}

	var engines modelarts.VersionInfo
	err := json.Unmarshal([]byte(setting.EngineVersions), &engines)
	if err != nil {
		log.Error("Unmarshal setting.EngineVersions err. req=%+v err=%v", req, err)
		return cloudbrainTwoTrainImages, false, nil
	}

	images := make([]entity.ClusterImage, len(engines.Version))
	for i, version := range engines.Version {
		images[i] = entity.ClusterImage{
			ImageId:   fmt.Sprint(version.ID),
			ImageName: version.Value,
			ImageUrl:  version.Url,
		}
	}
	cloudbrainTwoTrainImages = images

	return cloudbrainTwoTrainImages, false, nil
}

// GetTrainImageByImageId finds the configured training image whose id equals
// imageId. It returns an error when imageId is empty, when no image list is
// available, or when no image has that id.
func (c CloudbrainTwoClusterAdapter) GetTrainImageByImageId(imageId string) (entity.ClusterImage, error) {
	var none entity.ClusterImage
	if imageId == "" {
		return none, errors.New("imageId is empty")
	}

	// GetTrainImages always reports a nil error; a nil list means the
	// configuration could not be loaded.
	candidates, _, _ := c.GetTrainImages(entity.GetImageReq{})
	if candidates == nil {
		return none, errors.New("image not setting correctly")
	}

	for i := range candidates {
		if candidates[i].ImageId == imageId {
			return candidates[i], nil
		}
	}
	return none, errors.New("image not exists")
}

var poolInfos *models.PoolInfos
@@ -128,8 +172,8 @@ func convertCloudbrainTwo2NoteBookRestartRes(jobId string, res *models.NotebookA
}
}

func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(jobId string) error {
task, err := models.GetNewestCloudbrainByJobId(jobId)
func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(opts entity.JobIdAndVersionId) error {
task, err := models.GetNewestCloudbrainByJobId(opts.JobID)
if err != nil {
return err
}
@@ -140,14 +184,14 @@ func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(jobId string) error {
_, err = cloudbrain_two_cd.DelNotebook(task.JobID)
}
if err != nil {
log.Error("DeleteNoteBook err.jobID=%s err=%v", jobId, err)
log.Error("DeleteNoteBook err.jobID=%s err=%v", opts, err)
return err
}
return nil
}

func (c CloudbrainTwoClusterAdapter) StopNoteBook(jobId string) error {
task, err := models.GetNewestCloudbrainByJobId(jobId)
func (c CloudbrainTwoClusterAdapter) StopNoteBook(opts entity.JobIdAndVersionId) error {
task, err := models.GetNewestCloudbrainByJobId(opts.JobID)
if err != nil {
return err
}
@@ -160,14 +204,14 @@ func (c CloudbrainTwoClusterAdapter) StopNoteBook(jobId string) error {
_, err = cloudbrain_two_cd.ManageNotebook(task.JobID, param)
}
if err != nil {
log.Error("StopNoteBook err.jobID=%s err=%v", jobId, err)
log.Error("StopNoteBook err.jobID=%s err=%v", opts, err)
return err
}
return nil
}

func (c CloudbrainTwoClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) {
task, err := models.GetNewestCloudbrainByJobId(jobId)
func (c CloudbrainTwoClusterAdapter) QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
task, err := models.GetNewestCloudbrainByJobId(opts.JobID)
if err != nil {
return nil, err
}
@@ -232,7 +276,9 @@ func (c CloudbrainTwoClusterAdapter) GetNoteBookLog(jobId string) (*entity.Clust
}

func (c CloudbrainTwoClusterAdapter) GetNoteBookUrl(jobId string) (string, error) {
res, err := c.QueryNoteBook(jobId)
res, err := c.QueryNoteBook(entity.JobIdAndVersionId{
JobID: jobId,
})
if err != nil {
return "", err
}
@@ -277,21 +323,538 @@ func parseCloudbrainTwoEventsToOperationProfile(result *models.GetNotebook2Resul
return &entity.OperationProfile{Events: events}
}

func (c CloudbrainTwoClusterAdapter) CreateTrainJob(entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) {
return nil, nil
// CreateTrainJob submits a training job for the first task of req.
//
// The task's ImageId decides the path: when it parses to a positive integer
// the job is created with that engine id; otherwise (zero, negative, or not a
// number) it is treated as a custom image that is resolved through
// GetTrainImageByImageId. A creation error whose message starts with
// modelarts.UnknownErrorPrefix is reported as models.NetworkError.
//
// NOTE(review): req.Tasks[0] is read without a length check — confirm callers
// always provide at least one task.
func (c CloudbrainTwoClusterAdapter) CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) {
t := req.Tasks[0]
var jobResult *models.CreateTrainJobResult
var createErr error
// An imageId of 0 or a negative number denotes a custom image.
imageId := int64(-1)
i, err := strconv.ParseInt(t.ImageId, 10, 32)
if err == nil {
imageId = i
}
if imageId <= 0 {
image, err := c.GetTrainImageByImageId(t.ImageId)
if err != nil {
log.Error("GetTrainImageByImageId error.req=%+v err=%v", req, err)
return nil, err
}
jobResult, createErr = modelarts.CreateTrainJobUserImage(convertCloudbrainTwoTrainJobUserImageReq(req, image))
} else {
param, err := convertCloudbrainTwoTrainJobReq(req)
if err != nil {
return nil, err
}
jobResult, createErr = modelarts.CreateTrainJob(param)
}
if createErr != nil {
log.Error("CloudbrainTwo createTrainJob failed: %v", createErr.Error())
if strings.HasPrefix(createErr.Error(), modelarts.UnknownErrorPrefix) {
return nil, models.NetworkError{}
}
return nil, createErr
}
return convertCloudbrainTwoRes2Standard(jobResult), nil
}
func (c CloudbrainTwoClusterAdapter) DeleteTrainJob(string) error {
return nil

// convertCloudbrainTwoRes2Standard converts a create-train-job result into the
// standard create-train-task response. JobID is left empty unless the
// cluster's job id is positive.
func convertCloudbrainTwoRes2Standard(res *models.CreateTrainJobResult) *entity.CreateTrainTaskResponse {
	resp := &entity.CreateTrainTaskResponse{
		CreatedAt:   res.CreateTime,
		Name:        res.JobName,
		Status:      modelarts.TransTrainJobStatus(res.Status),
		VersionID:   res.VersionID,
		VersionName: res.VersionName,
	}
	if res.JobID > 0 {
		resp.JobID = fmt.Sprint(res.JobID)
	}
	return resp
}
func (c CloudbrainTwoClusterAdapter) StopTrainJob(string) error {

// convertCloudbrainTwoTrainJobReq builds the create-train-job parameters for a
// preset engine from the first task of req. The task's ImageId must be a
// base-10 integer and is used as the engine id; a parse failure is logged and
// returned.
func convertCloudbrainTwoTrainJobReq(req entity.CreateTrainTaskRequest) (models.CreateTrainJobParams, error) {
	task := req.Tasks[0]

	engineID, err := strconv.ParseInt(task.ImageId, 10, 64)
	if err != nil {
		log.Error("Parse imageId err.imageIdStr=%s err=%v", task.ImageId, err)
		return models.CreateTrainJobParams{}, err
	}

	codeURL := JointCloudbrainTwoReqUrl(task.Code)
	cfg := models.Config{
		WorkServerNum: task.WorkServerNumber,
		AppUrl:        codeURL,
		BootFileUrl:   path.Join(codeURL, task.BootFile),
		DataUrl:       JointCloudbrainTwoReqUrl(task.Datasets),
		TrainUrl:      JointCloudbrainTwoReqUrl(task.OutPut),
		LogUrl:        JointCloudbrainTwoReqUrl(task.LogPath),
		PoolID:        task.PoolId,
		CreateVersion: true,
		Flavor:        models.Flavor{Code: task.Spec.SourceSpecId},
		EngineID:      engineID,
		Parameter:     handleCloudbrainTwoParameter(req).Parameter,
		ShareAddr:     setting.ModelArtsShareAddr,
		MountPath:     setting.ModelArtsMountPath,
		NasType:       setting.ModelArtsNasType,
	}

	return models.CreateTrainJobParams{
		JobName:     req.Name,
		Description: req.Description,
		Config:      cfg,
	}, nil
}

// JointCloudbrainTwoReqUrl returns the request path "/<bucket>/<objectKey>"
// of the first element of data, cleaned by path.Join. Directories get exactly
// one trailing slash. An empty slice yields "".
func JointCloudbrainTwoReqUrl(data []entity.ContainerData) string {
	if len(data) == 0 {
		return ""
	}

	first := data[0]
	joined := path.Join("/", first.Bucket, first.ObjectKey)
	if !first.IsDir {
		return joined
	}
	// path.Join strips trailing slashes; trimming first keeps the root "/"
	// from becoming "//".
	return strings.TrimSuffix(joined, "/") + "/"
}

// convertCloudbrainTwoTrainJobUserImageReq builds the create-train-job
// parameters for a custom image from the first task of req. The image URL
// comes from image, and the user command is generated by
// getCloudbrainTwoUserCommand from the code, dataset and output URLs together
// with the run parameters.
func convertCloudbrainTwoTrainJobUserImageReq(req entity.CreateTrainTaskRequest, image entity.ClusterImage) models.CreateUserImageTrainJobParams {
	task := req.Tasks[0]

	codeURL := JointCloudbrainTwoReqUrl(task.Code)
	bootURL := path.Join(JointCloudbrainTwoReqUrl(task.Code), task.BootFile)
	datasetURL := JointCloudbrainTwoReqUrl(task.Datasets)
	outputURL := JointCloudbrainTwoReqUrl(task.OutPut)
	logURL := JointCloudbrainTwoReqUrl(task.LogPath)
	runParams := handleCloudbrainTwoParameter(req)

	cfg := models.UserImageConfig{
		WorkServerNum: task.WorkServerNumber,
		AppUrl:        codeURL,
		BootFileUrl:   bootURL,
		DataUrl:       datasetURL,
		TrainUrl:      outputURL,
		LogUrl:        logURL,
		PoolID:        task.PoolId,
		CreateVersion: true,
		Flavor:        models.Flavor{Code: task.Spec.SourceSpecId},
		UserImageUrl:  image.ImageUrl,
		UserCommand:   getCloudbrainTwoUserCommand(codeURL, task.BootFile, datasetURL, outputURL, runParams),
		ShareAddr:     setting.ModelArtsShareAddr,
		MountPath:     setting.ModelArtsMountPath,
		NasType:       setting.ModelArtsNasType,
	}

	return models.CreateUserImageTrainJobParams{
		JobName:     req.Name,
		Description: req.Description,
		Config:      cfg,
	}
}

// getCloudbrainTwoDataUrl returns the object key of the first element of
// data, or "" when data is empty.
func getCloudbrainTwoDataUrl(data []entity.ContainerData) string {
	if len(data) > 0 {
		return data[0].ObjectKey
	}
	return ""
}

// handleCloudbrainTwoParameter assembles the run parameters passed to the
// training job of the first task in req:
//
//   - the JSON list of dataset URLs (modelarts.MultiDataUrl), when datasets exist;
//   - the JSON list of pre-trained model URLs (modelarts.PretrainUrl) and the
//     URL of the first pre-trained model (modelarts.CkptUrl), when models exist;
//   - every user parameter except modelarts.TrainUrl and modelarts.DataUrl;
//   - a default modelarts.DeviceTarget of modelarts.Ascend when the user did
//     not supply one.
func handleCloudbrainTwoParameter(req entity.CreateTrainTaskRequest) models.Parameters {
	t := req.Tasks[0]

	var param = models.Parameters{}

	datasetUrl := getCloudbrainTwoMultiDataUrl(t.Datasets)
	if datasetUrl != "" {
		param.Parameter = append(param.Parameter, models.Parameter{
			Label: modelarts.MultiDataUrl,
			Value: datasetUrl,
		})
	}

	// A non-empty model URL list implies len(t.PreTrainModel) > 0, so indexing
	// the first model is safe. The original read t.Datasets[0] here, which set
	// the checkpoint URL to a dataset and panicked when a model was supplied
	// without any dataset.
	multiModelUrl := getCloudbrainTwoModelUrl(t.PreTrainModel)
	if multiModelUrl != "" {
		param.Parameter = append(param.Parameter, models.Parameter{
			Label: modelarts.PretrainUrl,
			Value: multiModelUrl,
		}, models.Parameter{
			Label: modelarts.CkptUrl,
			Value: t.PreTrainModel[0].S3DownloadUrl,
		})
	}

	existDeviceTarget := false
	for _, parameter := range t.Params.Parameter {
		if parameter.Label == modelarts.DeviceTarget {
			existDeviceTarget = true
		}
		// TrainUrl and DataUrl are dropped from the user's parameters.
		if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
			param.Parameter = append(param.Parameter, models.Parameter{
				Label: parameter.Label,
				Value: parameter.Value,
			})
		}
	}
	if !existDeviceTarget {
		param.Parameter = append(param.Parameter, models.Parameter{
			Label: modelarts.DeviceTarget,
			Value: modelarts.Ascend,
		})
	}
	return param
}

// getCloudbrainTwoUserCommand builds the shell command that runs
// /home/work/run_train.sh for a custom-image training job. The script receives
// the s3:// code URL, the boot file path relative to the last directory of the
// code URL, the log path /tmp/log/train.log, the data and train URLs, and then
// every entry of params as --'label'='value'.
func getCloudbrainTwoUserCommand(appUrl, bootFile, dataUrl, trainUrl string, params models.Parameters) string {
	// strings.Split always yields at least one element, so the last segment
	// is always available (it is "" for an empty appUrl).
	segments := strings.Split(strings.Trim(appUrl, "/"), "/")
	codeDir := segments[len(segments)-1]

	var cmd strings.Builder
	cmd.WriteString("/bin/bash /home/work/run_train.sh 's3://" + appUrl + "' '" + codeDir + "/" + bootFile + "' '/tmp/log/train.log' --'data_url'='s3://" + dataUrl + "' --'train_url'='s3://" + trainUrl + "'")
	for _, p := range params.Parameter {
		cmd.WriteString(" --'" + p.Label + "'='" + p.Value + "'")
	}
	return cmd.String()
}

// getCloudbrainTwoMultiDataUrl serializes the download URL and name of every
// dataset as a JSON array. It returns "" when there are no datasets.
func getCloudbrainTwoMultiDataUrl(datasets []entity.ContainerData) string {
	if len(datasets) == 0 {
		return ""
	}

	urls := make([]models.Datasurl, 0, len(datasets))
	for i := range datasets {
		urls = append(urls, models.Datasurl{
			DatasetUrl:  datasets[i].S3DownloadUrl,
			DatasetName: datasets[i].Name,
		})
	}
	// The marshal error is ignored, as in the original code.
	encoded, _ := json.Marshal(urls)
	return string(encoded)
}

// getCloudbrainTwoModelUrl serializes the download URL and name of every
// pre-trained model as a JSON array. It returns "" when the slice is empty.
func getCloudbrainTwoModelUrl(datasets []entity.ContainerData) string {
	if len(datasets) == 0 {
		return ""
	}

	urls := make([]models.ModelUrls, 0, len(datasets))
	for i := range datasets {
		urls = append(urls, models.ModelUrls{
			ModelUrl:  datasets[i].S3DownloadUrl,
			ModelName: datasets[i].Name,
		})
	}
	// The marshal error is ignored, as in the original code.
	encoded, _ := json.Marshal(urls)
	return string(encoded)
}

// DeleteTrainJob deletes the given version of a training job on the cluster
// and returns the error reported by the delete call.
func (c CloudbrainTwoClusterAdapter) DeleteTrainJob(opts entity.JobIdAndVersionId) error {
	versionID := strconv.FormatInt(opts.VersionID, 10)
	if _, err := modelarts.DelTrainJobVersion(opts.JobID, versionID); err != nil {
		return err
	}
	return nil
}

// StopTrainJob stops the given version of a training job on the cluster.
func (c CloudbrainTwoClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error {
	_, err := modelarts.StopTrainJob(opts.JobID, strconv.FormatInt(opts.VersionID, 10))
	if err != nil {
		// The original passed the whole opts struct to a %s verb, which
		// garbles the non-string version id; log the fields explicitly.
		log.Error("StopTrainJob(jobID=%s versionID=%d) failed:%v", opts.JobID, opts.VersionID, err)
		return err
	}
	return nil
}
func (c CloudbrainTwoClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) {
return nil, nil

// QueryTrainJobByJobName fetches the first page (20 entries, newest first) of
// training jobs searched by jobName and returns those whose name equals
// jobName exactly. The result is an empty, non-nil slice when nothing matches.
func (c CloudbrainTwoClusterAdapter) QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) {
	page, err := modelarts.GetTrainJobList(20, 1, "create_time", "desc", jobName)
	if err != nil {
		log.Error("GetTrainJobList failed:%v", err)
		return nil, err
	}

	matched := make([]*entity.QueryTaskResponse, 0)
	if page == nil {
		return matched, nil
	}
	for _, job := range page.JobList {
		if job.JobName != jobName {
			continue
		}
		matched = append(matched, convertJobList2QueryRes(job))
	}
	return matched, nil
}
func (c CloudbrainTwoClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) {
return nil, nil

// convertJobList2QueryRes converts one entry of a training job list into the
// standard query response: job id as a decimal string, translated status, and
// version id.
func convertJobList2QueryRes(res models.JobList) *entity.QueryTaskResponse {
	jobID := strconv.FormatInt(res.JobID, 10)
	status := transCloudbrainTwoTrainJobStatus(res.IntStatus)
	return &entity.QueryTaskResponse{JobId: jobID, Status: status, VersionId: res.VersionID}
}
func (c CloudbrainTwoClusterAdapter) GetTrainLog(string) (*entity.ClusterLog, error) {
return nil, nil

// QueryTrainJob fetches the given version of a training job from the cluster
// and converts it into the standard query response.
func (c CloudbrainTwoClusterAdapter) QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
	result, err := modelarts.GetTrainJob(opts.JobID, strconv.FormatInt(opts.VersionID, 10))
	if err != nil {
		// The original passed the whole opts struct to a %s verb, which
		// garbles the non-string version id; log the fields explicitly.
		log.Error("GetTrainJob(jobID=%s versionID=%d) failed:%v", opts.JobID, opts.VersionID, err)
		return nil, err
	}
	return convertCloudbrainTwoTrainJob2QueryRes(result), nil
}

// convertCloudbrainTwoTrainJob2QueryRes converts a training job detail into
// the standard query response.
//
// StartTime and Duration are divided by 1000 before use (presumably
// milliseconds to seconds — TODO confirm). CompletedAt is derived as start
// plus duration, and only when the job has started and its status is terminal.
func convertCloudbrainTwoTrainJob2QueryRes(res *models.GetTrainJobResult) *entity.QueryTaskResponse {
	status := transCloudbrainTwoTrainJobStatus(res.IntStatus)

	var startedAt, completedAt timeutil.TimeStamp
	if res.StartTime > 0 {
		startedAt = timeutil.TimeStamp(res.StartTime / 1000)
	}
	if startedAt > 0 && models.IsCloudbrainTerminalStatus(status) {
		completedAt = startedAt.Add(res.Duration / 1000)
	}

	return &entity.QueryTaskResponse{
		StartedAt:   startedAt,
		CompletedAt: completedAt,
		JobId:       fmt.Sprint(res.JobID),
		Status:      status,
		VersionId:   res.VersionID,
	}
}

// cloudbrainTwoTrainJobStatusNames maps a numeric training job status (the
// index) to its textual name.
var cloudbrainTwoTrainJobStatusNames = [...]string{
	0:  "UNKNOWN",
	1:  "INIT",
	2:  "IMAGE_CREATING",
	3:  "IMAGE_FAILED",
	4:  "SUBMIT_TRYING",
	5:  "SUBMIT_FAILED",
	6:  "DELETE_FAILED",
	7:  "WAITING",
	8:  "RUNNING",
	9:  "KILLING",
	10: "COMPLETED",
	11: "FAILED",
	12: "KILLED",
	13: "CANCELED",
	14: "LOST",
	15: "SCALING",
	16: "SUBMIT_MODEL_FAILED",
	17: "DEPLOY_SERVICE_FAILED",
	18: "CHECK_INIT",
	19: "CHECK_RUNNING",
	20: "CHECK_RUNNING_COMPLETED",
	21: "CHECK_FAILED",
}

// transCloudbrainTwoTrainJobStatus translates a numeric training job status
// into its name. Codes outside the known range 0..21 are returned as their
// decimal string.
func transCloudbrainTwoTrainJobStatus(status int) string {
	if status < 0 || status >= len(cloudbrainTwoTrainJobStatusNames) {
		return strconv.Itoa(status)
	}
	return cloudbrainTwoTrainJobStatusNames[status]
}

// transferCloudbrain2LogOrder maps a paging direction onto the log query
// order: "asc" for UP, "desc" for DOWN, and "" for anything else.
func transferCloudbrain2LogOrder(direction entity.Direction) string {
	switch direction {
	case entity.UP:
		return "asc"
	case entity.DOWN:
		return "desc"
	default:
		return ""
	}
}

// GetLog fetches a window of a training job's log from the cluster.
//
// A head request reads from the start in ascending order and a bottom request
// reads from the end in descending order, both without a base line. Any other
// request pages from opts.BaseLine in the order derived from opts.Direction.
func (c CloudbrainTwoClusterAdapter) GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) {
	var baseLine, order string
	switch {
	case opts.IsHeadRequest():
		order = "asc"
	case opts.IsBottomRequest():
		order = "desc"
	default:
		baseLine = fmt.Sprint(opts.BaseLine)
		order = transferCloudbrain2LogOrder(opts.Direction)
	}

	logResult, err := getModelartsTrainJob(opts.JobId, opts.VersionID, baseLine, order, int(opts.Lines), opts.LogFileName)
	if err != nil {
		log.Error("getModelartsTrainJob(%s) failed:%v", opts.JobId, err)
		return nil, err
	}

	return &entity.ClusterLog{
		Content:   logResult.Content,
		StartLine: logResult.StartLine,
		EndLine:   logResult.EndLine,
		Lines:     int64(logResult.Lines),
	}, nil
}

// getModelartsTrainJob fetches a slice of a training job's log through
// modelarts.GetTrainJobLog, converting the numeric version id to a string.
// Failures are logged and returned.
func getModelartsTrainJob(jobID string, versionID int64, baseLine string, order string, lines int, logFileName string) (*models.GetTrainJobLogResult, error) {
	version := strconv.FormatInt(versionID, 10)
	logResult, err := modelarts.GetTrainJobLog(jobID, version, baseLine, logFileName, order, lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
		return nil, err
	}
	return logResult, nil
}

// GetLogDownloadInfo describes how to download the job's log files (objects
// matched with the ".log" suffix under the object key prefix).
//
// With no log file the result is (nil, nil). With exactly one, a signed
// download URL is returned. With several, a reader is opened for each file and
// the result is marked as a ZIP named after the display job name.
func (c CloudbrainTwoClusterAdapter) GetLogDownloadInfo(opts entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
// err is declared once so the deferred cleanup below observes the error that
// the function finally returns.
var err error
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)

// Look for the log files.
files := getLogFilesInStorage(helper, opts.ObjectKeyPrefix, ".log")
if len(files) == 0 {
// No matching file was found.
return nil, nil
}

// With a single log file, use the storage download link directly.
if len(files) == 1 {
var url string
url, err = helper.GetSignedDownloadUrl(files[0].RelativePath)
if err != nil {
log.Error("GetObsCreateSignedUrlByBucketAndKey failed when GetLogDownloadInfo opts=%+v: err=%v", opts, err)
return nil, err
}
return &entity.FileDownloadInfo{
DownloadUrl: url,
}, nil
}

readerList := make([]entity.FileReader, 0)
// On failure, close every reader opened so far; on success they are handed to
// the caller still open.
defer func() {
if err != nil {
for _, r := range readerList {
if r.Reader != nil {
r.Reader.Close()
}
}
}
}()
// Multiple files have to be packed together for download.
for _, file := range files {
// Open a reader over this log file.
var reader io.ReadCloser
reader, err = helper.OpenFile(file.RelativePath)
if err != nil {
log.Error("GetLogDownloadInfo OpenFile err.opts=%+v,err =%v", opts, err)
return nil, err
}
readerList = append(readerList, entity.FileReader{
Reader: reader,
Name: file.FileName,
})
}
return &entity.FileDownloadInfo{
Readers: readerList,
ResultType: entity.FileTypeZIP,
ResultFileName: opts.DisplayJobName + ".zip",
}, nil
}

// GetSingleOutputDownloadInfo returns a signed download URL for the single
// output object at opts.Path in the storage selected by opts.StorageType.
func (c CloudbrainTwoClusterAdapter) GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	signedURL, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetSignedDownloadUrl(opts.Path)
	if err != nil {
		log.Error("GetSignedDownloadUrl err.opts=%+v,err =%v", opts, err)
		return nil, err
	}
	info := &entity.FileDownloadInfo{DownloadUrl: signedURL}
	return info, nil
}

// GetAllOutputDownloadInfo delegates to the package-level
// GetAllOutputDownloadInfo with the same options.
func (c CloudbrainTwoClusterAdapter) GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
return GetAllOutputDownloadInfo(opts)
}

// GetTrainJobOperationProfile returns the operation profile of a training job
// by delegating to GetNoteBookOperationProfile with the same job id.
func (c CloudbrainTwoClusterAdapter) GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) {
return c.GetNoteBookOperationProfile(jobId)
}

// GetResourceUsage fetches the metric statistics of a training job version
// and converts them to the standard resource usage structure.
//
// NOTE(review): transferModelartsMetricsToStandard returns nil when a metric
// value cannot be parsed, in which case this method returns (nil, nil) —
// confirm callers handle a nil result without an error.
func (c CloudbrainTwoClusterAdapter) GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) {
result, err := modelarts.GetTrainJobMetricStatistic(opts.JobId, strconv.FormatInt(opts.VersionID, 10), opts.LogFileName)
if err != nil {
log.Error("GetTrainJobMetricStatistic(%s) failed:%v", opts.JobId, err.Error())
return nil, err
}
return transferModelartsMetricsToStandard(result), nil
}

// transferModelartsMetricsToStandard converts metric statistics into the
// standard resource usage structure, parsing every metric value from its
// string form into a float32. It logs the failure and returns nil as soon as
// one value cannot be parsed.
func transferModelartsMetricsToStandard(result *models.GetTrainJobMetricStatisticResult) *entity.ResourceUsage {
	metrics := make([]entity.MetricsInfo, 0)
	for _, info := range result.MetricsInfo {
		values := make([]float32, len(info.Value))
		for j, raw := range info.Value {
			parsed, err := strconv.ParseFloat(raw, 32)
			if err != nil {
				log.Error("parse metrics value error, val=%v err=%v result=%+v", raw, err, result)
				return nil
			}
			values[j] = float32(parsed)
		}
		metrics = append(metrics, entity.MetricsInfo{
			Name:  info.Metric,
			Value: values,
		})
	}

	return &entity.ResourceUsage{
		Interval:    result.Interval,
		MetricsInfo: metrics,
	}
}

// GetNodeInfo returns one node entry per log file name reported for the given
// training job version. A failure to fetch the log file names is logged and
// reported as (nil, nil), not as an error; the same is returned when the
// cluster gives no result.
func (c CloudbrainTwoClusterAdapter) GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) {
	logFiles, err := modelarts.GetTrainJobLogFileNames(opts.JobId, strconv.FormatInt(opts.VersionId, 10))
	if err != nil {
		log.Error("GetTrainJobLogFileNames(%s) failed:%v", opts.JobId, err.Error())
		return nil, nil
	}
	if logFiles == nil {
		return nil, nil
	}

	nodes := make([]entity.AITaskNodeInfo, 0, len(logFiles.LogFileList))
	for _, name := range logFiles.LogFileList {
		nodes = append(nodes, entity.AITaskNodeInfo{LogFileName: name})
	}
	return nodes, nil
}

// GetOutput lists the objects one level below opts.ParentDir (resolved under
// opts.ObjectKeyPrefix) in the storage selected by opts.StorageType. The
// returned output always carries status ModelMigrateSuccess.
func (c CloudbrainTwoClusterAdapter) GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) {
	dir := path.Join(opts.ObjectKeyPrefix, opts.ParentDir)
	files, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetOneLevelObjectsUnderDir(dir)
	if err != nil {
		log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err)
		return nil, err
	}

	output := &entity.ClusterAITaskOutput{
		Status:   models.ModelMigrateSuccess,
		Path:     opts.ParentDir,
		FileList: files,
	}
	return output, nil
}

// GetAllOutput lists every object under opts.ParentDir (resolved under
// opts.ObjectKeyPrefix) in the storage selected by opts.StorageType.
func (c CloudbrainTwoClusterAdapter) GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) {
	dir := path.Join(opts.ObjectKeyPrefix, opts.ParentDir)
	files, err := storage_helper.SelectUploaderFromStorageType(opts.StorageType).GetAllObjectsUnderDir(dir)
	if err != nil {
		log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err)
		return nil, err
	}
	all := &entity.AllAITaskOutput{FileList: files}
	return all, nil
}

+ 19
- 13
services/ai_task_service/cluster/cluster_base.go View File

@@ -1,9 +1,8 @@
package cluster

import (
"errors"

"code.gitea.io/gitea/entity"
"errors"
)

var clusterMap = map[entity.ClusterType]ClusterAdapter{}
@@ -26,23 +25,30 @@ func GetCluster(t entity.ClusterType) (ClusterAdapter, error) {
type ClusterAdapter interface {
CreateNoteBook(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error)
RestartNoteBook(jobId string) (*entity.RestartNoteBookTaskResponse, error)
DeleteNoteBook(jobId string) error
StopNoteBook(jobId string) error
QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error)
DeleteNoteBook(opts entity.JobIdAndVersionId) error
StopNoteBook(opts entity.JobIdAndVersionId) error
QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error)
QueryNoteBookByJobName(jobName string) ([]*entity.QueryTaskResponse, error)
GetNoteBookLog(jobId string) (*entity.ClusterLog, error)
GetNoteBookUrl(jobId string) (string, error)
GetNoteBookOperationProfile(jobId string) (*entity.OperationProfile, error)
CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error)
DeleteTrainJob(jobId string) error
StopTrainJob(string) error
RestartTrainJob(jobId string) (*entity.CreateTrainTaskResponse, error)
QueryTrainJob(jobId string) (*entity.QueryTaskResponse, error)
GetTrainLog(jobId string) (*entity.ClusterLog, error)

DeleteTrainJob(opts entity.JobIdAndVersionId) error
StopTrainJob(opts entity.JobIdAndVersionId) error
QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error)
QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error)
GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error)
GetLogDownloadInfo(entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error)
GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error)
GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error)
GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error)
GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error)
GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error)
GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error)
GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error)
//GetImages return available list of clusters
//The second parameter will return true if image is no limit
GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error)

GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error)
GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error)
CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error)
}

+ 140
- 0
services/ai_task_service/cluster/common.go View File

@@ -0,0 +1,140 @@
package cluster

import (
"bufio"
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"io"
"strings"
)

// GetLocalLog reads the lines numbered startLine..endLine (1-based, inclusive)
// from r and returns them concatenated.
//
// Results:
//   - content: text of the lines read, line terminators included.
//   - realEndLine: number of the last line actually read; it is smaller than
//     endLine when r ends inside the requested range.
//   - total: realEndLine - startLine + 1.
//
// All three results are zero when startLine > endLine, or when r ends or fails
// before startLine is reached. When EOF is hit inside the requested range the
// trailing fragment is counted as one more line even if it is empty, so input
// that ends with '\n' reports one line more than it has newlines.
func GetLocalLog(r io.Reader, startLine, endLine int64) (content string, realEndLine int64, total int64) {
	if startLine > endLine {
		return "", 0, 0
	}
	reader := bufio.NewReader(r)
	countLine := int64(1)

	// Skip everything in front of the first requested line.
	for countLine < startLine {
		if _, err := reader.ReadString('\n'); err != nil {
			// Running out of input here is an ordinary outcome (the caller asked
			// for lines past the end), so only real read failures are logged.
			if err != io.EOF {
				log.Error("GetLocalLog ReadString err. %v", err)
			}
			return "", 0, 0
		}
		countLine++
	}

	// Collect the requested range. A Builder avoids re-copying the whole
	// accumulated text on every appended line.
	var sb strings.Builder
	for countLine <= endLine {
		line, err := reader.ReadString('\n')
		if err != nil {
			if err == io.EOF {
				// Keep the unterminated tail and count it as a line.
				sb.WriteString(line)
				countLine++
			} else {
				log.Error("GetLocalLog ReadString err. %v", err)
			}
			break
		}
		sb.WriteString(line)
		countLine++
	}

	realEndLine = countLine - 1
	return sb.String(), realEndLine, realEndLine - startLine + 1
}

// getAllLineFromFile opens filePath through helper and counts its lines.
//
// It returns 0 when the file cannot be opened. The fragment after the last
// '\n' is counted as a line even when it is empty, so a file that ends with a
// newline reports one more than its number of newlines. A read failure other
// than EOF stops counting and returns the lines seen so far.
func getAllLineFromFile(helper storage_helper.StorageHelper, filePath string) int64 {
	r, err := helper.OpenFile(filePath)
	if err != nil {
		log.Info("error:" + err.Error())
		return 0
	}
	// Close only after the error check: a failed OpenFile returns a nil reader
	// and closing it would panic.
	defer r.Close()

	var count int64
	reader := bufio.NewReader(r)
	for {
		if _, err := reader.ReadString('\n'); err != nil {
			if err == io.EOF {
				count++
			} else {
				log.Error("getAllLineFromFile ReadString err. %v", err)
			}
			break
		}
		count++
	}
	return count
}

// getLogFilesInStorage lists the direct children of objectKeyPrefix and returns
// the non-directory entries whose file name ends with logSuffix.
//
// It returns nil when the listing fails or is empty, and an empty non-nil slice
// when entries exist but none of them matches.
func getLogFilesInStorage(helper storage_helper.StorageHelper, objectKeyPrefix string, logSuffix string) []storage.FileInfo {
	entries, err := helper.GetOneLevelObjectsUnderDir(objectKeyPrefix)
	switch {
	case err != nil:
		log.Error("GetTrainLog read dir err.objectKeyPrefix=%s,err=%v", objectKeyPrefix, err)
		return nil
	case len(entries) == 0:
		return nil
	}

	matched := make([]storage.FileInfo, 0)
	for _, entry := range entries {
		if !entry.IsDir && strings.HasSuffix(entry.FileName, logSuffix) {
			matched = append(matched, entry)
		}
	}
	return matched
}

// GetAllOutputDownloadInfo opens every file found under opts.Path and bundles
// the readers into a download description of type ZIP named "<JobName>.zip".
//
// It returns (nil, nil) when nothing is stored under opts.Path. If any file
// cannot be opened, the readers collected so far are released through
// FileDownloadInfo.Close and the error is returned.
func GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)

	objects, listErr := helper.GetAllObjectsUnderDir(opts.Path)
	if listErr != nil {
		log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.Path, listErr)
		return nil, listErr
	}
	if len(objects) == 0 {
		return nil, nil
	}

	info := &entity.FileDownloadInfo{
		Readers:        make([]entity.FileReader, 0),
		ResultType:     entity.FileTypeZIP,
		ResultFileName: opts.JobName + ".zip",
	}
	for _, obj := range objects {
		if obj.IsDir {
			continue
		}
		rc, openErr := helper.OpenFile(obj.RelativePath)
		if openErr != nil {
			log.Error("GetAllOutputDownloadInfo OpenFile err.opts=%+v,err =%v", opts, openErr)
			// Do not leak the readers that were opened before the failure.
			info.Close()
			return nil, openErr
		}
		info.Readers = append(info.Readers, entity.FileReader{
			Reader: rc,
			Name:   obj.FileName,
		})
	}
	return info, nil
}

+ 19
- 13
services/ai_task_service/container_builder/code_builder.go View File

@@ -1,15 +1,15 @@
package container_builder

import (
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/ai_task_service/upload"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"path"
"strings"
)

type CodeBuilder struct {
@@ -42,14 +42,12 @@ func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerDat
jobName := ctx.Request.JobName
repo := ctx.Repository
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/"
uploader := upload.SelectUploaderFromStorageType(storageTypes[0])

remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + cloudbrain.CodeMountPath
uploader := storage_helper.SelectUploaderFromStorageType(storageTypes[0])

remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + opts.GetLocalPath()
//再次调试和在线运行notebook不需要下载、上传代码
if !ctx.Request.IsRestartRequest && !ctx.Request.IsFileNoteBookRequest {
log.Info("start to upload to remoteDir=" + remoteDir + " codeLocalPath=" + codeLocalPath)
if err := DownloadCode(ctx, codeLocalPath, b.Opts.NotArchive); err != nil {
if err := DownloadCode(ctx, codeLocalPath, b.Opts.Uncompressed); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
return nil, response.LOAD_CODE_FAILED
}
@@ -60,17 +58,22 @@ func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerDat
}
}

codeArchiveName := ""
//如果代码是压缩包形式,以默认分支命名压缩包(继承原有逻辑)
if !b.Opts.NotArchive {
var codeArchiveName, objectKey string
//如果代码是压缩包形式,挂载的是文件,以默认分支命名压缩包(继承原有逻辑)
if !b.Opts.Uncompressed {
codeArchiveName = cloudbrain.DefaultBranchName + ".zip"
objectKey = path.Join(remoteDir, codeArchiveName)
} else {
objectKey = remoteDir + "/"
}

containerPath := ""
if opts.ContainerPath != "" {
containerPath = opts.ContainerPath + "/" + codeArchiveName
//如果代码是压缩包,此时的挂载路径是文件
//如果代码不是压缩包,此时的挂载路径是目录
containerPath = path.Join(opts.ContainerPath, codeArchiveName)
}
objectKey := remoteDir + "/" + codeArchiveName
codeData := entity.ContainerData{
Name: strings.ToLower(repo.Name),
Bucket: uploader.GetBucket(),
@@ -79,6 +82,9 @@ func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerDat
ReadOnly: opts.ReadOnly,
ContainerPath: containerPath,
RealPath: uploader.GetRealPath(objectKey),
IsDir: b.Opts.Uncompressed,
S3DownloadUrl: uploader.GetS3DownloadUrl(objectKey),
StorageType: storageTypes[0],
}
return []entity.ContainerData{codeData}, nil
}

+ 7
- 7
services/ai_task_service/container_builder/common.go View File

@@ -6,7 +6,7 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/ai_task_service/upload"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"errors"
"io"
"io/ioutil"
@@ -14,7 +14,7 @@ import (
"strings"
)

func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive bool) error {
func DownloadCode(ctx *context.CreationContext, codeLocalPath string, uncompressed bool) error {
dir, err := ioutil.ReadDir(codeLocalPath)
//ReqCommitID为空时需要下载最新的代码,把旧的删掉
if len(dir) != 0 && ctx.Request.ReqCommitID == "" {
@@ -26,10 +26,10 @@ func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive

//目录为空时需要下载代码
if len(dir) == 0 {
if notArchive {
commitId, err = upload.DownloadCode(ctx.GitRepo, ctx.Repository, codeLocalPath, ctx.Request.BranchName)
if uncompressed {
commitId, err = storage_helper.DownloadCode(ctx.GitRepo, ctx.Repository, codeLocalPath, ctx.Request.BranchName)
} else {
commitId, err = upload.DownloadZipCode(ctx.GitRepo, codeLocalPath, ctx.Request.BranchName)
commitId, err = storage_helper.DownloadZipCode(ctx.GitRepo, codeLocalPath, ctx.Request.BranchName)
}
if err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", ctx.Repository.FullName(), err)
@@ -40,8 +40,8 @@ func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive
return nil
}

var obsUploader = &upload.OBSUploader{}
var minioUploader = &upload.MinioUploader{}
var obsUploader = &storage_helper.OBSHelper{}
var minioUploader = &storage_helper.MinioHelper{}

const CLONE_FILE_PREFIX = "file:///"



+ 0
- 2
services/ai_task_service/container_builder/container_builder.go View File

@@ -33,8 +33,6 @@ func CreateContainerBuilder(containerType entity.ContainerDataType, opts *entity
return nil
}
b := reflect.New(t.Elem()).Interface().(ContainerBuilder)
//.Interface().(ContainerBuilder)
//b.SetOpts(opts)
b.SetOpts(opts)
return b
}


+ 27
- 26
services/ai_task_service/container_builder/dataset_builder.go View File

@@ -1,14 +1,13 @@
package container_builder

import (
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
"path"
"strings"
)

type DatasetBuilder struct {
@@ -32,61 +31,63 @@ func (b *DatasetBuilder) Build(ctx *context.CreationContext) ([]entity.Container
if uuid == "" {
return nil, nil
}
var datasetInfos map[string]models.DatasetInfo
var datasetNames string
var err error
// models.GetDatasetInfo 是使用的以前的方法,所以此处按集群类型适配
if ctx.Request.Cluster == models.C2NetCluster {
datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid, ctx.Request.ComputeSource.Name)
} else {
datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid)
}
datasetInfos, err := models.GetDatasetInfo4AITask(uuid)
if err != nil {
log.Error("GetDatasetInfo failed: %v", err)
return nil, response.DATASET_SELECT_ERROR
}
uuidArray := strings.Split(uuid, ";")
if datasetInfos == nil || len(datasetInfos) < len(uuidArray) {
if len(datasetInfos) < len(strings.Split(uuid, ";")) {
log.Error("GetDatasetInfo count error.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
return nil, response.PARTIAL_DATASETS_NOT_AVAILABLE
}
var data []entity.ContainerData
for _, datasetInfo := range datasetInfos {
name := datasetInfo.FullName
var name, objectKey, s3DownloadUrl string
//如果不是压缩包,那么文件名是去掉后缀以后的数据集名称
if b.Opts.NotArchive {
name = datasetInfo.Name
if b.Opts.Uncompressed {
name = datasetInfo.Uncompressed.Name
objectKey = datasetInfo.Uncompressed.ObjectKey
s3DownloadUrl = datasetInfo.Uncompressed.S3DownloadUrl
} else {
name = datasetInfo.Compressed.Name
objectKey = datasetInfo.Compressed.ObjectKey
s3DownloadUrl = datasetInfo.Compressed.S3DownloadUrl
}
//由于云脑一训练任务单数据集情况比较特殊,挂载时没有数据集名字的父文件夹,因此特殊处理
//todo AITask 解决此特殊处理
if ctx.Request.Cluster == entity.OpenICloudbrainOne &&
ctx.Request.JobType == models.JobTypeTrain && len(datasetInfos) == 1 {
name = ""
}
if datasetInfo.Type == models.TypeCloudBrainOne {
//如果返回的localPath已经带了实际路径的前缀,需要去除掉以后才是在minio上的objectKey
objectKey := datasetInfo.DataLocalPath
objectKey = strings.TrimPrefix(objectKey, setting.Attachment.Minio.RealPath)
objectKey = strings.TrimPrefix(objectKey, setting.Attachment.Minio.Bucket)
objectKey = strings.TrimPrefix(objectKey, "/")
data = append(data, entity.ContainerData{
Name: name,
Bucket: minioUploader.GetBucket(),
EndPoint: minioUploader.GetEndpoint(),
ObjectKey: objectKey,
ReadOnly: b.Opts.ReadOnly,
ContainerPath: b.Opts.ContainerPath + "/" + name,
ContainerPath: path.Join(b.Opts.ContainerPath, name),
RealPath: minioUploader.GetRealPath(objectKey),
IsDir: b.Opts.Uncompressed,
Size: datasetInfo.Size,
StorageType: entity.MINIO,
})

} else {
objectKey := datasetInfo.DataLocalPath + datasetInfo.FullName
data = append(data, entity.ContainerData{
Name: name,
Bucket: obsUploader.GetBucket(),
EndPoint: obsUploader.GetEndpoint(),
ObjectKey: objectKey,
ReadOnly: b.Opts.ReadOnly,
ContainerPath: b.Opts.ContainerPath + "/" + name,
ContainerPath: path.Join(b.Opts.ContainerPath, name),
S3DownloadUrl: s3DownloadUrl,
IsDir: b.Opts.Uncompressed,
Size: datasetInfo.Size,
StorageType: entity.OBS,
})
}
}
ctx.Request.DatasetNames = datasetNames
return data, nil
}



+ 1
- 0
services/ai_task_service/container_builder/file_notebook_code_builder.go View File

@@ -34,6 +34,7 @@ func (b *FileNoteBookCodeBuilder) Build(ctx *context.CreationContext) ([]entity.
if repo == nil {
return nil, nil
}
//在线运行notebook不需要代码挂载或者调度,只需要把对对应分支的代码仓下载到指定目录。上传目标分支的逻辑在其他地方(继承原有逻辑)
err := DownloadBranch(repo, getCodePath(ctx.Request.JobName, repo, ctx.Request.FileBranchName), ctx.Request.FileBranchName)
if err != nil {
log.Error("download code failed", err)


+ 60
- 0
services/ai_task_service/container_builder/log_path_builder.go View File

@@ -0,0 +1,60 @@
package container_builder

import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"path"
)

// LogPathBuilder is the container builder that produces the log-path entry
// (entity.ContainerLogPath) for a task.
type LogPathBuilder struct {
// Opts is the build configuration; it is assigned through SetOpts.
Opts *entity.ContainerBuildOpts
}

func init() {
o := &LogPathBuilder{}
RegisterContainerBuilder(o)
}

// SetOpts stores opts as the configuration used by Build.
func (b *LogPathBuilder) SetOpts(opts *entity.ContainerBuildOpts) {
b.Opts = opts
}

// Build returns the single container-data entry that describes the log
// directory of the task being created.
//
// The directory key is the storage helper's default job prefix joined with
// Opts.GetLocalPath(); the first accepted storage type selects the helper.
// When Opts.MKDIR is set the directory is created in storage first.
//
// Returns (nil, nil) when the builder is disabled, SYSTEM_ERROR when no storage
// type is configured, and a wrapped error when creating the directory fails.
func (b *LogPathBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) {
	opts := b.Opts
	if opts.Disable {
		return nil, nil
	}
	if len(opts.AcceptStorageType) == 0 {
		return nil, response.SYSTEM_ERROR
	}

	storageType := opts.AcceptStorageType[0]
	helper := storage_helper.SelectUploaderFromStorageType(storageType)
	logDir := path.Join(helper.GetJobDefaultObjectKeyPrefix(ctx.Request.JobName), opts.GetLocalPath())

	if opts.MKDIR {
		if err := helper.MKDIR(logDir); err != nil {
			log.Error("MKDIR err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err)
			return nil, response.NewBizError(err)
		}
	}

	logMount := entity.ContainerData{
		ContainerPath: opts.ContainerPath,
		ReadOnly:      opts.ReadOnly,
		ObjectKey:     logDir,
		RealPath:      helper.GetRealPath(logDir),
		Bucket:        helper.GetBucket(),
		EndPoint:      helper.GetEndpoint(),
		IsDir:         true,
		StorageType:   storageType,
	}
	return []entity.ContainerData{logMount}, nil
}

// GetContainerType reports the container data type this builder produces:
// entity.ContainerLogPath.
func (b *LogPathBuilder) GetContainerType() entity.ContainerDataType {
return entity.ContainerLogPath
}

+ 29
- 10
services/ai_task_service/container_builder/output_path_builder.go View File

@@ -1,15 +1,13 @@
package container_builder

import (
"fmt"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/ai_task_service/upload"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"path"
)

type OutputPathBuilder struct {
@@ -29,25 +27,36 @@ func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.Contai
if b.Opts.Disable {
return nil, nil
}
log.Info("go here len(storageTypes)=")

storageTypes := b.Opts.AcceptStorageType
log.Info("len=" + fmt.Sprint(len(storageTypes)))
if storageTypes == nil || len(storageTypes) == 0 {
return nil, response.SYSTEM_ERROR
}

jobName := ctx.Request.JobName

uploader := upload.SelectUploaderFromStorageType(storageTypes[0])
remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + cloudbrain.ModelMountPath
if ctx.Request.JobType != models.JobTypeOnlineInference {
uploader := storage_helper.SelectUploaderFromStorageType(storageTypes[0])
remoteDir := path.Join(uploader.GetJobDefaultObjectKeyPrefix(jobName), b.Opts.GetLocalPath())
if b.Opts.MKDIR {
err := uploader.MKDIR(remoteDir)
if err != nil {
log.Error("MKDIR err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err)
return nil, response.NewBizError(err)
}
}

//如果是继续训练,需要将上次的结果拷贝到本次训练任务的输出目录
if ctx.Request.IsContinueRequest {
if ctx.SourceCloudbrain == nil {
log.Error("SourceCloudbrain empty.displayJobName = %s", ctx.Request.DisplayJobName)
return nil, response.PARAM_ERROR
}
sourcePath := getSourceOutputPath(ctx.SourceCloudbrain, uploader, b.Opts.ContainerPath)
err := uploader.CopyByPath(sourcePath, remoteDir, []string{"README", ".txt"})
if err != nil {
log.Error("CopyByPath err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err)
return nil, response.NewBizError(err)
}
}
return []entity.ContainerData{{
ContainerPath: b.Opts.ContainerPath,
ReadOnly: b.Opts.ReadOnly,
@@ -56,9 +65,19 @@ func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.Contai
Bucket: uploader.GetBucket(),
EndPoint: uploader.GetEndpoint(),
GetBackEndpoint: uploader.GetEndpoint(),
IsDir: true,
StorageType: storageTypes[0],
}}, nil
}

// getSourceOutputPath returns the object-key prefix under which the source
// task stored its output.
//
// The prefix recorded in the task's cloudbrain config wins when a config
// exists; otherwise the path is rebuilt as
// <default job prefix>/<version name>/<containerPath>.
func getSourceOutputPath(sourceCloudbrain *models.Cloudbrain, helper storage_helper.StorageHelper, containerPath string) string {
	if cfg := sourceCloudbrain.GetCloudbrainConfig(); cfg != nil {
		return cfg.OutputObjectPrefix
	}
	jobPrefix := helper.GetJobDefaultObjectKeyPrefix(sourceCloudbrain.JobName)
	return path.Join(jobPrefix, sourceCloudbrain.VersionName, containerPath)
}

// GetContainerType reports the container data type this builder produces:
// entity.ContainerOutPutPath.
func (b *OutputPathBuilder) GetContainerType() entity.ContainerDataType {
return entity.ContainerOutPutPath
}

+ 10
- 5
services/ai_task_service/container_builder/pre_model_builder.go View File

@@ -3,6 +3,7 @@ package container_builder
import (
"code.gitea.io/gitea/routers/response"
"fmt"
"path"
"strings"

"code.gitea.io/gitea/entity"
@@ -12,7 +13,7 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/ai_task_service/upload"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask"
)

@@ -69,7 +70,8 @@ func (b *PretrainModelBuilder) Build(ctx *context.CreationContext) ([]entity.Con
storageType := oldStorageType
ckptNames := strings.Split(form.PretrainModelCkptName, ";")
for _, ckptName := range ckptNames {
if !cloudbrainTask.IsModelFileExists(m, ckptName) {
isExists, size := cloudbrainTask.CheckAndGetFileSize(m, ckptName)
if !isExists {
log.Error("model file not exist.name = %s", ckptName)
return nil, response.MODEL_NOT_EXISTS
}
@@ -90,15 +92,18 @@ func (b *PretrainModelBuilder) Build(ctx *context.CreationContext) ([]entity.Con
storageType = entity.MINIO
}
}
uploader := upload.SelectUploaderFromStorageType(storageType)
uploader := storage_helper.SelectUploaderFromStorageType(storageType)
modelData := entity.ContainerData{
Name: form.PretrainModelName,
Name: ckptName,
Bucket: uploader.GetBucket(),
EndPoint: uploader.GetEndpoint(),
ObjectKey: preTrainModelPath,
ReadOnly: b.Opts.ReadOnly,
ContainerPath: b.Opts.ContainerPath + "/" + ckptName,
ContainerPath: path.Join(b.Opts.ContainerPath, ckptName),
RealPath: uploader.GetRealPath(preTrainModelPath),
S3DownloadUrl: uploader.GetS3DownloadUrl(preTrainModelPath),
IsDir: false,
Size: size,
}
preTrainModelEntity = append(preTrainModelEntity, modelData)
}


+ 30
- 1
services/ai_task_service/context/context.go View File

@@ -4,6 +4,7 @@ import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"encoding/json"
)

type CreationContext struct {
@@ -17,7 +18,7 @@ type CreationContext struct {
Response *entity.CreationResponse
SourceCloudbrain *models.Cloudbrain
NewCloudbrain *models.Cloudbrain
AITaskConfig entity.AITaskConfig
Config *entity.AITaskBaseConfig
}

func (ctx *CreationContext) AddContainerData(t entity.ContainerDataType, d []entity.ContainerData) {
@@ -46,3 +47,31 @@ func (ctx *CreationContext) WriteResponse(t entity.ContainerDataType) entity.Con
}
return a[0]
}

// BuildCloudbrainConfig assembles the CloudbrainConfig record for the task
// being created.
//
// ctx.Config and ctx.ContainerData are stored as JSON snapshots; a value that
// fails to marshal is stored as an empty string. Bucket, object prefix, storage
// type and endpoint are copied from the output-path and log-path container data.
func (ctx *CreationContext) BuildCloudbrainConfig() *models.CloudbrainConfig {
	// snapshot serialises v to JSON and falls back to "" on failure.
	snapshot := func(v interface{}) string {
		raw, err := json.Marshal(v)
		if err != nil {
			return ""
		}
		return string(raw)
	}

	configSnapshot := snapshot(ctx.Config)
	containerSnapshot := snapshot(ctx.ContainerData)
	outputData := ctx.GetContainerData(entity.ContainerOutPutPath)
	logData := ctx.GetContainerData(entity.ContainerLogPath)

	return &models.CloudbrainConfig{
		ConfigurationSnapshot: configSnapshot,
		OutputBucket:          outputData.Bucket,
		OutputObjectPrefix:    outputData.ObjectKey,
		OutputStorageType:     string(outputData.StorageType),
		OutputEndpoint:        outputData.EndPoint,
		LogBucket:             logData.Bucket,
		LogObjectPrefix:       logData.ObjectKey,
		LogStorageType:        string(logData.StorageType),
		LogEndpoint:           logData.EndPoint,
		ContainerDataSnapshot: containerSnapshot,
	}
}

+ 17
- 14
services/ai_task_service/schedule/model_schedule.go View File

@@ -55,33 +55,28 @@ func GetModelScheduleStatus(jobId string) (models.ModelMigrateStatus, error) {
return record.Status, nil
}

func RetryModelMigrate(jobId string) error {
job, err := models.GetCloudbrainByJobID(jobId)
if err != nil {
log.Error("RetryModelMigrate GetCloudbrainByJobID err.jobId=%s err=%v", jobId, err)
return errors.New("jobId not correct")
}
func RetryModelMigrate(job *models.Cloudbrain) error {
if !job.IsTerminal() {
log.Info("RetryModelMigrate job is not terminal.jobId=%s", jobId)
log.Info("RetryModelMigrate job is not terminal.id=%s", job.ID)
return errors.New("task is not terminal")
}

//避免并发问题,先尝试获取锁,获取锁以后再查最新的记录
lock := redis_lock.NewDistributeLock(redis_key.RecordHandleLock(jobId))
lock := redis_lock.NewDistributeLock(redis_key.RecordHandleLock(job.JobID))
success, err := lock.LockWithWait(10*time.Second, 10*time.Second)
if err != nil {
log.Error("HandleUnfinishedMigrateRecord lock err.jobId=%d %v", jobId, err)
log.Error("HandleUnfinishedMigrateRecord lock err.id=%d %v", job.ID, err)
return err
}
if !success {
log.Error("HandleUnfinishedMigrateRecord lock failed.ID=%d ", jobId)
log.Error("HandleUnfinishedMigrateRecord lock failed.ID=%d ", job.ID)
return nil
}
defer lock.UnLock()

record, err := models.GetModelMigrateRecordByCloudbrainId(job.ID)
if err != nil {
log.Error("RetryModelMigrate GetModelMigrateRecordByCloudbrainId err.jobId=%s err=%v", jobId, err)
log.Error("RetryModelMigrate GetModelMigrateRecordByCloudbrainId err.id=%s err=%v", job.ID, err)
if models.IsErrRecordNotExist(err) {
return nil
}
@@ -91,7 +86,7 @@ func RetryModelMigrate(jobId string) error {
//只有两种情况可以再次调度,一是虎鲸调度失败 二是本地移桶失败
if record.CurrentStep == models.GrampusMigrateFailed {
log.Info("retry PostModelMigrate. record.id = %d", record.ID)
_, err := grampus.PostModelMigrate(jobId)
_, err := grampus.PostModelMigrate(job.JobID)
if err != nil {
log.Error("PostModelMigrate err.%v", err)
return err
@@ -217,6 +212,11 @@ func LocalMigrateOperate(jobName, computeSource string, r *models.ModelMigrateRe
}
log.Info("DestObjectKey", r.DestObjectKey)
if strings.Contains(r.DestObjectKey, ".") {
isExists, _ := storage.IsObjectExist4Obs(r.DestBucket, r.DestObjectKey)
if !isExists {
//此时没有文件需要解压迁移,直接更新为成功
models.UpdateModelMigrateStatusByStep(r, models.BucketMoveSuccess)
}
decompress(r.DestBucket+"/"+r.DestObjectKey, setting.Bucket+"/"+strings.TrimSuffix(r.DestObjectKey, models.ModelSuffix))

} else { //如果是文件夹,遍历文件
@@ -225,7 +225,10 @@ func LocalMigrateOperate(jobName, computeSource string, r *models.ModelMigrateRe
log.Error("UpdateModelMigrateStatusByStep err. r.ID=%d step=%d err=%v", r.ID, models.BucketMoveFailed, err)
return err
}

if len(fileInfos) == 0 {
//此时没有文件需要解压迁移,直接更新为成功
models.UpdateModelMigrateStatusByStep(r, models.BucketMoveSuccess)
}
for _, fileInfo := range fileInfos {
log.Info("decompress file:", fileInfo.FileName)
sourceFilPath := r.DestBucket + "/" + r.DestObjectKey + fileInfo.FileName
@@ -311,7 +314,7 @@ func updateModelMigrateFromRes(r *models.ModelMigrateRecord, res *models.Grampus
}

func MoveBucketInOpenIMinio(objectKeyPrefix, targetObjectPrefix, oldBucket, newBucket string) error {
var core = storage.ScheduleMinioCore
var core = storage.MinioCore
objectInfo := core.Client.ListObjects(oldBucket, objectKeyPrefix, true, nil)
log.Info("MoveBucketInOpenIMinio start.objectKeyPrefix=%s", objectKeyPrefix)
count := 0


+ 48
- 0
services/ai_task_service/storage_helper/client.go View File

@@ -0,0 +1,48 @@
package storage_helper

import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/modules/storage"
"io"
"strings"
)

// UploaderConfig carries a bucket name and an endpoint.
// NOTE(review): no use of this type is visible in this file — confirm it is still needed.
type UploaderConfig struct {
Bucket string
Endpoint string
}

// StorageHelper is the set of object-storage operations the AI task services
// rely on. SelectUploaderFromStorageType picks the OBS or MinIO implementation.
type StorageHelper interface {
// UploadDir uploads the local directory codePath.
// NOTE(review): both implementations name the second argument objectKeyPrefix,
// not jobName — confirm which meaning callers should rely on.
UploadDir(codePath, jobName string) error
// GetRealPath maps an object key to a real path. The MinIO helper builds it
// from its configured real path and bucket; the OBS helper returns "".
GetRealPath(objectKey string) string
// GetBucket returns the bucket the helper operates on.
GetBucket() string
// GetEndpoint returns the storage endpoint.
GetEndpoint() string
// GetJobDefaultObjectKeyPrefix returns the default object-key prefix for
// jobName (a configured code-path prefix joined with the job name).
GetJobDefaultObjectKeyPrefix(jobName string) string
// MKDIR makes path exist as a directory. MinIO does so by uploading a README
// placeholder under it; OBS puts an object whose key ends with "/".
MKDIR(path string) error
// GetOneLevelObjectsUnderDir lists the direct children (files and
// sub-directories) of dirPath. maxKeyArray[0], when given, overrides the
// default maximum number of keys requested.
GetOneLevelObjectsUnderDir(dirPath string, maxKeyArray ...int) ([]storage.FileInfo, error)
// GetAllObjectsUnderDir lists every file below prefix, directories excluded,
// in sorted order. maxKeyArray[0], when given, overrides the default page size.
GetAllObjectsUnderDir(prefix string, maxKeyArray ...int) ([]storage.FileInfo, error)
// OpenFile opens the object at path for reading; the caller closes it.
OpenFile(path string) (io.ReadCloser, error)
// GetSignedDownloadUrl returns a download URL for key. The MinIO helper
// returns a presigned GET URL that forces an attachment download.
GetSignedDownloadUrl(key string) (string, error)
// GetS3DownloadUrl returns an S3-style download URL for key; the MinIO helper
// always returns "".
GetS3DownloadUrl(key string) string
// CopyByPath copies the files under sourcePath to targetPath, skipping files
// whose name ends with any entry of filterSuffix.
CopyByPath(sourcePath, targetPath string, filterSuffix []string) error
}

// SelectUploaderFromStorageType returns the StorageHelper for storageType:
// an OBSHelper for entity.OBS, a MinioHelper for entity.MINIO, and nil for
// any other value.
func SelectUploaderFromStorageType(storageType entity.StorageType) StorageHelper {
	if storageType == entity.OBS {
		return &OBSHelper{}
	}
	if storageType == entity.MINIO {
		return &MinioHelper{}
	}
	return nil
}

// isMatchSuffix reports whether fileName ends with at least one of the
// suffixes in filterSuffix. An empty or nil list never matches.
func isMatchSuffix(fileName string, filterSuffix []string) bool {
	matched := false
	for i := 0; i < len(filterSuffix) && !matched; i++ {
		matched = strings.HasSuffix(fileName, filterSuffix[i])
	}
	return matched
}

+ 203
- 0
services/ai_task_service/storage_helper/minio.go View File

@@ -0,0 +1,203 @@
package storage_helper

import (
"bytes"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"fmt"
"github.com/minio/minio-go"
"io"
"net/url"
"path"
"sort"
"strings"
"time"
)

// MinioHelper is the MinIO-backed StorageHelper. It holds no state and reads
// bucket and endpoint from setting.Attachment.Minio.
type MinioHelper struct {
}

// UploadDir delegates to UploadDirToMinio with codePath, objectKeyPrefix and an
// empty third argument.
func (m *MinioHelper) UploadDir(codePath, objectKeyPrefix string) error {
return UploadDirToMinio(codePath, objectKeyPrefix, "")
}
// GetJobDefaultObjectKeyPrefix returns setting.CBCodePathPrefix joined with jobName.
func (m *MinioHelper) GetJobDefaultObjectKeyPrefix(jobName string) string {
return path.Join(setting.CBCodePathPrefix, jobName)
}
// GetRealPath builds the real path of objectKey by concatenating the
// configured MinIO real path, the bucket name, "/" and the key with any
// leading "/" removed.
func (m *MinioHelper) GetRealPath(objectKey string) string {
	relativeKey := strings.TrimPrefix(objectKey, "/")
	bucketRoot := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket
	return bucketRoot + "/" + relativeKey
}

// GetBucket returns the MinIO bucket configured in setting.Attachment.Minio.
func (m *MinioHelper) GetBucket() string {
return setting.Attachment.Minio.Bucket
}

// GetEndpoint returns the MinIO endpoint configured in setting.Attachment.Minio.
func (m *MinioHelper) GetEndpoint() string {
return setting.Attachment.Minio.Endpoint
}

// README is the name of the placeholder object MinioHelper.MKDIR uploads to
// make a directory exist.
const README = "README"

// MKDIR makes path exist as a directory in the MinIO bucket.
//
// An empty directory cannot be created directly, so a small README object is
// uploaded under path to stand in for it.
func (m *MinioHelper) MKDIR(path string) error {
	const placeholder = "You can put the files into this directory and download the files by the web page."
	readmeKey := strings.TrimSuffix(path, "/") + "/" + README
	_, err := storage.Attachments.UploadContent(m.GetBucket(), readmeKey, bytes.NewReader([]byte(placeholder)))
	return err
}

// OpenFile opens the object objectKey in the MinIO bucket for reading.
// The caller must close the returned reader; on failure the reader is nil.
func (m *MinioHelper) OpenFile(objectKey string) (io.ReadCloser, error) {
	object, _, err := storage.MinioCore.GetObject(m.GetBucket(), objectKey, minio.GetObjectOptions{})
	if err == nil {
		return object, nil
	}
	return nil, err
}

// GetOneLevelObjectsUnderDir lists the direct children of dirPath in the MinIO
// bucket: files first, then sub-directories.
//
// dirPath is normalised to end with "/". Only one page is requested; its size
// is setting.OUTPUT_SHOW_MAX_KEY unless maxKeyArray[0] is given. The object
// whose key equals the directory itself is left out. RelativePath of every
// entry is the normalised dirPath followed by the entry name.
func (m *MinioHelper) GetOneLevelObjectsUnderDir(dirPath string, maxKeyArray ...int) ([]storage.FileInfo, error) {
	if !strings.HasSuffix(dirPath, "/") {
		dirPath += "/"
	}
	maxKey := setting.OUTPUT_SHOW_MAX_KEY
	if len(maxKeyArray) > 0 {
		maxKey = maxKeyArray[0]
	}
	r, err := storage.MinioCore.ListObjectsV2(m.GetBucket(), dirPath, "", false, "/", maxKey, "")
	if err != nil {
		return nil, err
	}

	fileInfos := make([]storage.FileInfo, 0, len(r.Contents)+len(r.CommonPrefixes))
	for _, val := range r.Contents {
		if val.Key == dirPath {
			continue
		}
		fileName := strings.TrimPrefix(val.Key, dirPath)
		fileInfos = append(fileInfos, storage.FileInfo{
			ModTime:      val.LastModified.Local().Format("2006-01-02 15:04:05"),
			FileName:     fileName,
			Size:         val.Size,
			IsDir:        false,
			RelativePath: dirPath + fileName,
		})
	}
	for _, val := range r.CommonPrefixes {
		fileName := strings.TrimSuffix(strings.TrimPrefix(val.Prefix, dirPath), "/")
		fileInfos = append(fileInfos, storage.FileInfo{
			FileName: fileName,
			IsDir:    true,
			// dirPath already ends with "/"; adding another separator here
			// would yield a "dir//sub" key that matches nothing in storage.
			RelativePath: dirPath + fileName,
		})
	}
	return fileInfos, nil
}

// GetAllObjectsUnderDir lists every file stored below prefix in the MinIO
// bucket, following pagination until the listing is complete, and returns the
// result sorted.
//
// prefix is normalised to end with "/". Keys ending with "/" and the key equal
// to the prefix itself are skipped. FileName is the key relative to prefix and
// RelativePath is the full key. The page size is
// setting.OUTPUT_DOWNLOAD_MAX_KEY unless maxKeyArray[0] is given.
//
// An error is returned when a listing request fails, or when the server
// reports a truncated page without any way to continue from it.
func (m *MinioHelper) GetAllObjectsUnderDir(prefix string, maxKeyArray ...int) ([]storage.FileInfo, error) {
	prefix = strings.TrimSuffix(prefix, "/") + "/"
	prefixLen := len(prefix)
	delimiter := ""
	marker := ""
	index := 1
	fileInfoList := storage.FileInfoList{}
	maxKey := setting.OUTPUT_DOWNLOAD_MAX_KEY
	if len(maxKeyArray) > 0 {
		maxKey = maxKeyArray[0]
	}
	for {
		output, err := storage.MinioCore.ListObjects(m.GetBucket(), prefix, marker, delimiter, maxKey)
		if err != nil {
			log.Info("list error." + err.Error())
			return nil, err
		}
		log.Info("Page:%d\n", index)
		index++
		for _, val := range output.Contents {
			// Skip the directory placeholder itself and any nested directory marker.
			if prefixLen == len(val.Key) || strings.HasSuffix(val.Key, "/") {
				continue
			}
			fileInfoList = append(fileInfoList, storage.FileInfo{
				ModTime:      val.LastModified.Format("2006-01-02 15:04:05"),
				FileName:     strings.TrimPrefix(val.Key[prefixLen:], "/"),
				Size:         val.Size,
				IsDir:        false,
				ParenDir:     "",
				RelativePath: val.Key,
			})
		}
		if !output.IsTruncated {
			break
		}
		// S3-style V1 listings only fill NextMarker when a delimiter is sent.
		// This listing sends none, so fall back to the last key of the page;
		// otherwise the same first page would be requested forever.
		next := output.NextMarker
		if next == "" && len(output.Contents) > 0 {
			next = output.Contents[len(output.Contents)-1].Key
		}
		if next == "" || next == marker {
			return nil, fmt.Errorf("listing %q is truncated but provides no marker to continue", prefix)
		}
		marker = next
	}
	sort.Sort(fileInfoList)
	return fileInfoList, nil
}

// GetSignedDownloadUrl returns a presigned GET URL for key that makes the
// browser download the object as an attachment.
//
// The attachment file name is the part of key after its last "/" (the whole
// key when it contains no "/"). When that part is empty, i.e. key ends with
// "/", the current Unix timestamp is used instead. The URL lifetime is
// storage.PresignedGetUrlExpireTime.
func (m *MinioHelper) GetSignedDownloadUrl(key string) (string, error) {
	// LastIndex yields -1 for a key without "/", so +1 selects the whole key
	// instead of slicing with a negative index.
	fileName := key[strings.LastIndex(key, "/")+1:]
	if fileName == "" {
		fileName = fmt.Sprint(time.Now().Unix())
	}
	reqParams := make(url.Values)
	reqParams.Set("response-content-disposition", "attachment; filename=\""+fileName+"\"")

	preURL, err := storage.MinioCore.PresignedGetObject(m.GetBucket(), key, storage.PresignedGetUrlExpireTime, reqParams)
	if err != nil {
		return "", err
	}
	return preURL.String(), nil
}

// GetS3DownloadUrl always returns "" for MinIO; key is ignored.
func (m *MinioHelper) GetS3DownloadUrl(key string) string {
return ""
}

// CopyByPath copies every file below sourcePath to the same relative location
// below targetPath inside the MinIO bucket.
//
// Files whose name ends with any entry of filterSuffix are skipped. It returns
// nil when nothing is left to copy, and the first error met while listing the
// source or copying a file.
func (m *MinioHelper) CopyByPath(sourcePath, targetPath string, filterSuffix []string) error {
	log.Info("CopyByPath sourcePath=%s,targetPath=%s", sourcePath, targetPath)
	allFiles, err := m.GetAllObjectsUnderDir(sourcePath)
	if err != nil {
		// A failed listing must not be reported as "nothing to copy".
		log.Error("CopyByPath list source files error. sourcePath=%s err=%v", sourcePath, err)
		return err
	}

	fileNames := make([]string, 0, len(allFiles))
	for _, file := range allFiles {
		if isMatchSuffix(file.FileName, filterSuffix) {
			continue
		}
		fileNames = append(fileNames, file.FileName)
	}
	log.Info("Previous task all files", fileNames)
	if len(fileNames) == 0 {
		return nil
	}

	for _, file := range fileNames {
		srcObjectName := path.Join(sourcePath, file)
		destObjectName := path.Join(targetPath, file)
		// The stat result is informational only; the copy below decides success.
		if _, err := storage.MinioCore.Client.StatObject(m.GetBucket(), srcObjectName, minio.StatObjectOptions{}); err != nil {
			log.Info("Get file error:" + err.Error())
		}
		if _, err := storage.MinioCore.CopyObject(m.GetBucket(), srcObjectName, m.GetBucket(), destObjectName, map[string]string{}); err != nil {
			log.Error("CopyByPath MinioCopyFiles error. sourcePath=%s targetPath=%s err=%v", sourcePath, targetPath, err)
			return err
		}
	}
	return nil
}

+ 225
- 0
services/ai_task_service/storage_helper/obs.go View File

@@ -0,0 +1,225 @@
package storage_helper

import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/obs"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"io"
"net/url"
"path"
"sort"
"strings"
)

// OBSHelper is the OBS-backed StorageHelper. It holds no state and reads
// bucket and endpoint from the setting package.
type OBSHelper struct {
}

// UploadDir delegates to UploadDirToObs with codePath, objectKeyPrefix and an
// empty third argument.
func (m *OBSHelper) UploadDir(codePath, objectKeyPrefix string) error {
return UploadDirToObs(codePath, objectKeyPrefix, "")
}

// GetJobDefaultObjectKeyPrefix returns setting.CodePathPrefix joined with jobName.
func (m *OBSHelper) GetJobDefaultObjectKeyPrefix(jobName string) string {
return path.Join(setting.CodePathPrefix, jobName)
}

// GetRealPath always returns "" for OBS; objectKey is ignored.
func (m *OBSHelper) GetRealPath(objectKey string) string {
return ""
}

// GetBucket returns the OBS bucket configured as setting.Bucket.
func (m *OBSHelper) GetBucket() string {
return setting.Bucket
}
// MKDIR creates path as a directory in the OBS bucket by putting an object
// whose key is path with exactly one trailing "/".
func (m *OBSHelper) MKDIR(path string) error {
	dirKey := strings.TrimSuffix(path, "/") + "/"

	input := &obs.PutObjectInput{}
	input.Bucket = setting.Bucket
	input.Key = dirKey

	if _, err := storage.ObsCli.PutObject(input); err != nil {
		log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
		return err
	}
	return nil
}

// GetEndpoint returns setting.Endpoint without its scheme, i.e. the text that
// follows the first "//". An endpoint configured without "//" is returned
// unchanged.
func (m *OBSHelper) GetEndpoint() string {
	// Index yields -1 when "//" is absent; slicing at index+2 would then drop
	// the first character of the host name.
	if index := strings.Index(setting.Endpoint, "//"); index >= 0 {
		return setting.Endpoint[index+2:]
	}
	return setting.Endpoint
}

// GetOneLevelObjectsUnderDir lists the direct children of dirPath in the OBS
// bucket: files first, then sub-directories.
//
// The listing prefix is dirPath with a trailing "/" added when missing. Only
// one page is requested; its size is setting.OUTPUT_SHOW_MAX_KEY unless
// maxKeyArray[0] is given. The object whose key equals the prefix itself is
// left out. File entries carry RelativePath = prefix + file name; directory
// entries carry only their name.
func (m *OBSHelper) GetOneLevelObjectsUnderDir(dirPath string, maxKeyArray ...int) ([]storage.FileInfo, error) {
	input := &obs.ListObjectsInput{}
	input.Bucket = m.GetBucket()
	input.Prefix = dirPath
	input.Delimiter = "/"
	maxKey := setting.OUTPUT_SHOW_MAX_KEY
	if len(maxKeyArray) > 0 {
		maxKey = maxKeyArray[0]
	}
	input.MaxKeys = maxKey
	if !strings.HasSuffix(input.Prefix, "/") {
		input.Prefix += "/"
	}

	output, err := storage.ObsCli.ListObjects(input)
	if err != nil {
		if obsError, ok := err.(obs.ObsError); ok {
			log.Error("Code:%s, Message:%s", obsError.Code, obsError.Message)
		}
		return nil, err
	}
	log.Info("Page:%d\n", 1)

	fileInfos := make([]storage.FileInfo, 0, len(output.Contents)+len(output.CommonPrefixes))
	for _, val := range output.Contents {
		if val.Key == input.Prefix {
			continue
		}
		fileName := strings.TrimPrefix(val.Key, input.Prefix)
		fileInfos = append(fileInfos, storage.FileInfo{
			ModTime:  val.LastModified.Local().Format("2006-01-02 15:04:05"),
			FileName: fileName,
			Size:     val.Size,
			IsDir:    false,
			// Build from the normalised prefix: joining the raw dirPath with
			// "/" produced "dir//file" whenever the caller passed "dir/".
			RelativePath: input.Prefix + fileName,
		})
	}
	for _, val := range output.CommonPrefixes {
		fileName := strings.TrimSuffix(strings.TrimPrefix(val, input.Prefix), "/")
		fileInfos = append(fileInfos, storage.FileInfo{
			FileName: fileName,
			IsDir:    true,
		})
	}
	return fileInfos, nil
}

// GetAllObjectsUnderDir lists every file object whose key starts with prefix
// (one trailing "/" is ensured) in the bucket returned by GetBucket.
//
// No delimiter is set on the request. ListObjects is called repeatedly, moving
// input.Marker to the previous NextMarker, until the output is no longer
// truncated. MaxKeys of each request is maxKeyArray[0] when provided, otherwise
// setting.OUTPUT_DOWNLOAD_MAX_KEY.
//
// Keys equal in length to the prefix and keys ending in "/" are skipped, so the
// result only contains entries with IsDir false. FileName is the key with the
// prefix removed and RelativePath is the full key. The list is sorted with
// sort.Sort before it is returned.
//
// On failure the ListObjects error is returned together with a nil slice.
func (m *OBSHelper) GetAllObjectsUnderDir(prefix string, maxKeyArray ...int) ([]storage.FileInfo, error) {
	prefix = strings.TrimSuffix(prefix, "/") + "/"

	maxKey := setting.OUTPUT_DOWNLOAD_MAX_KEY
	if len(maxKeyArray) > 0 {
		maxKey = maxKeyArray[0]
	}

	input := &obs.ListObjectsInput{}
	input.Bucket = m.GetBucket()
	input.Prefix = prefix
	input.MaxKeys = maxKey

	// Pass values as arguments: a path must never be used as the format string.
	log.Info("full obs path: %s", input.Bucket+input.Prefix)
	log.Info("prefix=%s", input.Prefix)

	fileInfoList := storage.FileInfoList{}
	prefixLen := len(prefix)
	for page := 1; ; page++ {
		output, err := storage.ObsCli.ListObjects(input)
		if err != nil {
			if obsError, ok := err.(obs.ObsError); ok {
				log.Error("Code:%s\n", obsError.Code)
				log.Error("Message:%s\n", obsError.Message)
			}
			return nil, err
		}
		log.Info("Page:%d\n", page)

		for _, val := range output.Contents {
			// Skip the prefix placeholder itself and any directory placeholder.
			if len(val.Key) == prefixLen || strings.HasSuffix(val.Key, "/") {
				continue
			}
			// NOTE(review): unlike GetOneLevelObjectsUnderDir, the timestamp is
			// not converted with Local() here — confirm which one is intended.
			fileInfoList = append(fileInfoList, storage.FileInfo{
				ModTime:      val.LastModified.Format("2006-01-02 15:04:05"),
				FileName:     strings.TrimPrefix(val.Key[prefixLen:], "/"),
				Size:         val.Size,
				IsDir:        false,
				ParenDir:     "",
				RelativePath: val.Key,
			})
		}

		if !output.IsTruncated {
			break
		}
		input.Marker = output.NextMarker
	}

	sort.Sort(fileInfoList)
	return fileInfoList, nil
}

// OpenFile fetches the object stored under key path in the bucket returned by
// GetBucket and returns its body.
//
// The returned io.ReadCloser is handed over as-is; closing it is left to the
// caller. If GetObject fails, the error is logged and returned with a nil body.
func (m *OBSHelper) OpenFile(path string) (io.ReadCloser, error) {
	req := &obs.GetObjectInput{}
	req.Bucket = m.GetBucket()
	req.Key = path

	resp, err := storage.ObsCli.GetObject(req)
	if err == nil {
		return resp.Body, nil
	}
	log.Error("OpenFile err. path=%s err=%v", path, err)
	return nil, err
}

// GetSignedDownloadUrl creates a signed GET URL for the object key in the
// bucket returned by GetBucket.
//
// The request carries a "response-content-disposition" query parameter of the
// form `attachment; filename="<name>"`, where <name> is the part of key after
// its last "/" (the whole key when it contains none), escaped with
// url.PathEscape.
//
// If CreateSignedUrl fails, the error is logged and returned with an empty URL.
func (m *OBSHelper) GetSignedDownloadUrl(key string) (string, error) {
	input := &obs.CreateSignedUrlInput{}
	input.Bucket = m.GetBucket()
	input.Key = key
	input.Method = obs.HttpMethodGet
	// presumably seconds (i.e. one hour) — confirm against the OBS SDK.
	input.Expires = 60 * 60

	// LastIndex returns -1 when there is no "/", which makes the slice start
	// at 0 and keeps the whole key as the file name.
	filename := url.PathEscape(key[strings.LastIndex(key, "/")+1:])
	input.QueryParams = map[string]string{
		"response-content-disposition": "attachment; filename=\"" + filename + "\"",
	}

	output, err := storage.ObsCli.CreateSignedUrl(input)
	if err != nil {
		log.Error("CreateSignedUrl failed: %v", err)
		return "", err
	}
	return output.SignedUrl, nil
}

// GetS3DownloadUrl returns "s3://<setting.Bucket>/<key>", where one leading "/"
// of key (if present) is removed before it is appended.
func (m *OBSHelper) GetS3DownloadUrl(key string) string {
return "s3://" + setting.Bucket + "/" + strings.TrimPrefix(key, "/")
}

// CopyByPath copies the files found under sourcePath to targetPath inside the
// bucket returned by GetBucket.
//
// The source is listed with GetAllObjectsUnderDir; every file for which
// isMatchSuffix(file.FileName, filterSuffix) reports true is left out, and the
// remaining file names are handed to storage.ObsCopyManyFile.
//
// An error from listing the source or from the copy itself is logged and
// returned; nil is returned on success.
func (m *OBSHelper) CopyByPath(sourcePath, targetPath string, filterSuffix []string) error {
	log.Info("CopyByPath sourcePath=%s,targetPath=%s", sourcePath, targetPath)

	allFiles, err := m.GetAllObjectsUnderDir(sourcePath)
	if err != nil {
		// A failed listing must not be mistaken for an empty source directory.
		log.Error("CopyByPath GetAllObjectsUnderDir error. sourcePath=%s err=%v", sourcePath, err)
		return err
	}

	var fileNames []string
	for _, file := range allFiles {
		if isMatchSuffix(file.FileName, filterSuffix) {
			continue
		}
		fileNames = append(fileNames, file.FileName)
	}
	log.Info("Previous task all files: %v", fileNames)

	if _, err := storage.ObsCopyManyFile(m.GetBucket(), sourcePath, m.GetBucket(), targetPath, fileNames); err != nil {
		log.Error("CopyByPath ObsCopyManyFile error. sourcePath=%s targetPath=%s err=%v", sourcePath, targetPath, err)
		return err
	}
	return nil
}

services/ai_task_service/upload/repo.go → services/ai_task_service/storage_helper/repo.go View File

@@ -1,4 +1,4 @@
package upload
package storage_helper

import (
"bufio"

+ 56
- 74
services/ai_task_service/task/cloudbrain_one_notebook_task.go View File

@@ -3,14 +3,12 @@ package task
import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/container_builder"
"code.gitea.io/gitea/services/ai_task_service/context"
"strconv"
"strings"
)

@@ -23,60 +21,31 @@ func init() {
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.OpenICloudbrainOne,
JobType: models.JobTypeDebug,
Config: GetCloudbrainOneNotebookConfig,
},
}
RegisterTask(models.JobTypeDebug, entity.OpenICloudbrainOne, t)
}

// Create runs the creation pipeline for a Cloudbrain One notebook task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	pipeline := (&CreateOperator{}).
		Next(t.CheckParam).
		Next(t.CheckMulti).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasetSize).
		Next(t.CheckDatasetExists).
		Next(t.CheckBranchExists).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async)

	if bizErr := pipeline.Operate(ctx); bizErr != nil {
		log.Error("create CloudbrainOneNotebookTask err.%v", bizErr)
		return nil, bizErr
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (g CloudbrainOneNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig {
if opts.IsFileNoteBookRequest {
return entity.AITaskConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerFileNoteBookCode: {},
entity.ContainerCode: {
ContainerPath: "/code",
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO},
NotArchive: true,
},
},
}
}
return entity.AITaskConfig{
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
func GetCloudbrainOneNotebookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
//默认配置
config := &entity.AITaskBaseConfig{
ActionType: models.ActionCreateDebugGPUTask,
IsActionUseJobId: false,
DatasetsLimitSizeGB: setting.DebugAttachSize,
DatasetsMaxNum: setting.MaxDatasetNum,
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
ContainerPath: "/code",
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO},
NotArchive: true,
Uncompressed: true,
},
entity.ContainerDataset: {
ContainerPath: "/dataset",
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO},
NotArchive: true,
Uncompressed: true,
},
entity.ContainerPreTrainModel: {
ContainerPath: "/pretrainmodel",
@@ -84,22 +53,60 @@ func (g CloudbrainOneNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfig
AcceptStorageType: []entity.StorageType{entity.MINIO},
},
entity.ContainerOutPutPath: {
ContainerPath: "/model",
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO},
ContainerPath: "/model",
StorageRelativePath: cloudbrain.ModelMountPath,
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO},
MKDIR: true,
},
},
}
//在线运行notebook配置
if opts.IsFileNoteBookRequest {
config = &entity.AITaskBaseConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerFileNoteBookCode: {},
entity.ContainerCode: {
ContainerPath: "/code",
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO},
Uncompressed: true,
},
},
}

}
return config
}

// Create runs the creation pipeline for a Cloudbrain One notebook task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	pipeline := (&CreateOperator{}).
		Next(t.CheckParamFormat).
		Next(t.CheckMultiRequest).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasets).
		Next(t.CheckBranchExists).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async)

	if bizErr := pipeline.Operate(ctx); bizErr != nil {
		log.Error("create CloudbrainOneNotebookTask err.%v", bizErr)
		return nil, bizErr
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (t CloudbrainOneNotebookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
c := &CreateOperator{}
err := c.Next(t.BuildRequest4Restart).
Next(t.CheckOutput4Restart).
Next(t.CheckSourceTaskIsCleared).
Next(t.CheckModel).
Next(t.CheckDatasetExists).
Next(t.CheckParam).
Next(t.CheckMulti).
Next(t.CheckDatasets).
Next(t.CheckParamFormat).
Next(t.CheckMultiRequest).
Next(t.LoadSpec).
Next(t.CheckPointBalance).
Next(t.BuildContainerData).
@@ -119,17 +126,6 @@ func (t CloudbrainOneNotebookTaskTemplate) Restart(ctx *context.CreationContext)

}

// BuildContainerData looks up this template's configuration for the request's
// compute source and file-notebook flag, builds a container data chain from the
// configuration's ContainerSteps and runs it against ctx.
//
// The BizError returned by the chain's Run is passed through unchanged (nil on
// success).
func (c CloudbrainOneNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError {
	opts := entity.GetAITaskConfigOpts{
		ComputeSource:         ctx.Request.ComputeSource.Name,
		IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest,
	}
	steps := c.GetConfig(opts).ContainerSteps
	return container_builder.BuildContainerDataChain(steps).Run(ctx)
}

func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError {
c := g.GetMyCluster()
if c == nil {
@@ -175,17 +171,3 @@ func (g CloudbrainOneNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationC
//云脑一没有再次调试接口,通过使用同样的参数新建接口来模拟
return g.CallCreationAPI(ctx)
}

// NotifyCreation loads the cloudbrain record for ctx.Response.JobID and calls
// notification.NotifyOtherTask with the record's ID (as a decimal string), the
// request's display job name and models.ActionCreateDebugGPUTask.
//
// If the record cannot be loaded, the error is logged and returned wrapped in a
// BizError; otherwise nil is returned.
func (CloudbrainOneNotebookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError {
	task, err := models.GetCloudbrainByJobID(ctx.Response.JobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID failed: %v", err.Error())
		return response.NewBizError(err)
	}

	taskID := strconv.FormatInt(task.ID, 10)
	notification.NotifyOtherTask(ctx.User, ctx.Repository, taskID, ctx.Request.DisplayJobName, models.ActionCreateDebugGPUTask)
	return nil
}

+ 173
- 0
services/ai_task_service/task/cloudbrain_one_train_task.go View File

@@ -0,0 +1,173 @@
package task

import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
"strings"
)

// CloudbrainOneTrainTaskTemplate is the task template this file registers for
// models.JobTypeTrain on entity.OpenICloudbrainOne. It embeds
// DefaultAITaskTemplate and defines Create, CallCreationAPI and CallRestartAPI
// on top of it.
type CloudbrainOneTrainTaskTemplate struct {
DefaultAITaskTemplate
}

func init() {
t := &CloudbrainOneTrainTaskTemplate{
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.OpenICloudbrainOne,
JobType: models.JobTypeTrain,
Config: GetCloudbrainOneTrainConfig,
},
}
RegisterTask(models.JobTypeTrain, entity.OpenICloudbrainOne, t)
}

// GetCloudbrainOneTrainConfig returns a freshly allocated base configuration
// for Cloudbrain One train tasks.
//
// opts is not consulted: every call yields the same values. The action type is
// models.ActionCreateGPUTrainTask with IsActionUseJobId set, and every
// container step accepts MINIO storage only.
func GetCloudbrainOneTrainConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
	steps := map[entity.ContainerDataType]*entity.ContainerBuildOpts{
		// Inputs: code is writable, dataset and pre-trained model are read-only.
		entity.ContainerCode: {
			ContainerPath:     "/code",
			ReadOnly:          false,
			AcceptStorageType: []entity.StorageType{entity.MINIO},
			Uncompressed:      true,
		},
		entity.ContainerDataset: {
			ContainerPath:     "/dataset",
			ReadOnly:          true,
			AcceptStorageType: []entity.StorageType{entity.MINIO},
			Uncompressed:      true,
		},
		entity.ContainerPreTrainModel: {
			ContainerPath:     "/pretrainmodel",
			ReadOnly:          true,
			AcceptStorageType: []entity.StorageType{entity.MINIO},
		},
		// Output and log steps share the same container path and storage
		// relative path; only the log step sets MKDIR.
		entity.ContainerOutPutPath: {
			ContainerPath:       "/model",
			StorageRelativePath: cloudbrain.ModelMountPath,
			ReadOnly:            false,
			AcceptStorageType:   []entity.StorageType{entity.MINIO},
		},
		entity.ContainerLogPath: {
			ContainerPath:       "/model",
			StorageRelativePath: cloudbrain.ModelMountPath,
			ReadOnly:            false,
			AcceptStorageType:   []entity.StorageType{entity.MINIO},
			MKDIR:               true,
		},
	}

	return &entity.AITaskBaseConfig{
		ActionType:       models.ActionCreateGPUTrainTask,
		IsActionUseJobId: true,
		ContainerSteps:   steps,
	}
}

// Create runs the creation pipeline for a Cloudbrain One train task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t CloudbrainOneTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	c := &CreateOperator{}
	err := c.Next(t.CheckParamFormat).
		Next(t.HandleReqParameters).
		Next(t.CheckPrivilege4Continue).
		Next(t.CheckSourceTaskIsCleared).
		Next(t.CheckWorkerNum).
		Next(t.CheckMultiRequest).
		Next(t.CheckBranchExists).
		Next(t.CheckBootFile).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasets).
		Next(t.CheckModel).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
		Operate(ctx)
	if err != nil {
		// Name this template in the log so failures are not attributed to the
		// Grampus notebook task.
		log.Error("create CloudbrainOneTrainTask err.%v", err)
		return nil, err
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

// CallCreationAPI builds a CreateTrainTaskRequest from ctx and submits it with
// CreateTrainJob on the cluster returned by GetMyCluster.
//
// The request contains a single TrainTask populated from ctx.Request, ctx.Spec
// and the container data previously stored in ctx (dataset, code, pre-trained
// model, output and log path).
//
// Returns response.SYSTEM_ERROR when no cluster is available and a BizError
// wrapping the CreateTrainJob error when the call fails. On success
// ctx.Response is set to the returned job ID and status together with the
// timestamp taken just before the call, and nil is returned.
func (g CloudbrainOneTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError {
	c := g.GetMyCluster()
	if c == nil {
		return response.SYSTEM_ERROR
	}
	form := ctx.Request
	req := entity.CreateTrainTaskRequest{
		Name:           form.JobName,
		DisplayJobName: form.DisplayJobName,
		Tasks: []entity.TrainTask{
			{
				Name:           form.JobName,
				ResourceSpecId: ctx.Spec.SourceSpecId,
				ImageId:        form.ImageID,
				ImageUrl:       strings.TrimSpace(form.ImageUrl),
				Datasets:       ctx.GetContainerDataArray(entity.ContainerDataset),
				Code:           ctx.GetContainerDataArray(entity.ContainerCode),
				CenterID:       ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
				PreTrainModel:  ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
				BootFile:       form.BootFile,
				OutPut:         ctx.GetContainerDataArray(entity.ContainerOutPutPath),
				LogPath:        ctx.GetContainerDataArray(entity.ContainerLogPath),
				Params:         form.ParamArray,
				Spec:           ctx.Spec,
			},
		},
	}
	createTime := timeutil.TimeStampNow()
	res, err := c.CreateTrainJob(req)
	if err != nil {
		// Same wording as CallRestartAPI, naming this template and method.
		log.Error("CloudbrainOneTrainTaskTemplate CallCreationAPI err.req=%+v err=%v", req, err)
		return response.NewBizError(err)
	}
	ctx.Response = &entity.CreationResponse{
		JobID:      res.JobID,
		Status:     res.Status,
		CreateTime: createTime,
	}
	return nil
}

// CallRestartAPI builds a CreateTrainTaskRequest from ctx and submits it with
// CreateTrainJob on the cluster returned by GetMyCluster.
//
// Returns response.SYSTEM_ERROR when no cluster is available and a BizError
// wrapping the CreateTrainJob error when the call fails. On success
// ctx.Response is set to the returned job ID and status together with the
// timestamp taken just before the call, and nil is returned.
//
// NOTE(review): unlike CallCreationAPI, the task's LogPath is not populated
// here — confirm this is intentional.
func (g CloudbrainOneTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError {
	cluster := g.GetMyCluster()
	if cluster == nil {
		return response.SYSTEM_ERROR
	}

	form := ctx.Request
	task := entity.TrainTask{
		Name:           form.JobName,
		ResourceSpecId: ctx.Spec.SourceSpecId,
		ImageId:        form.ImageID,
		ImageUrl:       strings.TrimSpace(form.ImageUrl),
		Datasets:       ctx.GetContainerDataArray(entity.ContainerDataset),
		Code:           ctx.GetContainerDataArray(entity.ContainerCode),
		CenterID:       ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
		PreTrainModel:  ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
		BootFile:       form.BootFile,
		OutPut:         ctx.GetContainerDataArray(entity.ContainerOutPutPath),
		Params:         form.ParamArray,
		Spec:           ctx.Spec,
	}
	req := entity.CreateTrainTaskRequest{
		Name:           form.JobName,
		DisplayJobName: form.DisplayJobName,
		Tasks:          []entity.TrainTask{task},
	}

	// Take the timestamp before the remote call, as the creation time.
	createTime := timeutil.TimeStampNow()
	res, err := cluster.CreateTrainJob(req)
	if err != nil {
		log.Error("CloudbrainOneTrainTaskTemplate CallRestartAPI err.req=%+v err=%v", req, err)
		return response.NewBizError(err)
	}

	ctx.Response = &entity.CreationResponse{
		JobID:      res.JobID,
		Status:     res.Status,
		CreateTime: createTime,
	}
	return nil
}

+ 42
- 65
services/ai_task_service/task/cloudbrain_two_notebook_task.go View File

@@ -5,15 +5,12 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/convert"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/container_builder"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/cloudbrain/resource"
"strconv"
"strings"
)

@@ -26,43 +23,19 @@ func init() {
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.OpenICloudbrainTwo,
JobType: models.JobTypeDebug,
Config: GetCloudbrainTwoNotebookConfig,
},
}
RegisterTask(models.JobTypeDebug, entity.OpenICloudbrainTwo, t)
}

// Create runs the creation pipeline for a Cloudbrain Two notebook task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t CloudbrainTwoNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	c := &CreateOperator{}
	err := c.Next(t.CheckParam).
		Next(t.CheckMulti).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasetSize).
		Next(t.CheckDatasetExists).
		Next(t.CheckBranchExists).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
		Operate(ctx)
	if err != nil {
		// Name this template in the log so failures are not attributed to the
		// Cloudbrain One notebook task.
		log.Error("create CloudbrainTwoNotebookTask err.%v", err)
		return nil, err
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (g CloudbrainTwoNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig {
if opts.IsFileNoteBookRequest {
return entity.AITaskConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerFileNoteBookCode: {},
},
}
}

return entity.AITaskConfig{
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
func GetCloudbrainTwoNotebookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
//默认配置
config := &entity.AITaskBaseConfig{
ActionType: models.ActionCreateDebugNPUTask,
IsActionUseJobId: false,
DatasetsLimitSizeGB: setting.DebugAttachSize,
DatasetsMaxNum: setting.MaxDatasetNum,
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
Disable: true,
@@ -78,17 +51,46 @@ func (g CloudbrainTwoNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfig
},
},
}

//在线运行notebook配置
if opts.IsFileNoteBookRequest {
config = &entity.AITaskBaseConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerFileNoteBookCode: {},
},
}
}
return config
}

// Create runs the creation pipeline for a Cloudbrain Two notebook task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t CloudbrainTwoNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	c := &CreateOperator{}
	err := c.Next(t.CheckParamFormat).
		Next(t.CheckMultiRequest).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasets).
		Next(t.CheckBranchExists).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
		Operate(ctx)
	if err != nil {
		// Name this template in the log so failures are not attributed to the
		// Cloudbrain One notebook task.
		log.Error("create CloudbrainTwoNotebookTask err.%v", err)
		return nil, err
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (t CloudbrainTwoNotebookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
c := &CreateOperator{}
err := c.Next(t.BuildRequest4Restart).
Next(t.CheckOutput4Restart).
Next(t.CheckSourceTaskIsCleared).
Next(t.CheckModel).
Next(t.CheckDatasetExists).
Next(t.CheckIsCleared).
Next(t.CheckParam).
Next(t.CheckMulti).
Next(t.CheckDatasets).
Next(t.CheckParamFormat).
Next(t.CheckMultiRequest).
Next(t.LoadSpec).
Next(t.CheckPointBalance).
Next(t.CallRestartAPI).
@@ -166,31 +168,6 @@ func (g CloudbrainTwoNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationC
return nil
}

// BuildContainerData looks up this template's configuration for the request's
// compute source and file-notebook flag, builds a container data chain from the
// configuration's ContainerSteps and runs it against ctx.
//
// The BizError returned by the chain's Run is passed through unchanged (nil on
// success).
func (c CloudbrainTwoNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError {
	opts := entity.GetAITaskConfigOpts{
		ComputeSource:         ctx.Request.ComputeSource.Name,
		IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest,
	}
	steps := c.GetConfig(opts).ContainerSteps
	return container_builder.BuildContainerDataChain(steps).Run(ctx)
}

// NotifyCreation loads the cloudbrain record for ctx.Response.JobID and calls
// notification.NotifyOtherTask with the record's ID (as a decimal string), the
// request's display job name and models.ActionCreateDebugNPUTask.
//
// If the record cannot be loaded, the error is logged and returned wrapped in a
// BizError; otherwise nil is returned.
func (CloudbrainTwoNotebookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError {
	task, err := models.GetCloudbrainByJobID(ctx.Response.JobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID failed: %v", err.Error())
		return response.NewBizError(err)
	}

	taskID := strconv.FormatInt(task.ID, 10)
	notification.NotifyOtherTask(ctx.User, ctx.Repository, taskID, ctx.Request.DisplayJobName, models.ActionCreateDebugNPUTask)
	return nil
}

func (g CloudbrainTwoNotebookTaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) {
var aiCenterCode = models.AICenterOfCloudBrainTwo
if setting.ModelartsCD.Enabled {


+ 149
- 0
services/ai_task_service/task/cloudbrain_two_train_task.go View File

@@ -0,0 +1,149 @@
package task

import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
"encoding/json"
"strings"
)

// CloudbrainTwoTrainTaskTemplate is the task template this file registers for
// models.JobTypeTrain on entity.OpenICloudbrainTwo. It embeds
// DefaultAITaskTemplate and defines Create and CallCreationAPI on top of it.
type CloudbrainTwoTrainTaskTemplate struct {
DefaultAITaskTemplate
}

func init() {
t := &CloudbrainTwoTrainTaskTemplate{
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.OpenICloudbrainTwo,
JobType: models.JobTypeTrain,
Config: GetCloudbrainTwoTrainConfig,
},
}
RegisterTask(models.JobTypeTrain, entity.OpenICloudbrainTwo, t)
}

// GetCloudbrainTwoTrainConfig returns a freshly allocated base configuration
// for Cloudbrain Two train tasks.
//
// opts is not consulted: every call yields the same values. The action type is
// models.ActionCreateTrainTask with IsActionUseJobId set, and every container
// step accepts OBS storage only.
func GetCloudbrainTwoTrainConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
	steps := map[entity.ContainerDataType]*entity.ContainerBuildOpts{
		// Inputs: code is writable, dataset and pre-trained model are read-only.
		entity.ContainerCode: {
			ContainerPath:     "/code",
			ReadOnly:          false,
			AcceptStorageType: []entity.StorageType{entity.OBS},
			Uncompressed:      true,
		},
		entity.ContainerDataset: {
			ContainerPath:     "/dataset",
			ReadOnly:          true,
			Uncompressed:      true,
			AcceptStorageType: []entity.StorageType{entity.OBS},
		},
		entity.ContainerPreTrainModel: {
			ContainerPath:     "/pretrainmodel",
			ReadOnly:          true,
			AcceptStorageType: []entity.StorageType{entity.OBS},
		},
		// Output and log paths get models.CloudbrainTwoDefaultVersion appended
		// to their storage relative path, and both set MKDIR.
		entity.ContainerOutPutPath: {
			ContainerPath:       "/output",
			StorageRelativePath: "/output" + models.CloudbrainTwoDefaultVersion,
			ReadOnly:            false,
			AcceptStorageType:   []entity.StorageType{entity.OBS},
			MKDIR:               true,
		},
		entity.ContainerLogPath: {
			ContainerPath:       "/log",
			StorageRelativePath: "/log" + models.CloudbrainTwoDefaultVersion,
			ReadOnly:            false,
			AcceptStorageType:   []entity.StorageType{entity.OBS},
			MKDIR:               true,
		},
	}

	return &entity.AITaskBaseConfig{
		ActionType:       models.ActionCreateTrainTask,
		IsActionUseJobId: true,
		ContainerSteps:   steps,
	}
}

// Create runs the creation pipeline for a Cloudbrain Two train task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t CloudbrainTwoTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	c := &CreateOperator{}
	err := c.Next(t.CheckParamFormat).
		Next(t.HandleReqParameters).
		Next(t.CheckPrivilege4Continue).
		Next(t.CheckSourceTaskIsCleared).
		Next(t.CheckWorkerNum).
		Next(t.CheckMultiRequest).
		Next(t.CheckBranchExists).
		Next(t.CheckBootFile).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasets).
		Next(t.CheckModel).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
		Operate(ctx)
	if err != nil {
		// Name this template in the log so failures are not attributed to the
		// Grampus notebook task.
		log.Error("create CloudbrainTwoTrainTask err.%v", err)
		return nil, err
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

// CallCreationAPI builds a CreateTrainTaskRequest from ctx and submits it with
// CreateTrainJob on the cluster returned by GetMyCluster.
//
// setting.ResourcePools is decoded as JSON into a modelarts.ResourcePool and
// the ID of its first Info entry is used as the task's PoolId. The single
// TrainTask is otherwise populated from ctx.Request, ctx.Spec and the container
// data previously stored in ctx.
//
// Returns response.SYSTEM_ERROR when no cluster is available or the decoded
// pool list is empty, and a BizError wrapping the underlying error when
// decoding or CreateTrainJob fails. On success ctx.Response is set to the
// returned job ID, status, version ID and version name together with the
// timestamp taken just before the call, and nil is returned.
func (g CloudbrainTwoTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError {
	c := g.GetMyCluster()
	if c == nil {
		return response.SYSTEM_ERROR
	}
	var resourcePools modelarts.ResourcePool
	if err := json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
		log.Error("Unmarshal error. %v", err)
		return response.NewBizError(err)
	}
	if len(resourcePools.Info) == 0 {
		// There is no error value on this path, so none is formatted.
		log.Error("resourcePools.Info is empty.")
		return response.SYSTEM_ERROR
	}
	form := ctx.Request
	req := entity.CreateTrainTaskRequest{
		Name:           form.JobName,
		DisplayJobName: form.DisplayJobName,
		Description:    form.Description,
		Tasks: []entity.TrainTask{
			{
				Name:             form.JobName,
				ResourceSpecId:   ctx.Spec.SourceSpecId,
				ImageId:          form.ImageID,
				ImageUrl:         strings.TrimSpace(form.ImageUrl),
				Datasets:         ctx.GetContainerDataArray(entity.ContainerDataset),
				Code:             ctx.GetContainerDataArray(entity.ContainerCode),
				LogPath:          ctx.GetContainerDataArray(entity.ContainerLogPath),
				CenterID:         ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
				PreTrainModel:    ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
				BootFile:         form.BootFile,
				OutPut:           ctx.GetContainerDataArray(entity.ContainerOutPutPath),
				Params:           form.ParamArray,
				Spec:             ctx.Spec,
				PoolId:           resourcePools.Info[0].ID,
				WorkServerNumber: form.WorkServerNumber,
			},
		},
	}
	createTime := timeutil.TimeStampNow()
	res, err := c.CreateTrainJob(req)
	if err != nil {
		// Name this template and method rather than the Grampus notebook task.
		log.Error("CloudbrainTwoTrainTaskTemplate CallCreationAPI err.req=%+v err=%v", req, err)
		return response.NewBizError(err)
	}
	ctx.Response = &entity.CreationResponse{
		JobID:       res.JobID,
		Status:      res.Status,
		CreateTime:  createTime,
		VersionID:   res.VersionID,
		VersionName: res.VersionName,
	}
	return nil
}

+ 88
- 103
services/ai_task_service/task/grampus_notebook_task.go View File

@@ -1,19 +1,14 @@
package task

import (
"encoding/json"
"strconv"
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/container_builder"
"strings"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/context"
)

@@ -26,53 +21,52 @@ func init() {
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.C2Net,
JobType: models.JobTypeDebug,
Config: GetGrampusNoteBookConfig,
},
}
RegisterTask(models.JobTypeDebug, entity.C2Net, t)
}

// Create runs the creation pipeline for a Grampus notebook task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t GrampusNoteBookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	pipeline := (&CreateOperator{}).
		Next(t.CheckParam).
		Next(t.CheckMulti).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasetSize).
		Next(t.CheckDatasetExists).
		Next(t.CheckBranchExists).
		Next(t.CheckModel).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async)

	if bizErr := pipeline.Operate(ctx); bizErr != nil {
		log.Error("create GrampusNoteBookTask err.%v", bizErr)
		return nil, bizErr
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (g GrampusNoteBookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig {
containerPrefix := ""
func GetGrampusNoteBookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
codePath := "/code"
datasetPath := "/dataset"
pretrainModelPath := "/pretrainmodel"

config := &entity.AITaskBaseConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
ContainerPath: codePath,
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerDataset: {
ContainerPath: datasetPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerPreTrainModel: {
ContainerPath: pretrainModelPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
},
}

if opts.ComputeSource == models.NPU || opts.ComputeSource == models.DCU {
return entity.AITaskConfig{
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
config = &entity.AITaskBaseConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
ContainerPath: containerPrefix + codePath,
ContainerPath: codePath,
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.OBS},
},
entity.ContainerDataset: {
ContainerPath: containerPrefix + datasetPath,
ContainerPath: datasetPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerPreTrainModel: {
ContainerPath: containerPrefix + pretrainModelPath,
ContainerPath: pretrainModelPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
@@ -81,39 +75,74 @@ func (g GrampusNoteBookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts)
}

if opts.ComputeSource == models.GCU {
containerPrefix = "/tmp"
config = &entity.AITaskBaseConfig{
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
ContainerPath: "/tmp" + codePath,
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerDataset: {
ContainerPath: "/tmp" + datasetPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerPreTrainModel: {
ContainerPath: "/tmp" + pretrainModelPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
},
}
}

return entity.AITaskConfig{
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
ContainerPath: containerPrefix + codePath,
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerDataset: {
ContainerPath: containerPrefix + datasetPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerPreTrainModel: {
ContainerPath: containerPrefix + pretrainModelPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
},
switch opts.ComputeSource {
case models.NPU:
config.ActionType = models.ActionCreateGrampusNPUDebugTask
case models.GPU:
config.ActionType = models.ActionCreateGrampusGPUDebugTask
case models.GCU:
config.ActionType = models.ActionCreateGrampusGCUDebugTask
case models.MLU:
config.ActionType = models.ActionCreateGrampusMLUDebugTask
case models.DCU:
config.ActionType = models.ActionCreateGrampusDCUDebugTask

}
config.IsActionUseJobId = false
config.DatasetsLimitSizeGB = setting.DebugAttachSize
config.DatasetsMaxNum = setting.MaxDatasetNum
return config
}

// Create runs the creation pipeline for a Grampus notebook task.
//
// The steps are chained on a fresh CreateOperator in the order listed and
// executed by Operate(ctx). The last link is registered through
// AsyncNextWithErrFun with HandleErr4Async as its final argument — presumably
// those steps run asynchronously with HandleErr4Async as the error callback;
// confirm against CreateOperator.
//
// A non-nil BizError from Operate is logged and returned with a nil result;
// otherwise the ID of ctx.NewCloudbrain is returned.
func (t GrampusNoteBookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	pipeline := (&CreateOperator{}).
		Next(t.CheckParamFormat).
		Next(t.CheckMultiRequest).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasets).
		Next(t.CheckBranchExists).
		Next(t.CheckModel).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async)

	if bizErr := pipeline.Operate(ctx); bizErr != nil {
		log.Error("create GrampusNoteBookTask err.%v", bizErr)
		return nil, bizErr
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (t GrampusNoteBookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
c := &CreateOperator{}
err := c.Next(t.BuildRequest4Restart).
Next(t.CheckOutput4Restart).
Next(t.CheckSourceTaskIsCleared).
Next(t.CheckModel).
Next(t.CheckDatasetExists).
Next(t.CheckParam).
Next(t.CheckMulti).
Next(t.CheckDatasets).
Next(t.CheckParamFormat).
Next(t.CheckMultiRequest).
Next(t.LoadSpec).
Next(t.CheckPointBalance).
Next(t.CallRestartAPI).
@@ -131,17 +160,6 @@ func (t GrampusNoteBookTaskTemplate) Restart(ctx *context.CreationContext) (*ent
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID, Status: ctx.NewCloudbrain.Status}, nil
}

// BuildContainerData looks up this template's configuration for the request's
// compute source and file-notebook flag, builds a container data chain from the
// configuration's ContainerSteps and runs it against ctx.
//
// The BizError returned by the chain's Run is passed through unchanged (nil on
// success).
func (c GrampusNoteBookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError {
	opts := entity.GetAITaskConfigOpts{
		ComputeSource:         ctx.Request.ComputeSource.Name,
		IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest,
	}
	steps := c.GetConfig(opts).ContainerSteps
	return container_builder.BuildContainerDataChain(steps).Run(ctx)
}

// autoStopDurationMs is four hours expressed in milliseconds
// (4 h * 60 min * 60 s * 1000 ms).
var autoStopDurationMs = int64(4 * 60 * 60 * 1000)

func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError {
@@ -172,9 +190,6 @@ func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContex
},
},
}
reqJson, _ := json.Marshal(req)
log.Info("DCU REQ:" + string(reqJson))

createTime := timeutil.TimeStampNow()
res, err := c.CreateNoteBook(req)
if err != nil {
@@ -216,33 +231,3 @@ func (g GrampusNoteBookTaskTemplate) CallRestartAPI(ctx *context.CreationContext
}
return nil
}

// CheckOldJobPath is a no-op for this template: it performs no check and always returns nil.
func (GrampusNoteBookTaskTemplate) CheckOldJobPath(ctx *context.CreationContext) *response.BizError {
return nil
}

// NotifyCreation publishes the "debug task created" action for the newly
// created cloudbrain record. The action type is picked from the request's
// compute source; a source that matches none of the cases leaves it at the
// ActionType zero value. If the record cannot be reloaded the error is
// logged and returned as a BizError.
func (GrampusNoteBookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError {
	req := ctx.Request

	// TODO: needs further optimization.
	var action models.ActionType
	switch req.ComputeSource.Name {
	case models.NPU:
		action = models.ActionCreateGrampusNPUDebugTask
	case models.GPU:
		action = models.ActionCreateGrampusGPUDebugTask
	case models.GCU:
		action = models.ActionCreateGrampusGCUDebugTask
	case models.MLU:
		action = models.ActionCreateGrampusMLUDebugTask
	case models.DCU:
		action = models.ActionCreateGrampusDCUDebugTask
	}

	record, err := models.GetCloudbrainByCloudbrainID(ctx.NewCloudbrain.ID)
	if err != nil {
		log.Error("GetCloudbrainByJobID failed: %v", err.Error())
		return response.NewBizError(err)
	}

	// The notification identifies the task by its numeric record ID, rendered in base 10.
	recordID := strconv.FormatInt(record.ID, 10)
	notification.NotifyOtherTask(ctx.User, ctx.Repository, recordID, req.DisplayJobName, action)
	return nil
}

+ 36
- 65
services/ai_task_service/task/grampus_online_infer_task.go View File

@@ -1,17 +1,14 @@
package task

import (
"strconv"
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/container_builder"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/cloudbrain/resource"
)
@@ -25,65 +22,69 @@ func init() {
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.C2Net,
JobType: models.JobTypeOnlineInference,
Config: GetGrampusOnlineInferConfig,
},
}
RegisterTask(models.JobTypeOnlineInference, entity.C2Net, t)
}

// Create runs the creation pipeline for a C2Net online-inference task.
//
// The synchronous steps are chained in the order listed below and end with
// inserting the cloudbrain record. The final node is asynchronous:
// BuildContainerData, CallCreationAPI, AfterCallCreationAPI4Async and
// NotifyCreation are its normal steps, and HandleErr4Async is its error
// handler.
//
// On success it returns the ID of the new cloudbrain record; on failure it
// logs the error and returns it with a nil result.
func (t GrampusOnlineInferTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	log.Info("GrampusOnlineInferTaskTemplate create")
	c := &CreateOperator{}
	err := c.Next(t.CheckParam).
		Next(t.CheckMulti).
		Next(t.CheckBootFile).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasetSize).
		Next(t.CheckDatasetExists).
		Next(t.CheckBranchExists).
		Next(t.CheckModel).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
		Operate(ctx)
	if err != nil {
		// Name this template in the log; the message used to say
		// GrampusNoteBookTask, which pointed at the wrong task type.
		log.Error("create GrampusOnlineInferTaskTemplate err.%v", err)
		return nil, err
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (g GrampusOnlineInferTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig {
containerPrefix := ""
func GetGrampusOnlineInferConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
codePath := "/code"
datasetPath := "/dataset"
pretrainModelPath := "/pretrainmodel"
outputPath := "/output"

return entity.AITaskConfig{
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
config := &entity.AITaskBaseConfig{
ActionType: models.ActionCreateGrampusGPUOnlineInferTask,
IsActionUseJobId: false,
DatasetsLimitSizeGB: setting.DebugAttachSize,
DatasetsMaxNum: setting.MaxDatasetNum,
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
entity.ContainerCode: {
ContainerPath: containerPrefix + codePath,
ContainerPath: codePath,
ReadOnly: false,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerDataset: {
ContainerPath: containerPrefix + datasetPath,
ContainerPath: datasetPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerPreTrainModel: {
ContainerPath: containerPrefix + pretrainModelPath,
ContainerPath: pretrainModelPath,
ReadOnly: true,
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
},
entity.ContainerOutPutPath: {
ContainerPath: containerPrefix + outputPath,
ContainerPath: outputPath,
AcceptStorageType: []entity.StorageType{entity.MINIO},
MKDIR: false,
},
},
}
return config
}

// Create runs the creation pipeline for a C2Net online-inference task.
//
// The synchronous steps are chained in the order listed below and end with
// inserting the cloudbrain record. The final node is asynchronous:
// BuildContainerData, CallCreationAPI, AfterCallCreationAPI4Async and
// NotifyCreation are its normal steps, and HandleErr4Async is its error
// handler.
//
// On success it returns the ID of the new cloudbrain record; on failure it
// logs the error and returns it with a nil result.
func (t GrampusOnlineInferTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
	log.Info("GrampusOnlineInferTaskTemplate create")
	c := &CreateOperator{}
	err := c.Next(t.CheckParamFormat).
		Next(t.CheckMultiRequest).
		Next(t.CheckBootFile).
		Next(t.CheckDisplayJobName).
		Next(t.LoadSpec).
		Next(t.CheckPointBalance).
		Next(t.CheckDatasets).
		Next(t.CheckBranchExists).
		Next(t.CheckModel).
		Next(t.InsertCloudbrainRecord4Async).
		AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
		Operate(ctx)
	if err != nil {
		// Name this template in the log; the message used to say
		// GrampusNoteBookTask, which pointed at the wrong task type.
		log.Error("create GrampusOnlineInferTaskTemplate err.%v", err)
		return nil, err
	}
	return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
}

func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError {
@@ -129,10 +130,7 @@ func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationCon
res, err := c.CreateOnlineInfer(req)
if err != nil {
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err)
ctx.Response = &entity.CreationResponse{
Error: err,
}
return nil
return response.NewBizError(err)
}
if res.JobID == "" {
log.Error("GrampusNoteBookTask CreateNoteBook failed.Cloudbrain.JobID=%s", ctx.SourceCloudbrain.JobID)
@@ -159,30 +157,3 @@ func (g GrampusOnlineInferTaskTemplate) LoadSpec(ctx *context.CreationContext) *
ctx.Spec = spec
return nil
}

// BuildContainerData looks up the task config for the request's compute
// source (and file-notebook flag) and runs the container-data build chain
// over that config's ContainerSteps. The chain's result is returned as is:
// nil on success, the reported BizError otherwise.
func (c GrampusOnlineInferTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError {
	opts := entity.GetAITaskConfigOpts{
		ComputeSource:         ctx.Request.ComputeSource.Name,
		IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest,
	}
	steps := c.GetConfig(opts).ContainerSteps
	return container_builder.BuildContainerDataChain(steps).Run(ctx)
}

// NotifyCreation publishes the ActionCreateGrampusGPUOnlineInferTask action
// for the newly created cloudbrain record. If the record cannot be reloaded
// the error is logged and returned as a BizError.
func (c GrampusOnlineInferTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError {
	req := ctx.Request

	// TODO: needs further optimization.
	record, err := models.GetCloudbrainByCloudbrainID(ctx.NewCloudbrain.ID)
	if err != nil {
		log.Error("GetCloudbrainByJobID failed: %v", err.Error())
		return response.NewBizError(err)
	}

	// The notification identifies the task by its numeric record ID, rendered in base 10.
	recordID := strconv.FormatInt(record.ID, 10)
	notification.NotifyOtherTask(ctx.User, ctx.Repository, recordID, req.DisplayJobName, models.ActionCreateGrampusGPUOnlineInferTask)
	return nil
}

+ 117
- 57
services/ai_task_service/task/grampus_train_task.go View File

@@ -3,11 +3,11 @@ package task
import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/container_builder"
"code.gitea.io/gitea/services/ai_task_service/context"
"strings"
)
@@ -21,25 +21,105 @@ func init() {
DefaultAITaskTemplate: DefaultAITaskTemplate{
ClusterType: entity.C2Net,
JobType: models.JobTypeTrain,
Config: GetGrampusTrainTaskConfig,
},
}
RegisterTask(models.JobTypeTrain, entity.C2Net, t)
}

// GetGrampusTrainTaskConfig builds the base configuration for a train task
// on the given compute source.
//
// NPU tasks mount code, datasets, pretrained models and output under
// "/cache", accept only OBS for code and output, and store output under
// setting.OutPutPath. Every other compute source mounts under "/tmp",
// accepts MINIO and OBS for code, MINIO for output, and stores output under
// cloudbrain.ModelMountPath.
//
// ActionType is set for NPU, GPU and GCU and left at its zero value for any
// other source. IsActionUseJobId is always true.
func GetGrampusTrainTaskConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
	const (
		codeDir          = "/code"
		datasetDir       = "/dataset"
		pretrainModelDir = "/pretrainmodel"
		outputDir        = "/output"
	)

	var steps map[entity.ContainerDataType]*entity.ContainerBuildOpts
	if opts.ComputeSource == models.NPU {
		steps = map[entity.ContainerDataType]*entity.ContainerBuildOpts{
			entity.ContainerCode: {
				ContainerPath:       "/cache" + codeDir,
				StorageRelativePath: codeDir,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.OBS},
			},
			entity.ContainerDataset: {
				ContainerPath:     "/cache" + datasetDir,
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
			},
			entity.ContainerPreTrainModel: {
				ContainerPath:     "/cache" + pretrainModelDir,
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
			},
			entity.ContainerOutPutPath: {
				ContainerPath:       "/cache" + outputDir,
				StorageRelativePath: setting.OutPutPath,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.OBS},
			},
		}
	} else {
		steps = map[entity.ContainerDataType]*entity.ContainerBuildOpts{
			entity.ContainerCode: {
				ContainerPath:       "/tmp" + codeDir,
				StorageRelativePath: codeDir,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.MINIO, entity.OBS},
			},
			entity.ContainerDataset: {
				ContainerPath:     "/tmp" + datasetDir,
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
			},
			entity.ContainerPreTrainModel: {
				ContainerPath:     "/tmp" + pretrainModelDir,
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS},
			},
			entity.ContainerOutPutPath: {
				ContainerPath:       "/tmp" + outputDir,
				StorageRelativePath: cloudbrain.ModelMountPath,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.MINIO},
				MKDIR:               false,
			},
		}
	}

	cfg := &entity.AITaskBaseConfig{
		ContainerSteps:   steps,
		IsActionUseJobId: true,
	}
	switch opts.ComputeSource {
	case models.NPU:
		cfg.ActionType = models.ActionCreateGrampusNPUTrainTask
	case models.GPU:
		cfg.ActionType = models.ActionCreateGrampusGPUTrainTask
	case models.GCU:
		cfg.ActionType = models.ActionCreateGrampusGCUTrainTask
	}
	return cfg
}

func (t GrampusTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) {
c := &CreateOperator{}
err := c.Next(t.CheckParam).
Next(t.CheckMulti).
err := c.Next(t.CheckParamFormat).
Next(t.HandleReqParameters).
Next(t.CheckPrivilege4Continue).
Next(t.CheckSourceTaskIsCleared).
Next(t.CheckBranchExists).
Next(t.CheckBootFile).
Next(t.CheckWorkerNum).
Next(t.CheckMultiRequest).
Next(t.CheckDisplayJobName).
Next(t.LoadSpec).
Next(t.CheckPointBalance).
Next(t.BuildContainerData).
Next(t.CallCreationAPI).
Next(t.AfterCallCreationAPI4Sync).
Next(t.NotifyCreation).
Next(t.CheckDatasets).
Next(t.CheckModel).
Next(t.InsertCloudbrainRecord4Async).
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async).
Operate(ctx)
if err != nil {
log.Error("create GrampusTrainTask err.%v", err)
log.Error("create GrampusTrainTaskTemplate err.%v", err)
return nil, err
}
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil
@@ -51,34 +131,38 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext)
return response.SYSTEM_ERROR
}
form := ctx.Request
imageUrl := strings.TrimSpace(form.ImageUrl)
if form.ImageID != "" {
imageUrl = ""
}
req := entity.CreateTrainTaskRequest{
Name: form.JobName,
Name: form.JobName,
DisplayJobName: form.DisplayJobName,
Tasks: []entity.TrainTask{
{
Name: form.JobName,
ResourceSpecId: ctx.Spec.SourceSpecId,
ImageId: form.ImageID,
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerData(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
Models: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerData(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
Name: form.JobName,
ResourceSpecId: ctx.Spec.SourceSpecId,
ImageId: form.ImageID,
ImageUrl: imageUrl,
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
RepoName: ctx.Repository.Name,
WorkServerNumber: ctx.Request.WorkServerNumber,
},
},
TaskConfig: ctx.Config,
}
createTime := timeutil.TimeStampNow()
res, err := c.CreateTrainJob(req)
if err != nil {
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err)
log.Error("GrampusTrainTaskTemplate CreateTrainJob err.req=%+v err=%v", req, err)
return response.NewBizError(err)
ctx.Response = &entity.CreationResponse{
Error: err,
}
return nil
}
ctx.Response = &entity.CreationResponse{
JobID: res.JobID,
@@ -88,17 +172,6 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext)
return nil
}

// BuildContainerData looks up the task config for the request's compute
// source (and file-notebook flag) and runs the container-data build chain
// over that config's ContainerSteps. The chain's result is returned as is:
// nil on success, the reported BizError otherwise.
func (c GrampusTrainTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError {
	opts := entity.GetAITaskConfigOpts{
		ComputeSource:         ctx.Request.ComputeSource.Name,
		IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest,
	}
	steps := c.GetConfig(opts).ContainerSteps
	return container_builder.BuildContainerDataChain(steps).Run(ctx)
}

func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError {
c := g.GetMyCluster()
if c == nil {
@@ -106,7 +179,8 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *
}
form := ctx.Request
req := entity.CreateTrainTaskRequest{
Name: form.JobName,
Name: form.JobName,
DisplayJobName: form.DisplayJobName,
Tasks: []entity.TrainTask{
{
Name: form.JobName,
@@ -114,11 +188,11 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *
ImageId: form.ImageID,
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerData(entity.ContainerCode),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
Models: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerData(entity.ContainerOutPutPath),
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
},
@@ -127,7 +201,7 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *
createTime := timeutil.TimeStampNow()
res, err := c.CreateTrainJob(req)
if err != nil {
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err)
log.Error("GrampusTrainTaskTemplate CallRestartAPI err.req=%+v err=%v", req, err)
return response.NewBizError(err)
}
ctx.Response = &entity.CreationResponse{
@@ -137,17 +211,3 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *
}
return nil
}

// NotifyCreation publishes the "train task created" action, identifying the
// task by the job ID stored in ctx.Response. NPU and GPU requests get their
// matching action type; any other compute source leaves the action type at
// its zero value. It always returns nil.
func (GrampusTrainTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError {
	req := ctx.Request
	jobID := ctx.Response.JobID

	// TODO: needs further optimization.
	var action models.ActionType
	switch req.ComputeSource.Name {
	case models.NPU:
		action = models.ActionCreateGrampusNPUTrainTask
	case models.GPU:
		action = models.ActionCreateGrampusGPUTrainTask
	}

	notification.NotifyOtherTask(ctx.User, ctx.Repository, jobID, req.DisplayJobName, action)
	return nil
}

+ 17
- 10
services/ai_task_service/task/opt.go View File

@@ -31,6 +31,7 @@ type CreateOperator struct {
FuncArray []CreationFuncNode
}

// Next adds a synchronous node.
func (o *CreateOperator) Next(f ...CreateFunc) *CreateOperator {
if o.FuncArray == nil {
o.FuncArray = make([]CreationFuncNode, 0)
@@ -39,6 +40,7 @@ func (o *CreateOperator) Next(f ...CreateFunc) *CreateOperator {
return o
}

// AsyncNext adds an asynchronous node.
func (o *CreateOperator) AsyncNext(f ...CreateFunc) *CreateOperator {
if o.FuncArray == nil {
o.FuncArray = make([]CreationFuncNode, 0)
@@ -46,6 +48,9 @@ func (o *CreateOperator) AsyncNext(f ...CreateFunc) *CreateOperator {
o.FuncArray = append(o.FuncArray, CreationFuncNode{Funcs: f, IsAsync: true})
return o
}

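// NextWithErrFun adds a synchronous node. The last function passed is the error handler; all preceding functions are normal steps.
// The error handler runs only when a normal step returns an error.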
func (o *CreateOperator) NextWithErrFun(f ...CreateFunc) *CreateOperator {
if o.FuncArray == nil {
o.FuncArray = make([]CreationFuncNode, 0)
@@ -64,6 +69,8 @@ func (o *CreateOperator) NextWithErrFun(f ...CreateFunc) *CreateOperator {
return o
}

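// AsyncNextWithErrFun adds an asynchronous node. The last function passed is the error handler; all preceding functions are normal steps.
// The error handler runs only when a normal step returns an error.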
func (o *CreateOperator) AsyncNextWithErrFun(f ...CreateFunc) *CreateOperator {
if o.FuncArray == nil {
o.FuncArray = make([]CreationFuncNode, 0)
@@ -102,6 +109,16 @@ func runFuncNode(node CreationFuncNode, ctx *context.CreationContext) *response.
combinedErr := fmt.Errorf("%s\n%s", err, log.Stack(2))
log.Error("PANIC:%v", combinedErr)
}
if err != nil && node.ErrFunc != nil {
ctx.Response = &entity.CreationResponse{
Error: errors.New(err.DefaultMsg),
}
newErr := node.ErrFunc(ctx)
if newErr != nil {
log.Error("runFuncNode ErrFunc error.%v", err)
return
}
}
}()
for _, f := range node.Funcs {
err = f(ctx)
@@ -110,15 +127,5 @@ func runFuncNode(node CreationFuncNode, ctx *context.CreationContext) *response.
break
}
}
if err != nil && node.ErrFunc != nil {
ctx.Response = &entity.CreationResponse{
Error: errors.New(err.DefaultMsg),
}
newErr := node.ErrFunc(ctx)
if newErr != nil {
log.Error("runFuncNode ErrFunc error.%v", err)
return err
}
}
return err
}

+ 158
- 130
services/ai_task_service/task/opt_handler.go View File

@@ -3,7 +3,7 @@ package task
import (
"encoding/json"
"fmt"
"os"
"path"
"strings"

"code.gitea.io/gitea/entity"
@@ -21,18 +21,18 @@ import (

type CreationHandler interface {
BuildRequest4Restart(ctx *context.CreationContext) *response.BizError
CheckParam(ctx *context.CreationContext) *response.BizError
CheckMulti(ctx *context.CreationContext) *response.BizError
CheckParamFormat(ctx *context.CreationContext) *response.BizError
CheckPrivilege4Continue(ctx *context.CreationContext) *response.BizError
HandleReqParameters(ctx *context.CreationContext) *response.BizError
CheckMultiRequest(ctx *context.CreationContext) *response.BizError
CheckDisplayJobName(ctx *context.CreationContext) *response.BizError
LoadSpec(ctx *context.CreationContext) *response.BizError
CheckPointBalance(ctx *context.CreationContext) *response.BizError
CheckDatasetExists(ctx *context.CreationContext) *response.BizError
CheckDatasetSize(ctx *context.CreationContext) *response.BizError
CheckDatasets(ctx *context.CreationContext) *response.BizError
CheckBranchExists(ctx *context.CreationContext) *response.BizError
CheckModel(ctx *context.CreationContext) *response.BizError
CheckBootFile(ctx *context.CreationContext) *response.BizError
CheckIsCleared(ctx *context.CreationContext) *response.BizError
CheckOutput4Restart(ctx *context.CreationContext) *response.BizError
CheckSourceTaskIsCleared(ctx *context.CreationContext) *response.BizError
BuildContainerData(ctx *context.CreationContext) *response.BizError
InsertCloudbrainRecord4Async(ctx *context.CreationContext) *response.BizError
CallCreationAPI(ctx *context.CreationContext) *response.BizError
@@ -48,6 +48,10 @@ type CreationHandler interface {
// DefaultCreationHandler carries no state; it is the receiver for the default
// implementations of the task-creation steps defined in this file.
type DefaultCreationHandler struct {
}

// BuildContainerData is the default container-data step: it does nothing and returns nil.
func (g DefaultCreationHandler) BuildContainerData(ctx *context.CreationContext) *response.BizError {
return nil
}

func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext) *response.BizError {
task := ctx.SourceCloudbrain
if task == nil {
@@ -65,10 +69,6 @@ func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext)
log.Error("GetCloudbrainSpec err. %v", err)
return response.SPEC_NOT_AVAILABLE
}
computeSourceStr := ""
if c := models.GetComputeSourceInstance(task.ComputeResource); c != nil {
computeSourceStr = c.Name
}
imageUrl := task.Image
imageName := task.Image
imageId := task.ImageID
@@ -85,7 +85,7 @@ func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext)
DisplayJobName: task.DisplayJobName,
JobName: task.JobName,
SpecId: oldSpec.ID,
ComputeSourceStr: computeSourceStr,
ComputeSourceStr: task.GetStandardComputeSource(),
Cluster: entity.GetClusterTypeFromCloudbrainType(task.Type),
WorkServerNumber: task.WorkServerNumber,
BranchName: task.BranchName,
@@ -111,29 +111,48 @@ func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext)
return nil
}

func (DefaultCreationHandler) CheckDatasetExists(ctx *context.CreationContext) *response.BizError {
log.Info("Start to CheckDataset.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
// CheckDatasets validates the datasets named by ctx.Request.DatasetUUIDStr, a
// semicolon-separated list of UUIDs. An empty list passes immediately.
// Otherwise it checks the dataset count, that every dataset can be loaded,
// and the total dataset size, then stores the semicolon-joined dataset names
// in ctx.Request.DatasetNames.
func (g DefaultCreationHandler) CheckDatasets(ctx *context.CreationContext) *response.BizError {
log.Info("Start to CheckDatasets.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
datasetUUIDStr := ctx.Request.DatasetUUIDStr
if datasetUUIDStr == "" {
return nil
}
//check datasets num
uuids := strings.Split(datasetUUIDStr, ";")
attachs, _ := models.GetAttachmentsByUUIDs(uuids)
// NOTE(review): the guard reads ctx.Config.DatasetsMaxNum, but the comparison uses
// setting.MaxDatasetNum; presumably len(uuids) should be compared with
// ctx.Config.DatasetsMaxNum so a per-task limit takes effect — confirm.
if ctx.Config.DatasetsMaxNum > 0 && len(uuids) > setting.MaxDatasetNum {
log.Error("the dataset count(%d) exceed the limit", len(uuids))
return response.DATASET_NUMBER_OVER_LIMIT
}

datasetInfos, err := models.GetDatasetInfo4AITask(ctx.Request.DatasetUUIDStr)
if err != nil {
log.Error("GetDatasetInfo failed: %v", err)
return response.SYSTEM_ERROR
}

if len(attachs) < len(uuids) {
// Fewer loaded datasets than requested UUIDs is reported as a missing dataset.
if len(datasetInfos) < len(uuids) {
log.Info("CheckDataset hasDatasetDeleted.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
return response.DATASET_NOT_EXISTS
}

//check datasets size
var attachSize int64
for _, infos := range datasetInfos {
attachSize += infos.Size
}
limitSizeGB := ctx.Config.DatasetsLimitSizeGB
// The limit is in GB (decimal: 1000*1000*1000 bytes); a limit of 0 or less disables the check.
// NOTE(review): the multiplication is done in limitSizeGB's own type before the int64 conversion — confirm it cannot overflow.
if limitSizeGB > 0 && attachSize > int64(limitSizeGB*1000*1000*1000) {
log.Error("The DatasetSize exceeds the limit (%dGB)", limitSizeGB) // GB
return response.DATASET_SIZE_OVER_LIMIT.WithParams(limitSizeGB)
}

var datasetNames string
for i := 0; i < len(uuids); i++ {
for j := 0; j < len(attachs); j++ {
if uuids[i] == attachs[j].UUID {
datasetNames += attachs[j].Name + ";"
}
}
}
// Names are collected in request order; presumably datasetInfos is keyed by dataset UUID — verify against GetDatasetInfo4AITask.
attach := datasetInfos[uuids[i]]
datasetNames += attach.Compressed.Name + ";"
}
ctx.Request.DatasetNames = strings.TrimSuffix(datasetNames, ";")
log.Info("CheckDataset success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
log.Info("CheckDatasets success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
return nil
}

@@ -149,29 +168,6 @@ func (DefaultCreationHandler) CheckBranchExists(ctx *context.CreationContext) *r
return nil
}

// CheckDatasetSize sums the sizes of the datasets named by the request and
// rejects the request with DATASET_SIZE_OVER_LIMIT when the total exceeds
// ctx.AITaskConfig.DatasetMaxSize. A request without datasets passes, and a
// limit of 0 or less disables the check. A failure to load the dataset info
// is logged and reported as SYSTEM_ERROR.
func (DefaultCreationHandler) CheckDatasetSize(ctx *context.CreationContext) *response.BizError {
	log.Info("Start to CheckDatasetSize.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
	uuidStr := ctx.Request.DatasetUUIDStr
	if uuidStr == "" {
		return nil
	}

	infos, _, err := models.GetDatasetInfo(uuidStr, ctx.Request.ComputeSource.Name)
	if err != nil {
		log.Error("GetDatasetInfo failed: %v", err)
		return response.SYSTEM_ERROR
	}

	var total int64
	for _, info := range infos {
		total += info.Size
	}

	if limit := ctx.AITaskConfig.DatasetMaxSize; limit > 0 && total > int64(limit) {
		log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB
		return response.DATASET_SIZE_OVER_LIMIT.WithParams(setting.DebugAttachSize)
	}

	log.Info("CheckDatasetSize success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
	return nil
}

func (DefaultCreationHandler) CheckModel(ctx *context.CreationContext) *response.BizError {
log.Info("Start to CheckModel.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
if hasModelNumOverLimit(ctx.Request.PretrainModelCkptName) { //检查模型数量是否超出限制
@@ -186,72 +182,19 @@ func (DefaultCreationHandler) CheckModel(ctx *context.CreationContext) *response
return nil
}

// CheckBootFile validates the request's boot file. It returns PARAM_ERROR
// when no boot file is given, BOOT_FILE_MUST_BE_PYTHON when the trimmed
// name does not end in ".py", and BOOT_FILE_NOT_EXIST when the branch commit
// cannot be loaded or the file is not found in it. An empty branch name
// falls back to the repository's default branch.
func (DefaultCreationHandler) CheckBootFile(ctx *context.CreationContext) *response.BizError {
	req := ctx.Request
	if req.BootFile == "" {
		return response.PARAM_ERROR
	}

	if trimmed := strings.TrimSpace(req.BootFile); !strings.HasSuffix(trimmed, ".py") {
		log.Error("the boot file(%s) must be a python file", trimmed)
		return response.BOOT_FILE_MUST_BE_PYTHON
	}

	branchName := req.BranchName
	if branchName == "" {
		branchName = ctx.Repository.DefaultBranch
	}

	commit, err := ctx.GitRepo.GetBranchCommit(branchName)
	if err != nil {
		log.Error("CheckBootFile GetBranchCommit error,repoId:=%d err=%v", ctx.Repository.ID, err)
		return response.BOOT_FILE_NOT_EXIST
	}

	// The tree lookup uses the boot file exactly as given in the request.
	_, err = commit.GetTreeEntryByPath(req.BootFile)
	if err != nil {
		log.Error("CheckBootFile GetTreeEntryByPath error,repoId:=%d BootFile=%s err=%v", ctx.Repository.ID, req.BootFile, err)
		return response.BOOT_FILE_NOT_EXIST
	}
	return nil
}

// CheckIsCleared rejects the request with response.RESULT_CLEARD when the
// source cloudbrain task is marked as cleared. A context without a source
// task passes, matching the nil handling in CheckSourceTaskIsCleared; before
// this guard a nil ctx.SourceCloudbrain was dereferenced.
func (DefaultCreationHandler) CheckIsCleared(ctx *context.CreationContext) *response.BizError {
	task := ctx.SourceCloudbrain
	if task == nil {
		return nil
	}
	if task.Cleared {
		return response.RESULT_CLEARD
	}
	return nil
}

func (DefaultCreationHandler) CheckOutput4Restart(ctx *context.CreationContext) *response.BizError {
log.Info("Start to CheckOutput4Restart.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
func (DefaultCreationHandler) CheckSourceTaskIsCleared(ctx *context.CreationContext) *response.BizError {
log.Info("Start to CheckSourceTaskIsCleared.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
task := ctx.SourceCloudbrain
if task == nil {
return nil
}
if task.Cleared {
return response.RESULT_CLEARD
}
if !(task.IsNPUTask() || task.IsDCUTask()) {
if _, err := os.Stat(getTaskOldJobPath(task)); err != nil {
log.Error("Can not find job minio path.displayJobName=%s jobType=%s cluster=%s err=%v", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster, err)
return response.RESULT_CLEARD
}
}
log.Info("CheckOutput4Restart success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)

log.Info("CheckSourceTaskIsCleared success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
return nil
}

// getTaskOldJobPath returns the MinIO code path for a non-NPU task: the MinIO
// real path, then the bucket, "/", the code path prefix and the job name.
// NPU tasks get the empty string.
func getTaskOldJobPath(task *models.Cloudbrain) string {
	if task.IsNPUTask() {
		return ""
	}
	return setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + task.JobName
}

// hasDatasetDeleted reports whether fewer attachments were found than UUIDs
// were requested. datasetUUIDStr is a semicolon-separated UUID list; an empty
// string reports false.
func hasDatasetDeleted(datasetUUIDStr string) bool {
	if len(datasetUUIDStr) == 0 {
		return false
	}
	wanted := strings.Split(datasetUUIDStr, ";")
	// The lookup error is discarded; only the number of attachments returned is compared.
	found, _ := models.GetAttachmentsByUUIDs(wanted)
	return len(found) < len(wanted)
}

func hasModelFileDeleted(modelId, pretrainModelCkptName string) bool {
if modelId == "" {
return false
@@ -273,9 +216,53 @@ func hasModelNumOverLimit(pretrainModelCkptName string) bool {
return false
}

func (DefaultCreationHandler) CheckParam(ctx *context.CreationContext) *response.BizError {
func (DefaultCreationHandler) CheckParamFormat(ctx *context.CreationContext) *response.BizError {
req := ctx.Request
log.Info("Start to CheckParam.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster)

c := models.GetComputeSourceInstance(req.ComputeSourceStr)
if c == nil {
log.Error("ComputeSourceStr invalid")
return response.PARAM_ERROR
}
ctx.Request.ComputeSource = c
ctx.Request.BootFile = strings.TrimSpace(ctx.Request.BootFile)

log.Info("CheckParam success.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster)
return nil
}

// CheckPrivilege4Continue verifies that the requesting user may build on an
// existing cloudbrain task. It applies only when the request is flagged
// IsContinueRequest; otherwise it returns nil without doing anything.
//
// It returns PARAM_ERROR when the source task ID is not positive or the task
// does not exist, SYSTEM_ERROR on any other load failure, and
// INSUFFICIENT_PERMISSION when the user neither owns the task nor is an
// admin. The loaded task is stored in ctx.SourceCloudbrain before the
// permission check.
func (DefaultCreationHandler) CheckPrivilege4Continue(ctx *context.CreationContext) *response.BizError {
	req := ctx.Request
	// Permission on the old cloudbrain task only needs checking when continuing training or creating a new version.
	if !req.IsContinueRequest {
		return nil
	}
	log.Info("Start to CheckPrivilege4Continue.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster)

	sourceID := req.SourceCloudbrainId
	if sourceID <= 0 {
		return response.PARAM_ERROR
	}

	source, err := models.GetCloudbrainByCloudbrainID(sourceID)
	if err != nil {
		log.Error("CheckPrivilege4NewVersion get old cloudbrain task error.oldCloudbrainId=%d err=%v", sourceID, err)
		if models.IsErrRecordNotExist(err) {
			return response.PARAM_ERROR
		}
		return response.SYSTEM_ERROR
	}
	ctx.SourceCloudbrain = source

	isOwner := source.UserID == ctx.User.ID
	if !isOwner && !ctx.User.IsAdmin {
		return response.INSUFFICIENT_PERMISSION
	}

	log.Info("CheckPrivilege4Continue success.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster)
	return nil
}

func (DefaultCreationHandler) HandleReqParameters(ctx *context.CreationContext) *response.BizError {
req := ctx.Request
var parameters models.Parameters
if req.Params != "" {
err := json.Unmarshal([]byte(req.Params), &parameters)
@@ -283,22 +270,46 @@ func (DefaultCreationHandler) CheckParam(ctx *context.CreationContext) *response
log.Error("Failed to Unmarshal params: %s (%v)", req.Params, err)
return response.PARAM_ERROR
}
// label去掉所有的空格,value去掉首位的空格
for i := 0; i < len(parameters.Parameter); i++ {
parameters.Parameter[i].Label = strings.ReplaceAll(parameters.Parameter[i].Label, " ", "")
parameters.Parameter[i].Value = strings.TrimSpace(parameters.Parameter[i].Value)
}
ctx.Request.ParamArray = parameters
p, err := json.Marshal(parameters)
if err == nil {
ctx.Request.Params = string(p)
}
}
c := models.GetComputeSourceInstance(req.ComputeSourceStr)
if c == nil {
log.Error("ComputeSourceStr invalid")
return nil
}

func (DefaultCreationHandler) CheckBootFile(ctx *context.CreationContext) *response.BizError {
req := ctx.Request
branch := req.BranchName
if req.BootFile == "" {
return response.PARAM_ERROR
}
ctx.Request.ComputeSource = c

//todo 校验模型,数据集是否存在
log.Info("CheckParam success.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster)

if !strings.HasSuffix(strings.TrimSpace(req.BootFile), ".py") {
log.Error("the boot file(%s) must be a python file", strings.TrimSpace(req.BootFile))
return response.BOOT_FILE_MUST_BE_PYTHON
}
if branch == "" {
branch = ctx.Repository.DefaultBranch
}
commit, err := ctx.GitRepo.GetBranchCommit(branch)
if err != nil {
log.Error("CheckBootFile GetBranchCommit error,repoId:=%d err=%v", ctx.Repository.ID, err)
return response.BOOT_FILE_NOT_EXIST
}
if _, err := commit.GetTreeEntryByPath(req.BootFile); err != nil {
log.Error("CheckBootFile GetTreeEntryByPath error,repoId:=%d BootFile=%s err=%v", ctx.Repository.ID, req.BootFile, err)
return response.BOOT_FILE_NOT_EXIST
}
return nil
}

func (DefaultCreationHandler) CheckMulti(ctx *context.CreationContext) *response.BizError {
func (DefaultCreationHandler) CheckMultiRequest(ctx *context.CreationContext) *response.BizError {
jobType := string(ctx.Request.JobType)
log.Info("Start to CheckMulti success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)

@@ -357,18 +368,14 @@ func (DefaultCreationHandler) LoadSpec(ctx *context.CreationContext) *response.B
return nil
}

// BuildContainerData is a placeholder default: it logs that the step is not
// implemented and returns response.SYSTEM_ERROR.
func (DefaultCreationHandler) BuildContainerData(ctx *context.CreationContext) *response.BizError {
log.Error("BuildContainerData not implements")
return response.SYSTEM_ERROR
}

func (DefaultCreationHandler) InsertCloudbrainRecord4Async(ctx *context.CreationContext) *response.BizError {
log.Info("Start to InsertCloudbrainRecord4Async.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)

req := ctx.Request

imageUrl := req.ImageUrl
if req.ImageUrl == "" && req.ImageName != "" {
req.ImageUrl = req.ImageName
imageUrl = req.ImageName
}
taskType := req.Cluster.GetCloudbrainType()
if taskType == models.TypeCloudBrainTwo && setting.ModelartsCD.Enabled {
@@ -391,15 +398,16 @@ func (DefaultCreationHandler) InsertCloudbrainRecord4Async(ctx *context.Creation
DatasetName: req.DatasetNames,
CommitID: ctx.CommitID,
IsLatestVersion: "1",
VersionCount: 1,
ComputeResource: req.ComputeSource.GetCloudbrainFormat(),
ImageID: req.ImageID,
Image: req.ImageUrl,
Image: imageUrl,
BranchName: branchName,
Parameters: req.Params,
BootFile: req.BootFile,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
EngineName: req.ImageUrl,
EngineName: imageUrl,
Spec: ctx.Spec,
ModelName: req.PretrainModelName,
ModelVersion: req.PretrainModelVersion,
@@ -435,8 +443,9 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Sync(ctx *context.CreationCon
return response.NewBizError(res.Error)
}

imageUrl := req.ImageUrl
if req.ImageUrl == "" && req.ImageName != "" {
req.ImageUrl = req.ImageName
imageUrl = req.ImageName
}
taskType := req.Cluster.GetCloudbrainType()
if taskType == models.TypeCloudBrainTwo && setting.ModelartsCD.Enabled {
@@ -455,13 +464,13 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Sync(ctx *context.CreationCon
IsLatestVersion: "1",
ComputeResource: req.ComputeSource.GetCloudbrainFormat(),
ImageID: req.ImageID,
Image: req.ImageUrl,
Image: imageUrl,
BranchName: req.BranchName,
Parameters: req.Params,
BootFile: req.BootFile,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
EngineName: req.ImageUrl,
EngineName: imageUrl,
Spec: ctx.Spec,
ModelName: req.PretrainModelName,
ModelVersion: req.PretrainModelVersion,
@@ -475,6 +484,13 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Sync(ctx *context.CreationCon
CreatedUnix: res.CreateTime,
UpdatedUnix: res.CreateTime,
GpuQueue: ctx.Spec.QueueCode,
Config: ctx.BuildCloudbrainConfig(),
}

config := ctx.BuildCloudbrainConfig()
if config != nil {
c.TrainUrl = path.Join("/", config.OutputBucket, config.OutputObjectPrefix)
c.LogUrl = path.Join("/", config.LogBucket, config.LogObjectPrefix)
}
err := models.CreateCloudbrain(c)

@@ -507,7 +523,14 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Async(ctx *context.CreationCo
c.CreatedUnix = res.CreateTime
c.UpdatedUnix = res.CreateTime
c.DatasetName = ctx.Request.DatasetNames
c.VersionName = res.VersionName
c.VersionID = res.VersionID

config := ctx.BuildCloudbrainConfig()
if config != nil {
c.TrainUrl = path.Join("/", config.OutputBucket, config.OutputObjectPrefix)
c.LogUrl = path.Join("/", config.LogBucket, config.LogObjectPrefix)
}
err := models.UpdateJob(c)
if err != nil {
log.Error("AfterCallCreationAPI4Async UpdateJob err.displayJobName=%s jobType=%s cluster=%s err=%v", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster, err)
@@ -515,6 +538,12 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Async(ctx *context.CreationCo
}
log.Info("AfterCallCreationAPI4Async success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)

//更新cloudbrain_config表
config.CloudbrainID = c.ID
_, err = models.InsertCloudbrainConfig(config)
if err != nil {
log.Error("InsertCloudbrainConfig error,config=%+v err=%v", config, err)
}
return nil

}
@@ -530,7 +559,6 @@ func (DefaultCreationHandler) CreateCloudbrainRecord4Restart(ctx *context.Creati
return response.RESTART_FAILED
}
req := ctx.Request

c := &models.Cloudbrain{
Status: TransAITaskStatus(res.Status),
UserID: ctx.SourceCloudbrain.UserID,
@@ -577,10 +605,6 @@ func (DefaultCreationHandler) CreateCloudbrainRecord4Restart(ctx *context.Creati
return nil
}

func (DefaultCreationHandler) NotifyCreation(ctx *context.CreationContext) *response.BizError {
return nil
}

func TransAITaskStatus(oldStatus string) string {
switch oldStatus {
case models.GrampusStatusPending:
@@ -655,3 +679,7 @@ func (DefaultCreationHandler) HandleErr4Async(ctx *context.CreationContext) *res
log.Info("HandleErr4Async success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
return nil
}

func (g DefaultCreationHandler) NotifyCreation(ctx *context.CreationContext) *response.BizError {
return nil
}

+ 265
- 60
services/ai_task_service/task/task_base.go View File

@@ -4,15 +4,20 @@ import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/convert"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/redis/redis_key"
"code.gitea.io/gitea/modules/redis/redis_lock"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/cluster"
"code.gitea.io/gitea/services/ai_task_service/container_builder"
"code.gitea.io/gitea/services/ai_task_service/context"
"code.gitea.io/gitea/services/cloudbrain/resource"
"errors"
"strconv"
"time"
)

@@ -43,20 +48,40 @@ type AITaskTemplate interface {
BriefQuery(cloudbrainId int64) (*entity.AITaskBriefInfo, *response.BizError)
Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError)
Update(cloudbrainId int64) *response.BizError
GetLog(cloudbrainId int64) (*entity.ClusterLog, *response.BizError)
GetOutput(cloudbrainId int64) *response.BizError
GetLog(opts entity.QueryLogOpts) (*entity.ClusterLog, *response.BizError)
GetLogDownloadInfo(opts entity.GetLogDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError)
GetSingleOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError)
GetAllOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError)
GetOutput(cloudbrainId int64, parentDir string) (*entity.AITaskOutput, *response.BizError)
GetAllOutput(opts entity.GetAllOutputReq) (*entity.AllAITaskOutput, *response.BizError)
GetDebugUrl(cloudbrainId int64, fileName ...string) (string, *response.BizError)
GetOperationProfile(cloudbrainId int64) (*entity.OperationProfile, *response.BizError)
GetResourceUsage(opts entity.GetResourceUsageOpts) (*entity.ResourceUsage, *response.BizError)
GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError)
GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError)
GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig
GetConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig
GetNodeInfo(cloudbrainId int64) ([]entity.AITaskNodeInfo, *response.BizError)
GetAllowedWorkerNum(userId int64, computeSource *models.ComputeSource) ([]int, *response.BizError)
}

type GetConfigFunc func(entity.AITaskConfigKey) *entity.AITaskBaseConfig

type DefaultAITaskTemplate struct {
DefaultCreationHandler
ClusterType entity.ClusterType
JobType models.JobType
aiTaskConfig entity.AITaskConfig
ClusterType entity.ClusterType
JobType models.JobType
Config GetConfigFunc
}

// BuildContainerData runs the container-data build chain described by
// ctx.Config.ContainerSteps against ctx. A context without a Config has
// nothing to build and is treated as success.
func (g DefaultAITaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError {
	if ctx.Config != nil {
		chain := container_builder.BuildContainerDataChain(ctx.Config.ContainerSteps)
		if bizErr := chain.Run(ctx); bizErr != nil {
			return bizErr
		}
	}
	return nil
}

func (g DefaultAITaskTemplate) GetMyCluster() cluster.ClusterAdapter {
@@ -68,53 +93,81 @@ func (g DefaultAITaskTemplate) GetMyCluster() cluster.ClusterAdapter {
return c
}

func (g DefaultAITaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig {
return entity.AITaskConfig{}
}
func (g DefaultAITaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError {
req := ctx.Request
jobID := ctx.Response.JobID
config := ctx.Config
user := ctx.User
repo := ctx.Repository
displayJobName := req.DisplayJobName

func (d DefaultAITaskTemplate) GetDatasetPath(ctx *context.CreationContext) string {
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps
if config == nil {
return ""
}
opt := config[entity.ContainerDataset]
if opt == nil {
return ""
if config.IsActionUseJobId {
notification.NotifyOtherTask(user, repo, jobID, displayJobName, config.ActionType)
} else {
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
log.Error("NotifyCreation GetCloudbrainByJobID failed: %v", err.Error())
return nil
}
stringId := strconv.FormatInt(task.ID, 10)
notification.NotifyOtherTask(user, repo, stringId, displayJobName, config.ActionType)
}
return opt.ContainerPath

return nil
}
func (d DefaultAITaskTemplate) GetCodePath(ctx *context.CreationContext) string {
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps
if config == nil {
return ""
func (g DefaultAITaskTemplate) GetConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
if g.Config == nil {
return &entity.AITaskBaseConfig{}
}
opt := config[entity.ContainerCode]
if opt == nil {
return ""
c := g.Config(opts)
if c == nil {
return &entity.AITaskBaseConfig{}
}
return opt.ContainerPath
return c
}
func (d DefaultAITaskTemplate) GetPretrainModelPath(ctx *context.CreationContext) string {
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps
if config == nil {
return ""

func (g DefaultAITaskTemplate) GetNodeInfo(cloudbrainId int64) ([]entity.AITaskNodeInfo, *response.BizError) {
c := g.GetMyCluster()
if c == nil {
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId)
return nil, response.SYSTEM_ERROR
}
opt := config[entity.ContainerPreTrainModel]
if opt == nil {
return ""
res, err := GetAITaskNodeInfo(cloudbrainId, c.GetNodeInfo)
if err != nil {
log.Error("GetNodeInfo error,cloudbrainId=%d err=%v", cloudbrainId, err)
return nil, response.NewBizError(err)
}
return opt.ContainerPath
log.Info("GetNodeInfo success.cloudbrainId=%d", cloudbrainId)
return res, nil
}
func (d DefaultAITaskTemplate) GetOutputPath(ctx *context.CreationContext) string {
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps
if config == nil {
return ""

var GrampusNPUMultiNodeConfig *modelarts.MultiNodes
var CloudbrainTwoNPUMultiNodeConfig *modelarts.MultiNodes

func (g DefaultAITaskTemplate) GetAllowedWorkerNum(userId int64, computeSource *models.ComputeSource) ([]int, *response.BizError) {
if g.JobType == models.JobTypeTrain && g.ClusterType == entity.OpenICloudbrainTwo && computeSource.Name == models.NPU {
modelarts.InitMultiNode()
if modelarts.MultiNodeConfig != nil {
for _, info := range modelarts.MultiNodeConfig.Info {
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg {
return info.Node, nil
}
}
}
}
opt := config[entity.ContainerOutPutPath]
if opt == nil {
return ""
if g.JobType == models.JobTypeTrain && g.ClusterType == entity.C2Net && computeSource.Name == models.NPU {
grampus.InitMultiNode()
if grampus.MultiNodeConfig != nil {
for _, info := range grampus.MultiNodeConfig.Info {
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg {
return info.Node, nil
}
}
}
}
return opt.ContainerPath
//未命中配置则只允许一个节点
return []int{1}, nil
}

func (g DefaultAITaskTemplate) Query(cloudbrainId int64) (*entity.AITaskDetailInfo, *response.BizError) {
@@ -151,7 +204,13 @@ func (g DefaultAITaskTemplate) Delete(cloudbrainId int64) *response.BizError {
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId)
return response.SYSTEM_ERROR
}
err := DelTask(cloudbrainId, c.DeleteNoteBook)
var err error
if g.JobType == models.JobTypeDebug {
err = DelTask(cloudbrainId, c.DeleteNoteBook)
} else {
err = DelTask(cloudbrainId, c.DeleteTrainJob)
}

if err != nil {
log.Error("DelTask error,cloudbrainId=%d err=%v", cloudbrainId, err)
return response.NewBizError(err)
@@ -166,7 +225,13 @@ func (g DefaultAITaskTemplate) Stop(cloudbrainId int64) (*entity.AITaskBriefInfo
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId)
return nil, response.SYSTEM_ERROR
}
err := StopTask(cloudbrainId, c.StopNoteBook)
var err error
if g.JobType == models.JobTypeDebug {
err = StopTask(cloudbrainId, c.StopNoteBook)
} else {
err = StopTask(cloudbrainId, c.StopTrainJob)
}

if err != nil {
log.Error("StopTask err.cloudbrainId=%d err=%v", cloudbrainId, err)
return nil, response.NewBizError(err)
@@ -215,7 +280,12 @@ func (g DefaultAITaskTemplate) Update(cloudbrainId int64) *response.BizError {
//二是处于PREPARING的时间超过了配置的等待时间,此时意味着异步创建任务时间过长或者出现了未知异常
if cloudbrain.NeedActiveStop() {
log.Info("AI task should active stop.cloudbrainId=%d", cloudbrainId)
err = StopAITaskByJobNameFromRemote(cloudbrain, c.QueryNoteBookByJobName, c.StopNoteBook)
if g.JobType == models.JobTypeDebug {
err = StopAITaskByJobNameFromRemote(cloudbrain, c.QueryNoteBookByJobName, c.StopNoteBook)
} else {
err = StopAITaskByJobNameFromRemote(cloudbrain, c.QueryTrainJobByJobName, c.StopTrainJob)
}

if err != nil {
log.Error("StopAITaskByJobNameFromRemote err.cloudbrainId=%d err=%v", cloudbrainId, err)
return response.NewBizError(err)
@@ -228,8 +298,12 @@ func (g DefaultAITaskTemplate) Update(cloudbrainId int64) *response.BizError {
log.Info("AI task is preparing.No need to update from remote.cloudbrainId=%d", cloudbrainId)
return nil
}
if g.JobType == models.JobTypeDebug {
err = UpdateAITaskFromRemote(cloudbrain, c.QueryNoteBook)
} else {
err = UpdateAITaskFromRemote(cloudbrain, c.QueryTrainJob)
}

err = UpdateAITaskFromRemote(cloudbrain, c.QueryNoteBook)
if err != nil {
log.Error("UpdateAITaskFromRemote err.cloudbrainId=%d err=%v", cloudbrainId, err)
return response.NewBizError(err)
@@ -238,12 +312,92 @@ func (g DefaultAITaskTemplate) Update(cloudbrainId int64) *response.BizError {
return nil
}

func (g DefaultAITaskTemplate) GetLog(cloudbrainId int64) (*entity.ClusterLog, *response.BizError) {
return nil, nil
func (g DefaultAITaskTemplate) GetLog(opts entity.QueryLogOpts) (*entity.ClusterLog, *response.BizError) {
c := g.GetMyCluster()
if c == nil {
log.Error("Get cluster failed,cloudbrainId=%d", opts)
return nil, response.SYSTEM_ERROR
}
s, err := QueryTaskLog(opts, c.GetLog)
if err != nil {
log.Error("GetLog err.cloudbrainId=%d err =%v", opts, err)
return &entity.ClusterLog{}, nil
}

return s, nil
}

func (g DefaultAITaskTemplate) GetOutput(cloudbrainId int64) *response.BizError {
return nil
// GetLogDownloadInfo resolves the download information of a task log through
// the cluster adapter of this template.
//
// It returns SYSTEM_ERROR when no cluster adapter is available. A failure of
// the underlying lookup is only logged and reported as (nil, nil), so callers
// must nil-check the returned info.
func (g DefaultAITaskTemplate) GetLogDownloadInfo(opts entity.GetLogDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) {
	c := g.GetMyCluster()
	if c == nil {
		// opts is a struct, so it is printed with %+v rather than %d.
		log.Error("Get cluster failed,opts=%+v", opts)
		return nil, response.SYSTEM_ERROR
	}
	s, err := GetLogDownloadInfo(opts, c.GetLogDownloadInfo)
	if err != nil {
		log.Error("GetLogDownloadInfo err.opts=%+v err=%v", opts, err)
		return nil, nil
	}

	return s, nil
}

// GetSingleOutputDownloadInfo resolves the download information of a single
// output file through the cluster adapter of this template.
//
// It returns SYSTEM_ERROR when no cluster adapter is available. A failure of
// the underlying lookup is only logged and reported as (nil, nil), so callers
// must nil-check the returned info.
func (g DefaultAITaskTemplate) GetSingleOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) {
	c := g.GetMyCluster()
	if c == nil {
		// opts is a struct, so it is printed with %+v rather than %d.
		log.Error("Get cluster failed,opts=%+v", opts)
		return nil, response.SYSTEM_ERROR
	}
	s, err := GetSingleOutputDownloadInfo(opts, c.GetSingleOutputDownloadInfo)
	if err != nil {
		log.Error("GetSingleOutputDownloadInfo err.opts=%+v err=%v", opts, err)
		return nil, nil
	}

	return s, nil
}

// GetAllOutputDownloadInfo resolves the download information covering all
// outputs of a task through the cluster adapter of this template.
//
// It returns SYSTEM_ERROR when no cluster adapter is available. A failure of
// the underlying lookup is only logged and reported as (nil, nil), so callers
// must nil-check the returned info.
func (g DefaultAITaskTemplate) GetAllOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) {
	c := g.GetMyCluster()
	if c == nil {
		// opts is a struct, so it is printed with %+v rather than %d.
		log.Error("Get cluster failed,opts=%+v", opts)
		return nil, response.SYSTEM_ERROR
	}
	s, err := GetAllOutputDownloadInfo(opts, c.GetAllOutputDownloadInfo)
	if err != nil {
		log.Error("GetAllOutputDownloadInfo err.opts=%+v err=%v", opts, err)
		return nil, nil
	}

	return s, nil
}

// GetOutput lists the output of the task identified by cloudbrainId under
// parentDir, using the cluster adapter of this template.
//
// A missing cluster adapter yields SYSTEM_ERROR. A failed listing is logged
// and reported as (nil, nil).
func (g DefaultAITaskTemplate) GetOutput(cloudbrainId int64, parentDir string) (*entity.AITaskOutput, *response.BizError) {
	adapter := g.GetMyCluster()
	if adapter == nil {
		log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId)
		return nil, response.SYSTEM_ERROR
	}

	output, err := GetAITaskOutput(cloudbrainId, parentDir, adapter.GetOutput)
	if err == nil {
		return output, nil
	}
	log.Error("GetOutput err.cloudbrainId=%d err =%v", cloudbrainId, err)
	return nil, nil
}

// GetAllOutput collects every output entry of a task through the cluster
// adapter of this template.
//
// It returns SYSTEM_ERROR when no cluster adapter is available. A failure of
// the underlying lookup is only logged and reported as (nil, nil), so callers
// must nil-check the returned value.
func (g DefaultAITaskTemplate) GetAllOutput(opts entity.GetAllOutputReq) (*entity.AllAITaskOutput, *response.BizError) {
	c := g.GetMyCluster()
	if c == nil {
		// opts is a struct, so it is printed with %+v rather than %d.
		log.Error("Get cluster failed,opts=%+v", opts)
		return nil, response.SYSTEM_ERROR
	}
	s, err := GetAllAITaskOutput(opts, c.GetAllOutput)
	if err != nil {
		log.Error("GetAllOutput err.opts=%+v err=%v", opts, err)
		return nil, nil
	}
	return s, nil
}

func (g DefaultAITaskTemplate) GetDebugUrl(cloudbrainId int64, fileName ...string) (string, *response.BizError) {
@@ -270,32 +424,60 @@ func (g DefaultAITaskTemplate) GetOperationProfile(cloudbrainId int64) (*entity.
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId)
return nil, response.SYSTEM_ERROR
}
s, err := GetOperationProfile(cloudbrainId, c.GetNoteBookOperationProfile)
var s *entity.OperationProfile
var err error
if g.JobType == models.JobTypeDebug {
s, err = GetOperationProfile(cloudbrainId, c.GetNoteBookOperationProfile)
} else {
s, err = GetOperationProfile(cloudbrainId, c.GetTrainJobOperationProfile)
}
if err != nil {
log.Error("QueryNoteBookUrl err.cloudbrainId=%d err =%v", cloudbrainId, err)
log.Error("GetOperationProfile err.cloudbrainId=%d err =%v", cloudbrainId, err)
return nil, nil
}
if s == nil {
s = &entity.OperationProfile{Events: []entity.ProfileEvent{}}
}
return s, nil
}

// GetResourceUsage fetches resource usage data for a task through the cluster
// adapter of this template.
//
// It returns SYSTEM_ERROR when no cluster adapter is available. A failure of
// the underlying lookup is only logged and reported as (nil, nil), so callers
// must nil-check the returned value.
func (g DefaultAITaskTemplate) GetResourceUsage(opts entity.GetResourceUsageOpts) (*entity.ResourceUsage, *response.BizError) {
	c := g.GetMyCluster()
	if c == nil {
		log.Error("Get cluster failed,opts=%+v", opts)
		return nil, response.SYSTEM_ERROR
	}
	res, err := GetResourceUsage(opts, c.GetResourceUsage)
	if err != nil {
		// Name the operation that actually failed so the log can be searched.
		log.Error("GetResourceUsage err.opts=%+v err =%v", opts, err)
		return nil, nil
	}
	return res, nil
}

func (g DefaultAITaskTemplate) GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) {
c := g.GetMyCluster()
if c == nil {
log.Error("Get cluster failed")
return nil, false, response.SYSTEM_ERROR
}
l, f, err := c.GetImages(entity.GetImageReq{
ComputeSource: computeSource,
JobType: g.JobType,
})

var images []entity.ClusterImage
var customFlag bool
var err error
if g.JobType == models.JobTypeDebug {
images, customFlag, err = c.GetNotebookImages(entity.GetImageReq{
ComputeSource: computeSource,
JobType: g.JobType,
})
} else {
images, customFlag, err = c.GetTrainImages(entity.GetImageReq{
ComputeSource: computeSource,
JobType: g.JobType,
})
}
if err != nil {
log.Error("GetImages err.computeSource=%s err =%v", computeSource.Name, err)
return nil, false, response.NewBizError(err)
}
return l, f, nil
return images, customFlag, nil
}

func (g DefaultAITaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) {
@@ -314,3 +496,26 @@ func (g DefaultAITaskTemplate) GetSpecs(userId int64, computeSource models.Compu
}
return r, nil
}

// CheckWorkerNum validates the requested number of work servers.
//
// Requests for at most one server always pass. For larger requests the number
// must be one of the values returned by GetAllowedWorkerNum for the current
// user and compute source; otherwise NO_NODE_RIGHR is returned. An error from
// GetAllowedWorkerNum is propagated instead of being discarded.
func (g DefaultAITaskTemplate) CheckWorkerNum(ctx *context.CreationContext) *response.BizError {
	log.Info("Start to CheckMultiNode.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
	serverNum := ctx.Request.WorkServerNumber
	if serverNum <= 1 {
		return nil
	}
	workerNums, bizErr := g.GetAllowedWorkerNum(ctx.User.ID, ctx.Request.ComputeSource)
	if bizErr != nil {
		log.Error("CheckWorkerNum GetAllowedWorkerNum err.displayJobName=%s err=%v", ctx.Request.DisplayJobName, bizErr)
		return bizErr
	}
	if !isInNodes(workerNums, serverNum) {
		return response.NO_NODE_RIGHR
	}
	return nil
}

// isInNodes reports whether num occurs in nodes. A nil or empty slice never
// contains num.
func isInNodes(nodes []int, num int) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i] != num {
			continue
		}
		return true
	}
	return false
}

+ 50
- 6
services/ai_task_service/task/task_config.go View File

@@ -3,16 +3,60 @@ package task
import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"path"
"strings"
)

func GetAITaskConfigByCloudbrain(cloudbrain *models.Cloudbrain) entity.AITaskConfig {
func GetDetailConfigInfoByCloudbrain(cloudbrain *models.Cloudbrain) *entity.AITaskDetailConfigInfo {
aiConfig := cloudbrain.GetCloudbrainConfig()
if aiConfig != nil {
return entity.BuildAITaskDetailConfigInfo(aiConfig)
}
return getHistoricalConfigInfo(cloudbrain)
}

//历史任务在cloudbrain_config表中没有对应的记录,因此根据实时配置模拟
func getHistoricalConfigInfo(cloudbrain *models.Cloudbrain) *entity.AITaskDetailConfigInfo {
t, _ := GetAITaskTemplateFromCloudbrain(cloudbrain)
if t == nil {
return entity.AITaskConfig{}
return &entity.AITaskDetailConfigInfo{}
}
c := t.GetConfig(entity.AITaskConfigKey{ComputeSource: cloudbrain.GetStandardComputeSource()})
return &entity.AITaskDetailConfigInfo{
BaseConfig: c,
OutputObjectPrefix: GetContainerStorageObjectPrefix(c, cloudbrain.JobName, cloudbrain.VersionName, entity.ContainerOutPutPath),
OutputStorageType: GetContainerStorageType(c, entity.ContainerOutPutPath),
LogObjectPrefix: GetContainerStorageObjectPrefix(c, cloudbrain.JobName, cloudbrain.VersionName, entity.ContainerLogPath),
LogStorageType: GetContainerStorageType(c, entity.ContainerLogPath),
}
}

// GetContainerStorageObjectPrefix builds the storage object key prefix for the
// given container data type of a job.
//
// The prefix is the uploader's default job prefix for jobName, joined with the
// container's local path and versionName. The uploader is selected from the
// first accepted storage type. An empty string is returned when the container
// type has no configuration or accepts no storage type.
func GetContainerStorageObjectPrefix(c *entity.AITaskBaseConfig, jobName string, versionName string, containerType entity.ContainerDataType) string {
	config := c.GetContainerConfig(containerType)
	if config == nil {
		return ""
	}
	st := config.AcceptStorageType
	// len is 0 for nil and for empty slices alike, so st[0] below cannot panic.
	if len(st) == 0 {
		return ""
	}
	uploader := storage_helper.SelectUploaderFromStorageType(st[0])
	// versionName is appended for compatibility with historical tasks. Cloudbrain
	// Two training tasks add a default version to the local path to fit the
	// ModelArts API; that suffix is stripped here.
	localPath := strings.TrimSuffix(config.GetLocalPath(), models.CloudbrainTwoDefaultVersion)
	return path.Join(uploader.GetJobDefaultObjectKeyPrefix(jobName), localPath, versionName)
}

func GetContainerStorageType(c *entity.AITaskBaseConfig, containerType entity.ContainerDataType) entity.StorageType {
outputConfig := c.GetContainerConfig(containerType)
if outputConfig == nil {
return ""
}
computeSource := models.GetComputeSourceInstance(cloudbrain.ComputeResource)
if computeSource == nil {
return entity.AITaskConfig{}
st := outputConfig.AcceptStorageType
if st == nil && len(st) == 0 {
return ""
}
return t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: computeSource.Name})
return st[0]
}

+ 16
- 8
services/ai_task_service/task/task_creation_info.go View File

@@ -28,14 +28,17 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio
}

//获取代码分支
if branches, _, err := req.GitRepo.GetBranches(0, 0); err == nil {
result.Branches = branches
if len(req.Repo.DefaultBranch) > 0 && req.GitRepo.IsBranchExist(req.Repo.DefaultBranch) {
result.DefaultBranch = req.Repo.DefaultBranch
} else if len(branches) > 0 {
result.DefaultBranch = branches[0]
if req.GitRepo != nil {
if branches, _, err := req.GitRepo.GetBranches(0, 0); err == nil {
result.Branches = branches
if len(req.Repo.DefaultBranch) > 0 && req.GitRepo.IsBranchExist(req.Repo.DefaultBranch) {
result.DefaultBranch = req.Repo.DefaultBranch
} else if len(branches) > 0 {
result.DefaultBranch = branches[0]
}
}
}

//查询积分余额
if a, err := account.GetAccount(req.User.ID); err == nil {
result.PointAccount = entity.ParsePointAccountInfo(a)
@@ -58,9 +61,14 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio
if specs, err := t.GetSpecs(req.User.ID, *req.ComputeSource); err == nil {
result.Specs = specs
}
c := t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: req.ComputeSource.Name})
result.Config = entity.AITaskCreationConfig{
DatasetMaxSize: c.DatasetMaxSize,
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
}
//查询可用节点数
if workerNums, err := t.GetAllowedWorkerNum(req.User.ID, req.ComputeSource); err == nil {
result.AllowedWorkerNum = workerNums
} else {
result.AllowedWorkerNum = []int{1}
}
return result, nil
}

+ 195
- 0
services/ai_task_service/task/task_extend.go View File

@@ -0,0 +1,195 @@
package task

import (
"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"encoding/json"
"fmt"
"strings"
)

// GetModelDownload builds one download entry per checkpoint name stored in
// task.CkptName (a ";"-separated list).
//
// The model is looked up by task.ModelId, or by task.PreTrainModelUrl when the
// id is empty. If the lookup fails or finds nothing, an empty (non-nil) list is
// returned. For C2Net tasks the link is taken from the scheduled addresses in
// task.DataUrl; for all other tasks a link is generated from the model path.
func GetModelDownload(task *models.Cloudbrain) []*models.ModelDownload {
	var repositoryLink string
	pretrainModelList := []*models.ModelDownload{}
	ckptNames := strings.Split(task.CkptName, ";")
	var model *models.AiModelManage
	var err error
	if task.ModelId == "" {
		model, err = models.QueryModelByPath(task.PreTrainModelUrl)
	} else {
		model, err = models.QueryModelById(task.ModelId)
	}
	if err != nil || model == nil {
		return pretrainModelList
	}

	// The repository link is optional; it stays empty when the lookup fails.
	if r, err := models.QueryModelRepoByModelID(model.ID); err == nil {
		repositoryLink = r.Link()
	}
	for _, ckptName := range ckptNames {
		var url string
		if task.Type == models.TypeC2Net {
			url = getModelContainerLink(task.DataUrl, ckptName)
		} else {
			url = getModelLocalLink(model, ckptName)
		}
		modelDownload := models.ModelDownload{
			Name:           ckptName,
			DownloadLink:   url,
			IsDelete:       false,
			ModelName:      model.Name,
			RepositoryLink: repositoryLink,
		}
		// NOTE(review): the deletion check uses task.ModelId, which is empty when
		// the model was resolved by path above — confirm this is intended.
		if hasModelFileDeleted(task.ModelId, ckptName) {
			// Pass the URL as an argument: percent-escapes inside it must not be
			// interpreted as format verbs.
			log.Warn("Can not get model by path:%s", url)
			modelDownload.IsDelete = true
		}
		pretrainModelList = append(pretrainModelList, &modelDownload)
	}
	return pretrainModelList
}

// getModelLocalLink returns a signed OBS URL for the checkpoint file ckptName
// of model.
//
// The object key is model.Path without its first path segment (everything up
// to and including the first "/"), followed by ckptName. When the path has no
// "/", the whole path is used. A signing failure is logged and the URL
// returned by the signer is passed through unchanged.
func getModelLocalLink(model *models.AiModelManage, ckptName string) string {
	index := strings.Index(model.Path, "/")
	// index is -1 when no "/" exists, so index+1 == 0 keeps the full path.
	key := model.Path[index+1:] + ckptName
	url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, key)
	if err != nil {
		log.Error("getModelLocalLink GetObsCreateSignedUrlByBucketAndKey err.key=%s err=%v", key, err)
	}
	return url
}

// GetCloudBrainDataSetInfo returns the dataset entries attached to task.
//
// For tasks that are not of type C2Net the locally resolved entries are
// returned unchanged. For C2Net tasks every download link is replaced by the
// first scheduled address in task.DataUrl whose URL contains the dataset name,
// or by an empty string when none matches.
func GetCloudBrainDataSetInfo(task *models.Cloudbrain) []*models.DatasetDownload {
	datasetDownload := getCloudBrainDatasetInfo4Local(task.Uuid, task.DatasetName, true)

	// Tasks outside C2Net report the local addresses.
	if task.Type != models.TypeC2Net {
		return datasetDownload
	}
	// C2Net tasks must report the addresses produced by scheduling.
	datasetObsUrlList := make([]entity.NotebookDataset, 0)
	if task.DataUrl != "" {
		// Decoding stays best-effort, but a malformed DataUrl is now visible in the log.
		if err := json.Unmarshal([]byte(task.DataUrl), &datasetObsUrlList); err != nil {
			log.Error("GetCloudBrainDataSetInfo unmarshal DataUrl err.taskId=%d err=%v", task.ID, err)
		}
	}

	for _, datasetInfo := range datasetDownload {
		datasetInfo.DatasetDownloadLink = ""
		for _, datasetObs := range datasetObsUrlList {
			// URLs may contain percent-escapes, so they are passed as arguments
			// instead of being concatenated into the format string.
			log.Info("datasetObsUrl:%s datasetName:%s", datasetObs.DatasetUrl, datasetInfo.DatasetName)
			if strings.Contains(datasetObs.DatasetUrl, datasetInfo.DatasetName) {
				datasetInfo.DatasetDownloadLink = datasetObs.DatasetUrl
				break
			}
		}
	}
	return datasetDownload
}

// getCloudBrainDatasetInfo4Local resolves the ";"-separated attachment UUIDs in
// uuid into dataset entries.
//
// An attachment that cannot be loaded is reported as deleted under the name at
// the same position of the ";"-separated datasetname list; it is skipped when
// no such name exists. For a loaded attachment the repository link is filled
// in when both its dataset and repository can be found, and the download URL
// is filled in only when isNeedDown is true.
func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown bool) []*models.DatasetDownload {
	result := make([]*models.DatasetDownload, 0)
	if uuid == "" {
		return result
	}

	fallbackNames := strings.Split(datasetname, ";")
	for idx, id := range strings.Split(uuid, ";") {
		entry := &models.DatasetDownload{UUID: id}

		attachment, err := models.GetAttachmentByUUID(id)
		if err != nil {
			log.Error("GetAttachmentByUUID failed:%v", err.Error())
			if idx >= len(fallbackNames) || datasetname == "" {
				continue
			}
			entry.DatasetName = fallbackNames[idx]
			entry.IsDelete = true
			result = append(result, entry)
			continue
		}

		entry.DatasetName = attachment.Name
		if dataset, err := models.GetDatasetByID(attachment.DatasetID); err != nil {
			log.Error("GetDatasetByID failed:%v", err.Error())
		} else if repo, err := models.GetRepositoryByID(dataset.RepoID); err != nil {
			log.Error("GetRepositoryByID failed:%v", err.Error())
		} else {
			entry.RepositoryLink = repo.Link() + "/datasets"
		}
		if isNeedDown {
			entry.DatasetDownloadLink = attachment.S3DownloadURL()
		}
		result = append(result, entry)
	}

	log.Info("dataset length=" + fmt.Sprint(len(result)))
	return result
}

// correctAITaskSpec corrects the stored specification of task according to the
// AI center the task was actually scheduled to.
//
// task.AiCenter is split on "+" and must have at least two parts; the first
// part is taken as the real center code. When it differs from the center code
// of the stored spec, the spec with the same SourceSpecId in the real center is
// looked up and written back. Every failure is logged and otherwise ignored.
func correctAITaskSpec(task *models.Cloudbrain) {
	if task.AiCenter == "" {
		return
	}
	s := strings.Split(task.AiCenter, "+")
	if len(s) < 2 {
		return
	}

	realCenterCode := s[0]
	if realCenterCode == "" {
		return
	}

	oldSpec, err := models.GetCloudbrainSpecByID(task.ID)
	if err != nil {
		log.Error("correctAITaskSpec GetCloudbrainSpecByID err.taskId=%d err=%v", task.ID, err)
		return
	}
	if oldSpec == nil {
		log.Error("correctAITaskSpec GetCloudbrainSpecByID spec is empty.taskId=%d ", task.ID)
		return
	}
	if oldSpec.AiCenterCode == realCenterCode {
		return
	}
	// Only a differing AI center needs handling.
	r, err := models.FindSpecs(models.FindSpecsOptions{
		SourceSpecId: oldSpec.SourceSpecId,
		AiCenterCode: realCenterCode,
	})
	if err != nil {
		log.Error("correctAITaskSpec FindSpecs err.taskId=%d err=%v", task.ID, err)
		return
	}
	// len covers the nil case as well.
	if len(r) == 0 {
		log.Error("correctAITaskSpec FindSpecs 0.taskId=%d ", task.ID)
		return
	}
	n, err := models.UpdateCloudbrainSpec(task.ID, r[0])
	if err != nil {
		log.Error("correctAITaskSpec UpdateCloudbrainSpec err.taskId=%d err=%v", task.ID, err)
		return
	}
	if n > 0 {
		log.Info("correctAITaskSpec success,taskId=%d oldCenter=%s realCenter=%s", task.ID, oldSpec.AiCenterCode, realCenterCode)
	}
}

// getModelContainerLink returns the first address in the JSON list dataUrl
// whose URL contains ckptName, or an empty string when dataUrl is empty or no
// address matches. Decoding is best-effort: a decode error is ignored.
func getModelContainerLink(dataUrl string, ckptName string) string {
	if len(dataUrl) == 0 {
		return ""
	}

	scheduled := []entity.NotebookDataset{}
	_ = json.Unmarshal([]byte(dataUrl), &scheduled)

	for i := range scheduled {
		if candidate := scheduled[i].DatasetUrl; strings.Contains(candidate, ckptName) {
			return candidate
		}
	}
	return ""
}

+ 226
- 238
services/ai_task_service/task/task_service.go View File

@@ -1,14 +1,6 @@
package task

import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/convert"
@@ -24,15 +16,30 @@ import (
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask"
"code.gitea.io/gitea/services/cloudbrain/resource"
"code.gitea.io/gitea/services/lock"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"path"
"strconv"
"strings"
)

type QueryFunc func(string) (*entity.QueryTaskResponse, error)
type QueryFunc func(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error)
type QueryListFunc func(string) ([]*entity.QueryTaskResponse, error)
type DeleteFunc func(string) error
type StopFunc func(string) error
type GetLogFunc func(string) (*entity.ClusterLog, error)
type DeleteFunc func(opts entity.JobIdAndVersionId) error
type StopFunc func(opts entity.JobIdAndVersionId) error
type GetLogFunc func(entity.ClusterLogOpts) (*entity.ClusterLog, error)
type GetLogDownloadInfoFunc func(entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error)
type GetNotebookUrlFunc func(string) (string, error)
type GetNodeInfoFunc func(entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error)
type GetOutputFunc func(entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error)
type GetAllOutputFunc func(entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error)
type GetSingleOutputDownloadInfoFunc func(req entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error)
type GetAllOutputDownloadInfoFunc func(req entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error)
type GetOperationProfileFunc func(string) (*entity.OperationProfile, error)
type GetResourceUsageFunc func(entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error)

func BuildAITaskInfo(cloudbrainId int64) (*entity.AITaskDetailInfo, error) {
cloudbrain, err := models.GetCloudbrainByCloudbrainID(cloudbrainId)
@@ -49,10 +56,11 @@ func BuildAITaskByCloudbrain(cloudbrain *models.Cloudbrain) (*entity.AITaskDetai
if err != nil {
return nil, err
}
return buildAITaskInfo(cloudbrain, creator, GetAITaskConfigByCloudbrain(cloudbrain))
c := GetDetailConfigInfoByCloudbrain(cloudbrain)
return buildAITaskInfo(cloudbrain, creator, c)
}

func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entity.AITaskConfig) (*entity.AITaskDetailInfo, error) {
func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config *entity.AITaskDetailConfigInfo) (*entity.AITaskDetailInfo, error) {
spec, err := resource.GetCloudbrainSpec(task.ID)
if err != nil {
log.Error("buildAITaskInfo GetCloudbrainSpec error,id =%d ,err =%v", task.ID, err)
@@ -70,11 +78,6 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit
if task.WorkServerNumber > 1 {
n = task.WorkServerNumber
}
computeSource := ""
c := models.GetComputeSourceInstance(task.ComputeResource)
if c != nil {
computeSource = c.Name
}
imageName := task.Image
imageUrl := task.Image
imageId := task.ImageID
@@ -84,6 +87,11 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit
if imageId == "" && task.EngineID > 0 {
imageId = fmt.Sprint(task.EngineID)
}

baseConfig := &entity.AITaskBaseConfig{}
if config != nil && config.BaseConfig != nil {
baseConfig = config.BaseConfig
}
return &entity.AITaskDetailInfo{
ID: task.ID,
JobID: task.JobID,
@@ -91,7 +99,7 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit
JobType: task.JobType,
DisplayJobName: task.DisplayJobName,
FormattedDuration: task.TrainJobDuration,
ComputeSource: computeSource,
ComputeSource: task.GetStandardComputeSource(),
PreVersionName: task.PreVersionName,
CurrentVersionName: task.VersionName,
WorkServerNumber: n,
@@ -103,14 +111,16 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit
Cluster: string(entity.GetClusterTypeFromCloudbrainType(task.Type)),
Parameters: parseAITaskParameters(task.Parameters),
CreatedUnix: task.CreatedUnix,
CodePath: config.GetContainerPath(entity.ContainerCode),
DatasetPath: config.GetContainerPath(entity.ContainerDataset),
PretrainModelPath: config.GetContainerPath(entity.ContainerPreTrainModel),
OutputPath: config.GetContainerPath(entity.ContainerOutPutPath),
CodeUrl: task.TrainUrl,
CodePath: baseConfig.GetContainerPath(entity.ContainerCode),
DatasetPath: baseConfig.GetContainerPath(entity.ContainerDataset),
PretrainModelPath: baseConfig.GetContainerPath(entity.ContainerPreTrainModel),
OutputPath: baseConfig.GetContainerPath(entity.ContainerOutPutPath),
CodeUrl: task.RemoteCodeUrl,
PretrainModelName: task.ModelName,
PretrainModelVersion: task.ModelVersion,
PretrainCkptName: task.CkptName,
PretrainModelUrl: task.PreTrainModelUrl,
PretrainModelId: task.ModelId,
StartTime: task.StartTime,
EndTime: task.EndTime,
Description: task.Description,
@@ -122,6 +132,7 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit
ImageUrl: imageUrl,
CreatorName: creator.GetDisplayName(),
EngineName: task.EngineName,
UserId: task.UserID,
}, nil
}

@@ -138,7 +149,7 @@ func parseAITaskParameters(paramStr string) *models.Parameters {
return parameters
}

func QueryTaskEarlyVersionList(id int64, operatorId int64) ([]*entity.AITaskDetailInfo, error) {
func QueryTaskEarlyVersionList(id int64) ([]*entity.AITaskDetailInfo, error) {
task, err := models.GetCloudbrainByCloudbrainID(id)
if err != nil {
return nil, err
@@ -158,10 +169,6 @@ func QueryTaskEarlyVersionList(id int64, operatorId int64) ([]*entity.AITaskDeta
log.Error("QueryTaskEarlyVersionList convertCloudbrainToAITaskDetailInfo err.id=%d currentId=%d err=%v", id, taskList[i].ID, err)
return nil, err
}
if operatorId == 0 || taskList[i].UserID != operatorId {
t.RemoveDatasets()
t.RemovePretrainModelList()
}
resultList[i] = t
}
return resultList, nil
@@ -178,7 +185,7 @@ func QueryTaskBriefInfo(id int64) (*entity.AITaskBriefInfo, error) {

func UpdateAITaskFromRemote(task *models.Cloudbrain, remoteFunc QueryFunc) error {
log.Info("start to UpdateAITaskFromRemote.task.DisplayJobName = %s task.Status = %s", task.DisplayJobName, task.Status)
res, err := remoteFunc(task.JobID)
res, err := remoteFunc(entity.JobIdAndVersionId{JobID: task.JobID, VersionID: task.VersionID})
log.Info("remoteQueryFunc task.DisplayJobName = %s res = %+v ", task.DisplayJobName, res)
if err != nil {
log.Error("query from remote err.cloudbrainID = %d err=%v", task.ID, err)
@@ -215,7 +222,7 @@ func StopAITaskByJobNameFromRemote(task *models.Cloudbrain, queryFunc QueryListF
if v.StartedAt < task.CreatedUnix-5*60 {
continue
}
if err = stopFunc(v.JobId); err != nil {
if err = stopFunc(entity.JobIdAndVersionId{JobID: v.JobId, VersionID: v.VersionId}); err != nil {
log.Error("stop task err. name=%s jobId=%s err=%v", task.JobName, v.JobId)
return err
}
@@ -258,7 +265,7 @@ func UpdateByQueryResponse(res *entity.QueryTaskResponse, task *models.Cloudbrai

task.JobID = res.JobId

task.TrainUrl = res.CodeUrl
task.RemoteCodeUrl = res.CodeUrl
task.DataUrl = res.DataUrl
task.ContainerID = res.ContainerID
task.ContainerIp = res.ContainerIP
@@ -321,52 +328,6 @@ func isCloudbrainOneNotebookReady(jobId string) bool {

}

// correctAITaskSpec re-points the stored resource specification of a task at the AI
// center the task was actually scheduled to.
//
// task.AiCenter is split on "+" and must yield at least two parts; the first part is
// taken as the real center code. Nothing is done when that code is empty or already
// equals the AiCenterCode of the stored spec. Otherwise the spec with the same
// SourceSpecId in the real center is looked up and written back to the task.
//
// The correction is best effort: every failure is logged and swallowed, nothing is
// returned to the caller.
func correctAITaskSpec(task *models.Cloudbrain) {
	if task.AiCenter == "" {
		return
	}
	parts := strings.Split(task.AiCenter, "+")
	if len(parts) < 2 {
		return
	}

	realCenterCode := parts[0]
	if realCenterCode == "" {
		return
	}

	oldSpec, err := models.GetCloudbrainSpecByID(task.ID)
	if err != nil {
		log.Error("correctAITaskSpec GetCloudbrainSpecByID err.taskId=%d err=%v", task.ID, err)
		return
	}
	if oldSpec == nil {
		log.Error("correctAITaskSpec GetCloudbrainSpecByID spec is empty.taskId=%d ", task.ID)
		return
	}
	if oldSpec.AiCenterCode == realCenterCode {
		return
	}

	// Only reached when the scheduled AI center differs from the stored one.
	candidates, err := models.FindSpecs(models.FindSpecsOptions{
		SourceSpecId: oldSpec.SourceSpecId,
		AiCenterCode: realCenterCode,
	})
	if err != nil {
		log.Error("correctAITaskSpec FindSpecs err.taskId=%d err=%v", task.ID, err)
		return
	}
	// len() of a nil slice is 0, so a separate nil check is unnecessary.
	if len(candidates) == 0 {
		log.Error("correctAITaskSpec FindSpecs 0.taskId=%d ", task.ID)
		return
	}

	n, err := models.UpdateCloudbrainSpec(task.ID, candidates[0])
	if err != nil {
		// Previously this failure was dropped without any trace.
		log.Error("correctAITaskSpec UpdateCloudbrainSpec err.taskId=%d err=%v", task.ID, err)
		return
	}
	if n > 0 {
		log.Info("correctAITaskSpec success,taskId=%d oldCenter=%s realCenter=%s", task.ID, oldSpec.AiCenterCode, realCenterCode)
	}
}

func DelTask(id int64, deleteRemote DeleteFunc) error {
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id)
if err != nil {
@@ -379,7 +340,7 @@ func DelTask(id int64, deleteRemote DeleteFunc) error {

//删除远端记录
if cloudbrain.JobID != "" {
err = deleteRemote(cloudbrain.JobID)
err = deleteRemote(entity.JobIdAndVersionId{JobID: cloudbrain.JobID, VersionID: cloudbrain.VersionID})
if err != nil {
log.Error("delete from remote err.%v", err)
return err
@@ -409,22 +370,94 @@ func StopTask(id int64, stopRemote StopFunc) error {
if cloudbrain.IsPreparing() || cloudbrain.IsCreating() {
return nil
}
err = stopRemote(cloudbrain.JobID)
err = stopRemote(entity.JobIdAndVersionId{JobID: cloudbrain.JobID, VersionID: cloudbrain.VersionID})
if err != nil {
log.Error("stop from remote err.%v", err)
return errors.New(response.STOP_FAILED.TrCode)
}

//返回数据
return nil
}

func QueryTaskLog(id int64, getLogRemote GetLogFunc) (*entity.ClusterLog, error) {
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id)
//jobId string, baseLine int64, lines int64, order int64
func QueryTaskLog(opts entity.QueryLogOpts, getLogRemote GetLogFunc) (*entity.ClusterLog, error) {
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
if err != nil {
return nil, err
}
if cloudbrain.JobID == "" {
return &entity.ClusterLog{
Content: "",
}, nil
}
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
return getLogRemote(entity.ClusterLogOpts{
JobId: cloudbrain.JobID,
BaseLine: opts.BaseLine,
Lines: opts.Lines,
Direction: opts.Order,
ObjectKeyPrefix: aiConfig.LogObjectPrefix,
StorageType: aiConfig.LogStorageType,
VersionID: cloudbrain.VersionID,
NodeId: opts.NodeId,
LogFileName: opts.LogFileName,
WorkServerNum: cloudbrain.WorkServerNumber,
})
}

func GetLogDownloadInfo(opts entity.GetLogDownloadInfoReq, getLogDownloadInfo GetLogDownloadInfoFunc) (*entity.FileDownloadInfo, error) {
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
if err != nil {
return nil, err
}
return getLogRemote(cloudbrain.JobID)
if cloudbrain.JobID == "" {
return nil, nil
}
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
return getLogDownloadInfo(entity.ClusterLogDownloadInfoOpts{
JobId: cloudbrain.JobID,
ObjectKeyPrefix: aiConfig.LogObjectPrefix,
StorageType: aiConfig.LogStorageType,
NodeId: opts.NodeId,
LogFileName: opts.LogFileName,
WorkServerNum: cloudbrain.WorkServerNumber,
JobName: cloudbrain.JobName,
DisplayJobName: cloudbrain.DisplayJobName,
})
}

// GetSingleOutputDownloadInfo resolves the download information of a single output
// file of the task identified by opts.CloudbrainId.
//
// The object path handed to f is OutputObjectPrefix joined with opts.ParentDir and
// opts.FileName. It returns (nil, nil) when the task has no JobID yet, and an error
// when the task cannot be loaded or has no detail configuration.
func GetSingleOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq, f GetSingleOutputDownloadInfoFunc) (*entity.FileDownloadInfo, error) {
	cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
	if err != nil {
		return nil, err
	}
	if cloudbrain.JobID == "" {
		return nil, nil
	}
	aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
	if aiConfig == nil {
		// Guard against a missing config instead of panicking on the field access below.
		log.Error("GetSingleOutputDownloadInfo config is nil.cloudbrainId=%d", opts.CloudbrainId)
		return nil, errors.New("ai task config not found")
	}
	// NOTE(review): path.Join cleans the result, so ".." segments in opts.ParentDir or
	// opts.FileName can move the key outside OutputObjectPrefix — confirm that callers
	// validate these request fields.
	fileRelativePath := path.Join(aiConfig.OutputObjectPrefix, opts.ParentDir, opts.FileName)
	return f(entity.ClusterOutputDownloadInfoOpts{
		JobId:       cloudbrain.JobID,
		Path:        fileRelativePath,
		StorageType: aiConfig.OutputStorageType,
	})
}

// GetAllOutputDownloadInfo resolves the download information for the whole output
// location (OutputObjectPrefix) of the task identified by opts.CloudbrainId.
//
// It returns (nil, nil) when the task has no JobID yet, and an error when the task
// cannot be loaded or has no detail configuration.
func GetAllOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq, f GetAllOutputDownloadInfoFunc) (*entity.FileDownloadInfo, error) {
	cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
	if err != nil {
		return nil, err
	}
	if cloudbrain.JobID == "" {
		return nil, nil
	}
	aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
	if aiConfig == nil {
		// Guard against a missing config instead of panicking on the field access below.
		log.Error("GetAllOutputDownloadInfo config is nil.cloudbrainId=%d", opts.CloudbrainId)
		return nil, errors.New("ai task config not found")
	}
	return f(entity.ClusterOutputDownloadInfoOpts{
		JobId:       cloudbrain.JobID,
		Path:        aiConfig.OutputObjectPrefix,
		StorageType: aiConfig.OutputStorageType,
		JobName:     cloudbrain.JobName,
	})
}

func QueryNoteBookUrl(id int64, getNoteBookUrl GetNotebookUrlFunc, fileName string) (string, error) {
@@ -449,6 +482,85 @@ func QueryNoteBookUrl(id int64, getNoteBookUrl GetNotebookUrlFunc, fileName stri
return url, nil
}

// GetAITaskNodeInfo loads the task with the given id and asks getNodeInfo for its
// node list, passing the task's JobID, WorkServerNumber and VersionID.
// A failure of getNodeInfo is logged and returned to the caller.
func GetAITaskNodeInfo(id int64, getNodeInfo GetNodeInfoFunc) ([]entity.AITaskNodeInfo, error) {
	task, err := models.GetCloudbrainByCloudbrainID(id)
	if err != nil {
		return nil, err
	}

	query := entity.ClusterNodeInfoOpts{
		JobId:         task.JobID,
		WorkServerNum: task.WorkServerNumber,
		VersionId:     task.VersionID,
	}
	nodes, err := getNodeInfo(query)
	if err == nil {
		return nodes, nil
	}
	log.Error("getNodeInfo error.id = %d err=%v", id, err)
	return nil, err
}
// GetAITaskOutput lists the output of the task with the given id below parentDir.
//
// The listing itself is produced by getOutput, which receives the task's JobID, the
// configured output prefix and storage type, and parentDir. The result is wrapped
// together with a flag telling whether the task is in a terminal state.
// An error is returned when the task cannot be loaded, has no detail configuration,
// or getOutput fails (the latter is also logged).
func GetAITaskOutput(id int64, parentDir string, getOutput GetOutputFunc) (*entity.AITaskOutput, error) {
	cloudbrain, err := models.GetCloudbrainByCloudbrainID(id)
	if err != nil {
		return nil, err
	}
	aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
	if aiConfig == nil {
		// Guard against a missing config instead of panicking on the field access below.
		log.Error("GetAITaskOutput config is nil.id=%d", id)
		return nil, errors.New("ai task config not found")
	}
	res, err := getOutput(entity.ClusterOutputOpts{
		JobId:           cloudbrain.JobID,
		ObjectKeyPrefix: aiConfig.OutputObjectPrefix,
		StorageType:     aiConfig.OutputStorageType,
		ParentDir:       parentDir,
	})
	if err != nil {
		log.Error("GetAITaskOutput getOutput from cluster error.id=%d parentDir=%s err=%v ", id, parentDir, err)
		return nil, err
	}
	return &entity.AITaskOutput{
		Status:         res.Status,
		Path:           res.Path,
		FileList:       res.FileList,
		IsTaskTerminal: cloudbrain.IsTerminal(),
	}, nil
}

// GetAllAITaskOutput fetches the complete output listing of the task identified by
// opts.CloudbrainId through getOutput and keeps only the files whose name ends with
// one of opts.Suffix (see filterOutputFile; an empty suffix list keeps everything).
//
// An error is returned when the task cannot be loaded, has no detail configuration,
// or getOutput fails (the latter is also logged).
func GetAllAITaskOutput(opts entity.GetAllOutputReq, getOutput GetAllOutputFunc) (*entity.AllAITaskOutput, error) {
	cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
	if err != nil {
		return nil, err
	}
	aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
	if aiConfig == nil {
		// Guard against a missing config instead of panicking on the field access below.
		log.Error("GetAllAITaskOutput config is nil.id=%d", opts.CloudbrainId)
		return nil, errors.New("ai task config not found")
	}
	res, err := getOutput(entity.ClusterOutputOpts{
		JobId:           cloudbrain.JobID,
		ObjectKeyPrefix: aiConfig.OutputObjectPrefix,
		StorageType:     aiConfig.OutputStorageType,
	})
	if err != nil {
		log.Error("GetAllAITaskOutput getOutput from cluster error.id=%d err=%v ", opts.CloudbrainId, err)
		return nil, err
	}

	return filterOutputFile(res, opts.Suffix), nil
}

// filterOutputFile keeps only the files of sourceOutput whose FileName ends with at
// least one entry of suffixArray.
//
// sourceOutput is returned untouched when it is nil, has no files, or when no suffix
// is given. Otherwise a new AllAITaskOutput carrying only the filtered FileList is
// returned; the list is empty (not nil) when nothing matches.
func filterOutputFile(sourceOutput *entity.AllAITaskOutput, suffixArray []string) *entity.AllAITaskOutput {
	if sourceOutput == nil || len(suffixArray) == 0 || len(sourceOutput.FileList) == 0 {
		return sourceOutput
	}

	hasWantedSuffix := func(name string) bool {
		for _, suffix := range suffixArray {
			if strings.HasSuffix(name, suffix) {
				return true
			}
		}
		return false
	}

	kept := make([]storage.FileInfo, 0)
	for _, file := range sourceOutput.FileList {
		if hasWantedSuffix(file.FileName) {
			kept = append(kept, file)
		}
	}
	return &entity.AllAITaskOutput{FileList: kept}
}

func transferFileNotebookUrl(oldUrl string, fileName string) string {
u, err := url.Parse(oldUrl)
if err != nil {
@@ -478,6 +590,26 @@ func GetFileNoteBookDebugUrl(url string, filename string) string {
return url + middle + filename
}

// GetResourceUsage loads the task identified by opts.CloudbrainId and forwards a
// cluster-level usage query built from the task and from opts to fun.
// A task without JobID yields an empty ResourceUsage without calling fun.
func GetResourceUsage(opts entity.GetResourceUsageOpts, fun GetResourceUsageFunc) (*entity.ResourceUsage, error) {
	task, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
	if err != nil {
		return nil, err
	}
	if task.JobID == "" {
		return &entity.ResourceUsage{}, nil
	}

	query := entity.ClusterResourceUsageOpts{
		JobId:            task.JobID,
		StartTime:        int64(task.StartTime),
		EndTime:          int64(task.EndTime),
		NodeId:           opts.NodeId,
		ComputeSource:    models.GetComputeSourceStandardFormat(task.ComputeResource),
		WorkServerNumber: task.WorkServerNumber,
		VersionID:        task.VersionID,
		LogFileName:      opts.LogFileName,
	}
	return fun(query)
}

func CreateAITask(form entity.CreateReq, gitRepo *git.Repository, repo *models.Repository, user *models.User) (*entity.CreateTaskRes, *response.BizError) {
t, err := GetAITaskTemplate(form.JobType, form.Cluster)
if err != nil {
@@ -495,18 +627,12 @@ func CreateAITask(form entity.CreateReq, gitRepo *git.Repository, repo *models.R
log.Error("lock processed failed:%s", errMsg)
return nil, response.BuildDefaultBizError(errMsg, errMsg)
}
c := models.GetComputeSourceInstance(form.ComputeSourceStr)
if c == nil {
log.Error("ComputeSourceStr invalid")
return nil, response.PARAM_ERROR
}
config := t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: c.Name})
return t.Create(&creation_context.CreationContext{
Request: &form,
GitRepo: gitRepo,
Repository: repo,
User: user,
AITaskConfig: config,
Request: &form,
GitRepo: gitRepo,
Repository: repo,
User: user,
Config: t.GetConfig(entity.AITaskConfigKey{ComputeSource: form.ComputeSourceStr}),
})
}

@@ -532,18 +658,12 @@ func RestartAITask(cloudbrainId int64, gitRepo *git.Repository, repo *models.Rep
log.Error("lock processed failed:%s", errMsg)
return nil, response.BuildDefaultBizError(errMsg, errMsg)
}
c := models.GetComputeSourceInstance(cloudbrain.ComputeResource)
if c == nil {
log.Error("ComputeSourceStr invalid")
return nil, response.PARAM_ERROR
}
config := t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: c.Name})
return t.Restart(&creation_context.CreationContext{
GitRepo: gitRepo,
Repository: repo,
User: user,
SourceCloudbrain: cloudbrain,
AITaskConfig: config,
Config: t.GetConfig(entity.AITaskConfigKey{ComputeSource: models.GetComputeSourceStandardFormat(cloudbrain.ComputeResource)}),
})
}

@@ -558,6 +678,9 @@ func GetOperationProfile(id int64, getOperationProfile GetOperationProfileFunc)
if errMsg != "" {
defaultRes = &entity.OperationProfile{Events: []entity.ProfileEvent{{Reason: "Error", Message: errMsg}}}
}
if cloudbrain.JobID == "" {
return defaultRes, nil
}
s, err := getOperationProfile(cloudbrain.JobID)
if err != nil || s == nil {
return defaultRes, nil
@@ -648,80 +771,6 @@ func DelCloudbrain(task *models.Cloudbrain) *response.BizError {
return t.Delete(task.ID)
}

// GetCloudBrainDataSetInfo returns the dataset download entries of a task.
//
// Tasks whose Type is not models.TypeC2Net get the locally resolved entries as is.
// For TypeC2Net tasks every download link is replaced by the first URL from
// task.DataUrl (a JSON list of entity.NotebookDataset) that contains the dataset
// name, or by "" when no such URL exists.
func GetCloudBrainDataSetInfo(task *models.Cloudbrain) []*models.DatasetDownload {
	datasetDownload := getCloudBrainDatasetInfo4Local(task.Uuid, task.DatasetName, true)

	// Tasks outside C2Net keep the local addresses.
	if task.Type != models.TypeC2Net {
		return datasetDownload
	}

	// C2Net tasks must expose the addresses produced by scheduling.
	datasetObsUrlList := make([]entity.NotebookDataset, 0)
	if task.DataUrl != "" {
		// A malformed DataUrl is reported instead of being dropped silently; whatever
		// was decoded before the failure is still used, as before.
		if err := json.Unmarshal([]byte(task.DataUrl), &datasetObsUrlList); err != nil {
			log.Error("GetCloudBrainDataSetInfo unmarshal DataUrl failed.taskId=%d err=%v", task.ID, err)
		}
	}

	for _, datasetInfo := range datasetDownload {
		datasetInfo.DatasetDownloadLink = ""
		for _, datasetObs := range datasetObsUrlList {
			log.Info("datasetObsUrl:" + datasetObs.DatasetUrl + "datasetName:" + datasetInfo.DatasetName)
			if strings.Contains(datasetObs.DatasetUrl, datasetInfo.DatasetName) {
				datasetInfo.DatasetDownloadLink = datasetObs.DatasetUrl
				break
			}
		}
	}
	return datasetDownload
}

// getCloudBrainDatasetInfo4Local builds one download entry per ";"-separated
// attachment UUID in uuid.
//
// For an attachment that can be loaded, the entry carries the attachment name, the
// "/datasets" link of the owning repository (when dataset and repository can be
// loaded) and, if isNeedDown is set, the attachment's S3 download URL.
// For an attachment that cannot be loaded, the entry is marked IsDelete and named
// after the matching position in the ";"-separated datasetname; it is skipped when
// datasetname is empty or has no entry at that position.
// An empty uuid yields an empty (non-nil) slice.
func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown bool) []*models.DatasetDownload {
	result := make([]*models.DatasetDownload, 0)
	if uuid == "" {
		return result
	}

	names := strings.Split(datasetname, ";")
	for idx, attachmentUUID := range strings.Split(uuid, ";") {
		entry := &models.DatasetDownload{}

		attachment, err := models.GetAttachmentByUUID(attachmentUUID)
		if err != nil {
			log.Error("GetAttachmentByUUID failed:%v", err.Error())
			if datasetname == "" || idx >= len(names) {
				continue
			}
			entry.DatasetName = names[idx]
			entry.IsDelete = true
			result = append(result, entry)
			continue
		}

		entry.DatasetName = attachment.Name
		if dataset, err := models.GetDatasetByID(attachment.DatasetID); err != nil {
			log.Error("GetDatasetByID failed:%v", err.Error())
		} else if repo, err := models.GetRepositoryByID(dataset.RepoID); err != nil {
			log.Error("GetRepositoryByID failed:%v", err.Error())
		} else {
			entry.RepositoryLink = repo.Link() + "/datasets"
		}
		if isNeedDown {
			entry.DatasetDownloadLink = attachment.S3DownloadURL()
		}
		result = append(result, entry)
	}
	log.Info("dataset length=" + fmt.Sprint(len(result)))
	return result
}

func HandleNewAITaskStop(cloudbrainId int64) (result *entity.AITaskBriefInfo, isHandled bool, err error) {
task, err := models.GetCloudbrainByCloudbrainID(cloudbrainId)
if err != nil {
@@ -750,64 +799,3 @@ func HandleNewAITaskDelete(cloudbrainId int64) (isHandled bool, err error) {
}
return true, nil
}

// GetModelDownload builds one download entry per ";"-separated checkpoint name in
// task.CkptName.
//
// The model is looked up by task.ModelId, or by task.PreTrainModelUrl when the id is
// empty; an empty (non-nil) list is returned when the lookup fails or finds nothing.
// For TypeC2Net tasks the link is taken from task.DataUrl, otherwise a local link is
// generated for the model. Entries whose file is reported deleted by
// hasModelFileDeleted are flagged IsDelete.
func GetModelDownload(task *models.Cloudbrain) []*models.ModelDownload {
	downloads := []*models.ModelDownload{}

	var (
		model *models.AiModelManage
		err   error
	)
	if task.ModelId != "" {
		model, err = models.QueryModelById(task.ModelId)
	} else {
		model, err = models.QueryModelByPath(task.PreTrainModelUrl)
	}
	if err != nil || model == nil {
		return downloads
	}

	repositoryLink := ""
	if r, err := models.QueryModelRepoByModelID(model.ID); err == nil {
		repositoryLink = r.Link()
	}

	for _, ckptName := range strings.Split(task.CkptName, ";") {
		var link string
		if task.Type == models.TypeC2Net {
			link = getModelContainerLink(task.DataUrl, ckptName)
		} else {
			link = getModelLocalLink(model, ckptName)
		}

		deleted := hasModelFileDeleted(task.ModelId, ckptName)
		if deleted {
			log.Warn("Can not get model by path:" + link)
		}
		downloads = append(downloads, &models.ModelDownload{
			Name:           ckptName,
			DownloadLink:   link,
			IsDelete:       deleted,
			RepositoryLink: repositoryLink,
		})
	}
	return downloads
}

// getModelLocalLink returns a signed OBS URL for the checkpoint file ckptName of model.
//
// The object key is model.Path with everything up to and including the first "/"
// removed, followed by ckptName. A signing failure is logged and the (then
// unspecified, normally empty) URL from the storage layer is returned unchanged.
func getModelLocalLink(model *models.AiModelManage, ckptName string) string {
	// strings.Index yields -1 when there is no "/", so index+1 is 0 and the whole
	// path is kept in that case.
	index := strings.Index(model.Path, "/")
	key := model.Path[index+1:] + ckptName
	url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, key)
	if err != nil {
		// Previously the error was discarded, hiding why a link came back unusable.
		log.Error("getModelLocalLink sign url failed.key=%s err=%v", key, err)
	}
	return url
}

// getModelContainerLink returns the first DatasetUrl in dataUrl (a JSON list of
// entity.NotebookDataset) that contains ckptName, or "" when dataUrl is empty or no
// URL matches. A JSON decoding error is ignored.
func getModelContainerLink(dataUrl string, ckptName string) string {
	if len(dataUrl) == 0 {
		return ""
	}

	entries := make([]entity.NotebookDataset, 0)
	_ = json.Unmarshal([]byte(dataUrl), &entries)

	for i := 0; i < len(entries); i++ {
		candidate := entries[i].DatasetUrl
		if strings.Contains(candidate, ckptName) {
			return candidate
		}
	}
	return ""
}

+ 0
- 29
services/ai_task_service/upload/client.go View File

@@ -1,29 +0,0 @@
package upload

import (
"code.gitea.io/gitea/entity"
)

// UploaderConfig holds the bucket and endpoint of an upload target.
type UploaderConfig struct {
	Bucket   string
	Endpoint string
}

// Uploader abstracts the storage backend a task's files are uploaded to.
// SelectUploaderFromStorageType returns the implementation for a storage type.
type Uploader interface {
	// UploadDir uploads the local directory codePath.
	// NOTE(review): the implementations name the second parameter objectKeyPrefix,
	// not jobName — confirm which meaning callers rely on.
	UploadDir(codePath, jobName string) error
	// GetRealPath maps an object key to a backend-specific path.
	GetRealPath(objectKey string) string
	// GetBucket returns the bucket of the backend.
	GetBucket() string
	// GetEndpoint returns the endpoint of the backend.
	GetEndpoint() string
	// GetJobDefaultObjectKeyPrefix returns the default object key prefix for jobName.
	GetJobDefaultObjectKeyPrefix(jobName string) string
	// MKDIR creates a directory-like entry at path in the backend.
	MKDIR(path string) error
}

// SelectUploaderFromStorageType returns the Uploader for storageType: an OBSUploader
// for entity.OBS, a MinioUploader for entity.MINIO, and nil for anything else.
func SelectUploaderFromStorageType(storageType entity.StorageType) Uploader {
	if storageType == entity.OBS {
		return &OBSUploader{}
	}
	if storageType == entity.MINIO {
		return &MinioUploader{}
	}
	return nil
}

+ 0
- 40
services/ai_task_service/upload/minio.go View File

@@ -1,40 +0,0 @@
package upload

import (
"bytes"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"strings"
)

// MinioUploader is the Uploader backed by the MinIO attachment storage.
type MinioUploader struct{}

// UploadDir uploads the directory codePath to MinIO below objectKeyPrefix.
func (m *MinioUploader) UploadDir(codePath, objectKeyPrefix string) error {
	err := UploadDirToMinio(codePath, objectKeyPrefix, "")
	return err
}

// GetJobDefaultObjectKeyPrefix returns setting.CBCodePathPrefix followed by jobName.
func (m *MinioUploader) GetJobDefaultObjectKeyPrefix(jobName string) string {
	prefix := setting.CBCodePathPrefix
	return prefix + jobName
}

// GetRealPath returns the configured MinIO real path and bucket followed by "/" and
// objectKey without its leading "/".
func (m *MinioUploader) GetRealPath(objectKey string) string {
	relativeKey := strings.TrimPrefix(objectKey, "/")
	return setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + relativeKey
}

// GetBucket returns the configured MinIO bucket.
func (m *MinioUploader) GetBucket() string {
	bucket := setting.Attachment.Minio.Bucket
	return bucket
}

// GetEndpoint returns the configured MinIO endpoint.
func (m *MinioUploader) GetEndpoint() string {
	endpoint := setting.Attachment.Minio.Endpoint
	return endpoint
}

// README is the name of the placeholder file MKDIR uploads.
const README = "README"

// MKDIR simulates a directory at path: an empty directory cannot be created
// directly, so a README file is uploaded into it instead.
func (m *MinioUploader) MKDIR(path string) error {
	readmeKey := strings.TrimSuffix(path, "/") + "/" + README
	content := []byte("You can put the files into this directory and download the files by the web page.")
	_, err := storage.Attachments.UploadContent(m.GetBucket(), readmeKey, bytes.NewReader(content))
	return err
}

+ 0
- 46
services/ai_task_service/upload/obs.go View File

@@ -1,46 +0,0 @@
package upload

import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/obs"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"strings"
)

// OBSUploader is the Uploader backed by OBS object storage.
type OBSUploader struct {
}

// UploadDir uploads the directory codePath to OBS below objectKeyPrefix.
func (m *OBSUploader) UploadDir(codePath, objectKeyPrefix string) error {
	return UploadDirToObs(codePath, objectKeyPrefix, "")
}

// GetJobDefaultObjectKeyPrefix returns setting.CodePathPrefix followed by jobName.
func (m *OBSUploader) GetJobDefaultObjectKeyPrefix(jobName string) string {
	return setting.CodePathPrefix + jobName
}

// GetRealPath always returns "" for OBS.
func (m *OBSUploader) GetRealPath(objectKey string) string {
	return ""
}

// GetBucket returns the configured OBS bucket.
func (m *OBSUploader) GetBucket() string {
	return setting.Bucket
}

// MKDIR creates a directory marker in OBS by putting an object whose key is path
// with exactly one trailing "/". A failure is logged and returned.
func (m *OBSUploader) MKDIR(path string) error {
	path = strings.TrimSuffix(path, "/") + "/"
	input := &obs.PutObjectInput{}
	input.Bucket = setting.Bucket
	input.Key = path
	_, err := storage.ObsCli.PutObject(input)
	if err != nil {
		log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
		return err
	}
	return nil
}

// GetEndpoint returns setting.Endpoint without its scheme, i.e. everything after the
// first "//". An endpoint without "//" is returned unchanged.
func (m *OBSUploader) GetEndpoint() string {
	index := strings.Index(setting.Endpoint, "//")
	if index < 0 {
		// Without this guard index+2 would be 1 and the first character of a
		// scheme-less endpoint would be cut off.
		return setting.Endpoint
	}
	return setting.Endpoint[index+2:]
}

+ 15
- 0
services/cloudbrain/cloudbrainTask/ai_model.go View File

@@ -28,3 +28,18 @@ func IsModelFileExists(model *models.AiModelManage, fileName string) bool {
}
return false
}

// CheckAndGetFileSize reports whether fileName exists in the storage of model and,
// as second value, what the storage layer returns as its size.
//
// Models of type TypeCloudBrainTwo are looked up in OBS (setting.Bucket), models of
// type TypeCloudBrainOne in MinIO (setting.Attachment.Minio.Bucket). Any other type
// yields (false, 0).
func CheckAndGetFileSize(model *models.AiModelManage, fileName string) (bool, int64) {
	switch model.Type {
	case models.TypeCloudBrainTwo:
		key := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + fileName
		// The log prefix used to name IsModelFileExists, a copy-paste leftover.
		log.Info("CheckAndGetFileSize TypeCloudBrainTwo key=%s", key)
		return storage.ObsCheckAndGetFileSize(setting.Bucket, key)
	case models.TypeCloudBrainOne:
		objectName := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + fileName
		log.Info("CheckAndGetFileSize TypeCloudBrainOne objectName=%s", objectName)
		return storage.MinioCheckAndGetFileSize(setting.Attachment.Minio.Bucket, objectName)
	}
	return false, 0
}

+ 20
- 13
templates/admin/cloudbrain/list.tmpl View File

@@ -89,7 +89,7 @@
<div class="row">
<!-- 任务名 -->
{{$JobID := '0'}}
{{if eq .JobType "DEBUG" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "MODELSAFETY" "SNN4ECOSET" "SIM2BRAIN_SNN" "ONLINEINFERENCE"}}
{{if eq .JobType "DEBUG" "TRAIN" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "MODELSAFETY" "SNN4ECOSET" "SIM2BRAIN_SNN" "ONLINEINFERENCE"}}
{{$JobID = .Cloudbrain.ID}}
{{else}}
{{$JobID = .JobID}}
@@ -149,7 +149,8 @@
style="width: 6% !important;">
<span class="job-status" id="{{$JobID}}"
data-repopath='{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "DEBUG" "ONLINEINFERENCE"}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}{{else if eq .JobType "INFERENCE"}}/modelarts/inference-job{{else if eq .JobType "TRAIN"}}/modelarts/train-job{{else if eq .JobType "BENCHMARK" "MODELSAFETY"}}/cloudbrain{{end}}'
data-jobid="{{$JobID}}" data-version="{{.VersionName}}">
data-jobid="{{$JobID}}" data-version="{{.VersionName}}"
data-cloudbrainid="{{.Cloudbrain.ID}}">
<span><i id="{{$JobID}}-icon" style="vertical-align: middle;"
class="{{.Status}}"></i><span id="{{$JobID}}-text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
@@ -239,7 +240,7 @@
{{$.CsrfTokenHtml}}
{{if eq .Status "RUNNING" "WAITING" "CREATING" "STARTING"}}
<a style="margin: 0 1rem;" id="ai-debug-{{$JobID}}"
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING"}}disabled {{else}}blue {{end}}button'
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING" "CREATED_FAILED"}}disabled {{else}}blue {{end}}button'
data-jobid="{{$JobID}}"
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'>
{{$.i18n.Tr "repo.debug"}}
@@ -247,7 +248,7 @@
{{else}}
{{if not .BootFile}}
<a id="ai-debug-{{$JobID}}"
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING"}} disabled {{else}}blue {{end}}button'
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING" "CREATED_FAILED"}} disabled {{else}}blue {{end}}button'
data-jobid="{{$JobID}}"
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'>
{{$.i18n.Tr "repo.debug_again"}}
@@ -276,7 +277,7 @@
<form id="stopForm-{{$JobID}}" style="margin-left:-1px;">
{{$.CsrfTokenHtml}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}"
class='ui basic ai_stop {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED" "STOPPING"}}disabled {{else}} blue {{end}}button'
class='ui basic ai_stop {{if eq .Status "RUNNING" "WAITING"}} blue {{else}} disabled {{end}} button'
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}/modelsafety/{{$JobID}}/stop'
data-jobid="{{$JobID}}">
{{$.i18n.Tr "repo.stop"}}
@@ -287,7 +288,7 @@
<form id="stopForm-{{$JobID}}" style="margin-left:-1px;">
{{$.CsrfTokenHtml}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}"
class='ui basic ai_stop {{if eq .Status "KILLED" "FAILED" "CREATE_FAILED" "CREATED_FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED" "STOPPING"}}disabled {{else}} blue {{end}}button'
class='ui basic ai_stop {{if eq .Status "RUNNING" "WAITING"}} blue {{else}} disabled {{end}} button'
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else if eq .JobType "BENCHMARK" }}/cloudbrain/benchmark{{else if eq .ComputeResource "NPU" }}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/stop'
data-jobid="{{$JobID}}" data-bootfile="{{.BootFile}}">
{{$.i18n.Tr "repo.stop"}}
@@ -295,9 +296,11 @@
</form>
{{else}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}"
class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "CREATE_FAILED" "CREATED_FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button"
class="ui basic ai_stop_version {{if eq .Status "RUNNING" "WAITING"}} blue {{else}} disabled {{end}} button"
data-repopath="{{.Repo.OwnerName}}/{{.Repo.Name}}/{{if eq .JobType "INFERENCE"}}{{if eq .Cloudbrain.Type 1}}modelarts/inference-job{{else}}cloudbrain/train-job{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .Cloudbrain.Type 1}}modelarts/train-job{{else if eq .Cloudbrain.Type 0}}cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}grampus/train-job{{end}}{{end}}"
data-jobid="{{$JobID}}" data-version="{{.VersionName}}">
data-jobid="{{$JobID}}"
data-cloudbrainid="{{.Cloudbrain.ID}}"
data-version="{{.VersionName}}">
{{$.i18n.Tr "repo.stop"}}
</a>
{{end}}
@@ -306,7 +309,7 @@
<!-- 修改任务 -->
{{if and (eq .JobType "TRAIN") (not .FineTune)}}
<div class="ui compact buttons __btn_edit__">
<a style="padding: 0.5rem 1rem;" class="ui basic blue button" href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}/grampus/train-job/{{.JobID}}{{end}}/create_version{{if .VersionName}}?version_name={{.VersionName}}{{end}}">
<a style="padding: 0.5rem 1rem;" class="ui basic blue button" href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job/{{ToLower .ComputeResource}}{{end}}/create?modify=true&id={{$JobID}}">
{{$.i18n.Tr "repo.modelarts.modify"}}
</a>
</div>
@@ -317,6 +320,7 @@
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}/modelsafety/{{$JobID}}/del?isadminpage=true'
method="post">
{{$.CsrfTokenHtml}}
<input type="hidden" value="{{.Cloudbrain.ID}}" style="display:none" name="id" />
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{$JobID}}"
class="ui basic ai_delete blue button"
style="border-radius: .28571429rem;">
@@ -327,10 +331,13 @@
<form class="ui compact buttons" id="delForm-{{$JobID}}"
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "BENCHMARK"}}/cloudbrain/benchmark{{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE") (eq .JobType "SNN4ECOSET") (eq .JobType "SIM2BRAIN_SNN")}}/cloudbrain{{else if eq .JobType "DEBUG" "ONLINEINFERENCE"}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}{{else if eq .JobType "INFERENCE"}}{{if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{end}}{{end}}/{{$JobID}}/del?isadminpage=true'
method="post">
{{$.CsrfTokenHtml}}
{{$.CsrfTokenHtml}}
<input type="hidden" value="{{.Cloudbrain.ID}}" style="display:none" name="id" />
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{$JobID}}"
data-repopath="{{.Repo.OwnerName}}/{{.Repo.Name}}/modelarts/inference-job/{{$JobID}}/del_version?isadminpage=true"
data-version="{{.VersionName}}" class="ui basic ai_delete blue button"
data-version="{{.VersionName}}"
data-cloudbrainid="{{.Cloudbrain.ID}}"
class="ui basic ai_delete blue button"
style="border-radius: .28571429rem;">
{{$.i18n.Tr "repo.delete"}}
</a>
@@ -341,7 +348,7 @@
</div>
{{else}}
{{$JobID := '0'}}
{{if eq .JobType "DEBUG" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "SNN4ECOSET" "SIM2BRAIN_SNN"}}
{{if eq .JobType "DEBUG" "TRAIN" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "SNN4ECOSET" "SIM2BRAIN_SNN"}}
{{$JobID = .Cloudbrain.ID}}
{{else}}
{{$JobID = .JobID}}
@@ -381,7 +388,7 @@
<div class="two wide column text center nowrap"
style="width: 6% !important;">
<span class="job-status" id="{{$JobID}}" data-jobid="{{$JobID}}"
data-version="{{.VersionName}}">
data-version="{{.VersionName}}" data-cloudbrainid="{{.Cloudbrain.ID}}">
<span><i id="{{$JobID}}-icon" style="vertical-align: middle;"
class="{{.Status}}"></i><span id="{{$JobID}}-text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>


+ 1
- 0
templates/admin/cloudbrain/search.tmpl View File

@@ -67,6 +67,7 @@
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=COMPLETED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="COMPLETED">COMPLETED</a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=SUCCEEDED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="SUCCEEDED">SUCCEEDED</a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="FAILED">FAILED </a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=CREATED_FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="CREATED_FAILED">CREATED_FAILED </a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=other&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="OTHER">OTHER</a>
</div>


+ 1
- 0
templates/admin/cloudbrain/search_dashboard.tmpl View File

@@ -78,6 +78,7 @@
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=COMPLETED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="COMPLETED">COMPLETED</a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=SUCCEEDED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="SUCCEEDED">SUCCEEDED</a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="FAILED">FAILED </a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=CREATED_FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="CREATED_FAILED">CREATED_FAILED </a>
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=other&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="OTHER">OTHER</a>
</div>


+ 1
- 273
templates/repo/cloudbrain/trainjob/new.tmpl View File

@@ -1,273 +1 @@
{{template "base/head" .}}
<style>
/* Page-local style overrides for the train-job creation form. */

/* Text size and bottom spacing for the form rows that carry the "min_title" class. */
.min_title{
font-size: 14px !important;
margin-bottom: 2rem !important;
}

/* Fixed-percentage width helpers. In this template only .width48 (specs select and
   points-balance tip) and .width80 (code-version select) are referenced by markup;
   the remaining helpers are presumably used by markup injected from scripts - TODO confirm. */
.width {
width: 100% !important;
}
.width48 {
width: 48.5% !important;
}
.width80 {
width: 80.7% !important;
margin-left: 10px;
}

.width806 {
width: 80.6% !important;
margin-left: -2px;
}

.width85 {
width: 85% !important;
margin-left: 10.5rem !important;
align-items: center;
}

.width81 {
margin-left: 1.5rem !important;
width: 81% !important;
}

/* Bordered box with only its right-hand corners rounded. Not referenced by markup in
   this template; presumably one half of a paired control - verify against the scripts.
   NOTE(review): the rgba alpha of 100 is outside the usual 0-1 range; 1 was probably intended. */
.add {
font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 0px 5px 5px 0px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}

/* Mirror of .add: same box, but with only its left-hand corners rounded. */
.min {
font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 5px 0px 0px 5px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_image_name" value="{{.image}}">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<!-- Resource-cluster switch: two links rendered as a compact menu. The first entry is
     marked active and points at the cloudbrain train-job create page; the second points
     at the grampus NPU train-job create page. Both icons declare the SVG namespace via
     the standard xmlns attribute. -->
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/modelarts/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/code" "/dataset" "ckpt_url" "/model" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name"
placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}"
tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required
maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;"
for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<!-- Code-version row: a select that posts the chosen branch as "branch_name".
     The first option is .branch_name when that value is set, otherwise .branchName.
     The entries of .Branches follow, skipping the one equal to the first option so
     that it is not listed twice. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>
<div class="inline required field" style="display: none;">
<label>{{.i18n.Tr "cloudbrain.task_type"}}</label>
<select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px'
name="job_type">
<option name="job_type" value="TRAIN">TRAIN</option>
</select>
</div>
<div id="images-new-cb">

</div>

<div class="inline field min_title required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}"
tabindex="3" autofocus required maxlength="255">
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3"
autofocus required maxlength="255">
{{end}}
<span>
<i class="question circle icon link"
data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}}
data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para"
style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i
class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui dropdown width48" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" ovalue="{{.spec_id}}"
{{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}
name="spec_id">
</select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini" ></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link" ></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
{{if not .IsCreate}}
<div class="inline min_title field" >
<label class="label-fix-width"></label>
<div class="ui checkbox" style="margin-right:1rem">
<input type="checkbox" name="is_continue" value="true">
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label>
</div>
<span >
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/inference.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{end}}
<div class="inline field" style="padding: 1rem 0;">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__"
href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
// Page bootstrap for the train-job creation form.
// 1. Hands the spec list and the pay-switch flag injected by the template, together
//    with the translated labels, to window.renderSpecsSelect for the #__specs__ select.
// 2. Lets an optional "backurl" query parameter override the Cancel link target.
;(function() {
  var SPECS = {{ .train_specs }};
  var showPoint = {{ .CloudBrainPaySwitch }};
  window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
    gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
    free: {{$.i18n.Tr "cloudbrain.free"}},
    point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
    memory: {{$.i18n.Tr "cloudbrain.memory"}},
    shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
    no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
  });
  var backUrl = new URLSearchParams(window.location.search).get("backurl");
  if (backUrl) {
    // The query string is attacker-controllable, so only same-origin targets are
    // accepted. Resolving against the current page mirrors how the browser would
    // resolve the href; script-scheme and cross-origin values fail the origin check.
    try {
      var target = new URL(backUrl, window.location.href);
      if (target.origin === window.location.origin) {
        $('.__btn-cancel-back__').attr('href', target.href);
      }
    } catch (e) {
      // Unparseable value: keep the default Cancel link rendered by the template.
    }
  }
})();
</script>
{{ template "repo/cloudbrain/cloudbraincreate" .}}

+ 273
- 0
templates/repo/cloudbrain/trainjob/new_ori.tmpl View File

@@ -0,0 +1,273 @@
{{template "base/head" .}}
<style>
/* Page-local style overrides for the train-job creation form. */

/* Text size and bottom spacing for the form rows that carry the "min_title" class. */
.min_title{
font-size: 14px !important;
margin-bottom: 2rem !important;
}

/* Fixed-percentage width helpers. In this template only .width48 (specs select and
   points-balance tip) and .width80 (code-version select) are referenced by markup;
   the remaining helpers are presumably used by markup injected from scripts - TODO confirm. */
.width {
width: 100% !important;
}
.width48 {
width: 48.5% !important;
}
.width80 {
width: 80.7% !important;
margin-left: 10px;
}

.width806 {
width: 80.6% !important;
margin-left: -2px;
}

.width85 {
width: 85% !important;
margin-left: 10.5rem !important;
align-items: center;
}

.width81 {
margin-left: 1.5rem !important;
width: 81% !important;
}

/* Bordered box with only its right-hand corners rounded. Not referenced by markup in
   this template; presumably one half of a paired control - verify against the scripts.
   NOTE(review): the rgba alpha of 100 is outside the usual 0-1 range; 1 was probably intended. */
.add {
font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 0px 5px 5px 0px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}

/* Mirror of .add: same box, but with only its left-hand corners rounded. */
.min {
font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 5px 0px 0px 5px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_image_name" value="{{.image}}">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<!-- Resource-cluster switch: two links rendered as a compact menu. The first entry is
     marked active and points at the cloudbrain train-job create page; the second points
     at the grampus NPU train-job create page. Both icons declare the SVG namespace via
     the standard xmlns attribute. -->
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/modelarts/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/code" "/dataset" "ckpt_url" "/model" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name"
placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}"
tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required
maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;"
for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<!-- Code-version row: a select that posts the chosen branch as "branch_name".
     The first option is .branch_name when that value is set, otherwise .branchName.
     The entries of .Branches follow, skipping the one equal to the first option so
     that it is not listed twice. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>
<div class="inline required field" style="display: none;">
<label>{{.i18n.Tr "cloudbrain.task_type"}}</label>
<select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px'
name="job_type">
<option name="job_type" value="TRAIN">TRAIN</option>
</select>
</div>
<div id="images-new-cb">

</div>

<div class="inline field min_title required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}"
tabindex="3" autofocus required maxlength="255">
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3"
autofocus required maxlength="255">
{{end}}
<span>
<i class="question circle icon link"
data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}}
data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para"
style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i
class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label>
<select id="__specs__" class="ui dropdown width48" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" ovalue="{{.spec_id}}"
{{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}
name="spec_id">
</select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini" ></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link" ></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
{{if not .IsCreate}}
<div class="inline min_title field" >
<label class="label-fix-width"></label>
<div class="ui checkbox" style="margin-right:1rem">
<input type="checkbox" name="is_continue" value="true">
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label>
</div>
<span >
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/inference.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{end}}
<div class="inline field" style="padding: 1rem 0;">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__"
href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
// Page bootstrap for the train-job creation form.
// 1. Hands the spec list and the pay-switch flag injected by the template, together
//    with the translated labels, to window.renderSpecsSelect for the #__specs__ select.
// 2. Lets an optional "backurl" query parameter override the Cancel link target.
;(function() {
  var SPECS = {{ .train_specs }};
  var showPoint = {{ .CloudBrainPaySwitch }};
  window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
    gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
    free: {{$.i18n.Tr "cloudbrain.free"}},
    point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
    memory: {{$.i18n.Tr "cloudbrain.memory"}},
    shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
    no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
  });
  var backUrl = new URLSearchParams(window.location.search).get("backurl");
  if (backUrl) {
    // The query string is attacker-controllable, so only same-origin targets are
    // accepted. Resolving against the current page mirrors how the browser would
    // resolve the href; script-scheme and cross-origin values fail the origin check.
    try {
      var target = new URL(backUrl, window.location.href);
      if (target.origin === window.location.origin) {
        $('.__btn-cancel-back__').attr('href', target.href);
      }
    } catch (e) {
      // Unparseable value: keep the default Cancel link rendered by the template.
    }
  }
})();
</script>

+ 1
- 746
templates/repo/cloudbrain/trainjob/show.tmpl View File

@@ -1,746 +1 @@
{{template "base/head" .}}
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css">
<style>
.model_file_bread {
margin-bottom: -0.5rem !important;
padding-left: 1rem;
padding-top: 0.5rem;
}
.menuContent{
position: absolute;
background: #ffffff;
left: 0;
right: 26px;
top: 36px;
z-index:999;
border: 1px solid #96c8da;
border-top: 0;
border-bottom-right-radius: 4px;
border-bottom-left-radius: 4px;
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%);
}
</style>
<div id="mask">
<div id="loadingPage">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<h4 class="ui header" id="vertical-segment">
<div class="ui breadcrumb">
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all">
{{.i18n.Tr "repo.cloudbrain"}}
</a>
<div class="divider"> / </div>
<a class="section" href="{{$.RepoLink}}/modelarts/train-job">
{{$.i18n.Tr "repo.modelarts.train_job"}}
</a>
<div class="divider"> / </div>
<div class="active section">{{.displayJobName}}</div>
</div>
</h4>
{{range $k ,$v := .version_list_task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}">
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}">
<div class="{{if eq $k 0}}active{{end}} title padding0">
<div class="according-panel-heading">
<div class="accordion-panel-title">
<i class="dropdown icon"></i>
<span class="accordion-panel-title-content">
<span>
<div style="float: right;">
{{if and ($.canDownload) (ne .Status "WAITING") }}
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model"
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{end}}
{{if and ($.canDownload) (ne .Status "WAITING") }}
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model">
{{$.i18n.Tr "repo.export_result_to_dataset"}}
<div class="export-popup" id="{{.VersionName}}-popup">
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div>
</div>
</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a>
{{end}}
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>

<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon"
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
<span
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span>
<span class="cti-mgRight-sm uc-accordionTitle-black"
id="{{.VersionName}}-duration-span">{{$.duration}}</span>
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}"><i
class="redo icon redo-color"></i></span>

</div>
</span>
</span>
</div>
</div>
</div>
<div class="{{if eq $k 0}}active{{end}} content">
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item detail-log-tab" data-tab="third{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item load-model-file" data-tab="four{{$k}}" data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
<div class="tab_2_content">
<div class="ac-grid ac-grid-col2">
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_task"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DisplayJobName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.status"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-status">
{{.Status}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_creator"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
{{.User.Name}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" id="{{.VersionName}}-startTime">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
--
{{end}}
</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
id="{{.VersionName}}-duration">
{{$.duration}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.model_name"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelconvert.modelversion"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div>
</td>
</tr>
</tbody>
</table>
</div>
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "cloudbrain.mirror"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn" style="cursor:pointer"
data-clipboard-text="{{.Image}}"
data-success="{{$.i18n.Tr "repo.copy_link_success"}}"
data-error="{{$.i18n.Tr "repo.copy_link_error"}}"
data-content="{{$.i18n.Tr "repo.copy_link"}}"
data-variation="inverted tiny"
>
<span title="{{.Image}}">{{.Image}}</span>
</span>
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.code_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BranchName}}
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span>
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BootFile}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Parameters}}">
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}}
</td>

<td class="ti-text-form-content resorce_type">
<div class="text-span text-span-w"></div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content spec">
<div class="text-span text-span-w"></div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.description"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Description}}">
{{if .Description}}{{.Description}}{{else}}--{{end}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<div style="clear:both">
{{if $.datasetDownload}}
<table style="border:none" class="ui fixed small stackable table">
<thead>
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th>
</tr></thead>
<tbody>
{{range $m ,$n := $.datasetDownload}}
<tr>
<td style="word-wrap: break-word;word-break: break-all;">
{{if eq .IsDelete true}}
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}})
{{else}}
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a>
{{end}}
</td>
</tr>
{{end}}

</tbody>
</table>
{{end}}
</div>
</div>

</div>
</div>
<div class="ui tab" data-tab="third{{$k}}">
<div class="detail-log-content detail-log-content-{{.VersionName}}"
data-multiplenode=""
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }}
</div>
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;">
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div>
<div style="padding:0 50px 10px 30px;height:100%">
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;"
data-multiplenode=""
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }}
</div>
</div>
</div>
</div>
<div class="ui tab" data-tab="four{{$k}}">
<input type="hidden" name="model{{.VersionName}}" value="-1">
<input type="hidden" name="modelback{{.VersionName}}" value="-1">
<div style="display: flex;justify-content: space-between;">
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'>
<div class="active section">result</div>
<div class="divider"> / </div>
</div>
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info'
href="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}&jobName={{.JobName}}">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span>
</a>
</div>
<div id="dir_list{{.VersionName}}">
</div>
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;">
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i>
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span>
</div>
</div>
</div>
</div>
</div>
<!-- {{template "custom/max_log" .}} -->
{{end}} {{template "base/paginate" .}}
</div>
<!-- 确认模态框 -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> 删除任务
</div>

<div class="content">
<p>你确认删除该任务么?此任务一旦删除不可恢复。</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> 取消操作
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> 确定操作
</div>
</div>
</div>
</div>
<!--
<div id="menuContent" class="menuContent" style="display:none; position: absolute;z-index:9999">
<ul id="treeDemo" class="ztree" style="margin-top:0; width: 83%; height: 100%;"></ul>
</div> -->
<!-- 创建模型 -->
<div id="newmodel">
<div class="ui modal second">
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);">
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4>
</div>
<div class="content content-padding">
<form id="formId" method="POST" class="ui form">
<div class="ui error message">
</div>
{{$.CsrfTokenHtml}}
<input type="hidden" name="trainTaskCreate" value="true">

<div class="required inline field">
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label>
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required>
<input type="hidden" id="versionName" name="versionName" value="V0001">
<input style="width: 45%;" id="JobName" readonly required>
</div>

<div class="required inline field" id="modelname">
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label>
<input style="width: 45%;" id="name" name="name" required maxlength="25"
onkeyup="this.value=this.value.replace(/[, ]/g,'')">
</div>
<div class="required inline field" id="verionname">
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label>
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255">
</div>
<div class="unite min_title inline field required">
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label>
<div class="ui dropdown selection search width70" id="choice_Engine">
<input type="hidden" id="engine" name="engine" required>
<div class="default text">{{.i18n.Tr "repo.model.manage.select.engine"}}</div>
<i class="dropdown icon"></i>
<div class="menu" id="job-Engine">
<option class="active item" data-value="0">PyTorch</option>
<option class="item" data-value="1">TensorFlow</option>
<option class="item" data-value="4">PaddlePaddle</option>
<option class="item" data-value="5">OneFlow</option>
<option class="item" data-value="6">MXNet</option>
<option class="item" data-value="3">Other</option>
</div>
</div>

</div>
<div class="unite min_title inline fields required">
<div class="field required">
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label>
</div>
<div class="thirteen wide field" style="position:relative">
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile">
<div id="menuContent" class="menuContent" style="display:none;">
<ul id="treeDemo" class="ztree"></ul>
</div>
</div>
</div>
<div class="inline field">
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label>
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255"
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'>
</div>
{{if eq $.Repository.IsPrivate false}}
<div class="inline fields">
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}}&nbsp;&nbsp;&nbsp;</label>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" checked="checked" value="false">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label>
</div>
</div>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" value="true">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label>
</div>
</div>
</div>
{{end}}
<div class="inline field">
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label>
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3"
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}'
onchange="this.value=this.value.substring(0, 255)"
onkeydown="this.value=this.value.substring(0, 255)"
onkeyup="this.value=this.value.substring(0, 256)"></textarea>
</div>

<div class="inline field" style="margin-left: 75px;">
<button onclick="createModel()" type="button" class="ui create_train_job green button"
style="position: absolute;">
{{.i18n.Tr "repo.model.manage.sava_model"}}
</button>
</div>
</form>
<div class="actions" style="display: inline-block;margin-left: 180px;">
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button>
</div>
</div>
</div>
</div>
{{template "custom/export_dataset" .}}
</div>
{{template "base/footer" .}}
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script>
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
// Owner name and repository name of the current page, taken from the URL
// path. They are used to build the model-management request URLs below.
var userName;
var repoPath;
$(document).ready(function(){
    // Work on the pathname only (and ignore trailing slashes): splitting the
    // full href would shift the segments whenever the query string or the
    // hash contains a "/".
    var urlArr = window.location.pathname.replace(/\/+$/, '').split('/')
    // The owner is the fifth path segment from the end, the repository the fourth.
    userName = urlArr.slice(-5)[0]
    repoPath = urlArr.slice(-4)[0]
});
// Options passed to $.fn.zTree.init for the model-file picker tree.
var setting = {
    // Show checkboxes; "Y" and "N" are the modes used when a node is checked
    // and unchecked respectively (NOTE(review): "ps" presumably means the
    // change propagates to parent and child nodes; confirm against zTree docs).
    check: { enable: true, chkboxType: { "Y": "ps", "N": "ps" } },
    // Double-clicking a node does not expand it.
    view: { dblClickExpand: false },
    // Handlers are the functions of the same name defined in this script.
    callback: { beforeClick: beforeClick, onCheck: onCheck },
};

// zTree "beforeClick" callback: a click on a node's label flips the node's
// checkbox. Always returns false.
function beforeClick(treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    var nextChecked = !treeNode.checked;
    // The last argument asks zTree to fire the check callbacks for this change.
    tree.checkNode(treeNode, nextChecked, null, true);
    return false;
}
// zTree "onCheck" callback: collects every checked non-parent node, turns each
// one into a "/"-joined path of node names from the root, and stores the
// ";"-separated list in the #modelSelectedFile input.
function onCheck(e, treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    var selectedPaths = tree.getCheckedNodes(true)
        .filter(function (node) {
            // Parent (directory) nodes are not part of the selection.
            return !node.isParent;
        })
        .map(function (node) {
            return node.getPath()
                .map(function (part) { return String(part.name); })
                .join("/");
        });
    $("#modelSelectedFile").attr("value", selectedPaths.join(";"));
}
// Opens the file-picker dropdown under the model-file input and starts
// listening for mouse presses on the page so it can be closed again.
function showMenu() {
    $("#menuContent").slideDown("fast");
    // Remove any earlier registration first: clicking the input repeatedly
    // would otherwise stack one more copy of the handler per click.
    $("body").unbind("mousedown", onBodyDown).bind("mousedown", onBodyDown);
}
// Closes the file-picker dropdown and stops listening for mouse presses on
// the page body.
function hideMenu() {
    var $menu = $("#menuContent");
    var $body = $("body");
    $menu.fadeOut("fast");
    $body.unbind("mousedown", onBodyDown);
}
// Body "mousedown" handler: hides the file-picker dropdown unless the press
// landed on the picker input, the menu button, or anywhere inside the menu.
function onBodyDown(event) {
    var target = event.target;
    var keepOpenIds = ["menuBtn", "modelSelectedFile", "menuContent"];
    var pressedInsidePicker = keepOpenIds.indexOf(target.id) !== -1
        || $(target).parents("#menuContent").length > 0;
    if (pressedInsidePicker) {
        return;
    }
    hideMenu();
}
// Marker key that loadSelectedModelFile stores on the map of a directory entry
// (a file name ending in "/") that has no children; convertToNode checks for it
// to render such an entry as a closed parent node.
let dirKey="isOnlyDir--:&";
// Fetches the result files of a train job and (re)builds the zTree file picker.
//
// trainJob: object with JobName and VersionName (as passed to showcreate).
// When it is null or empty nothing is requested.
//
// The response is expected to be an array of objects with a FileName property
// holding a "/"-separated path; a trailing "/" marks a directory entry.
function loadSelectedModelFile(trainJob){
    $('#choice_file').dropdown('clear')
    $("#model-file").empty()
    if (trainJob == null || trainJob == "") {
        console.log("trainJob is null");
        return;
    }
    // Encode the values so that names containing "&", "#", spaces, etc. cannot
    // corrupt the query string.
    const query = 'jobName=' + encodeURIComponent(trainJob.JobName)
        + '&type=0&VersionName=' + encodeURIComponent(trainJob.VersionName);
    $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?${query}`, (data) => {
        const zNodes = [];
        // Prototype-less maps: path segments are arbitrary file names, so a
        // name such as "constructor" or "__proto__" must be an ordinary key.
        const nodesMap = Object.create(null);

        // Pass 1: build a nested map with one level per path segment.
        for (const entry of data) {
            let cursor = nodesMap;
            for (const segment of entry.FileName.split("/")) {
                if (segment == "") {
                    break;
                }
                if (cursor[segment] == null) {
                    cursor[segment] = Object.create(null);
                }
                cursor = cursor[segment];
            }
        }

        // Pass 2: flag directory entries that ended up without children. This
        // has to run after pass 1 so every file has already been inserted.
        for (const entry of data) {
            const fileName = entry.FileName;
            let cursor = nodesMap;
            for (const segment of fileName.split("/")) {
                if (segment == "") {
                    if (fileName.endsWith("/") && Object.keys(cursor).length == 0) {
                        cursor[dirKey] = "true";
                    }
                    break;
                }
                cursor = cursor[segment];
            }
        }

        convertToNode(zNodes, nodesMap);
        $.fn.zTree.init($("#treeDemo"), setting, zNodes);
    })
}

// Converts the nested name map built by loadSelectedModelFile into zTree nodes.
//
// nodeList: array that receives one node object per key of nodesMap.
// nodesMap: object whose keys are entry names and whose values are the maps of
//           their children (an empty map means a plain file).
//
// Entries are appended in name order. An entry whose map carries the dirKey
// marker becomes a closed parent node without children; any other non-empty
// entry gets a "children" array filled recursively, and the first such entry
// on each level is opened.
function convertToNode(nodeList,nodesMap){
    var keyList = Object.keys(nodesMap);
    // Names are strings, so subtracting them yields NaN and sorts nothing.
    // Compare them as text, with digit runs ordered numerically ("2" < "10").
    keyList.sort(function(a,b){
        return a.localeCompare(b, undefined, { numeric: true });
    });
    var isFirst = true;
    for(var i=0; i<keyList.length;i++){
        var childMap = nodesMap[keyList[i]];
        var node = { name: keyList[i] };
        nodeList.push(node);
        if(childMap == null || Object.keys(childMap).length == 0){
            continue;
        }
        if(childMap[dirKey] != null){
            node["open"] = false;
            node["isParent"] = true;
        }else{
            node["children"]=[];
            if(isFirst){
                node["open"] = true;
                isFirst= false;
            }
            convertToNode(node["children"],childMap);
        }
    }
}
// Opens the "create model" dialog for one train-job version.
//
// obj: { DisplayJobName, JobName, JobID, VersionName } as supplied by the
// onclick handler in the accordion header.
function showcreate(obj) {
    $('.ui.modal.second')
        .modal({
            centered: false,
            onShow: function () {
                // Pre-fill the form: job name/id, fixed version name, and
                // PyTorch (value 0) as the default engine.
                $('input[name="version"]').addClass('model_disabled')
                $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
                $('input[name="jobId"]').val(obj.JobID)
                $('input[name="versionName"]').val("V0001")
                $('#choice_Engine .default.text').text("PyTorch");
                $('#choice_Engine input[name="engine"]').val(0)
                $('#choice_Engine .default.text').css({ "color": "rgb(0, 0, 0,0.87)" })
                $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
                createModelName();
                loadSelectedModelFile(obj);
            },
            onHide: function () {
                $('.ui.dimmer').css({ "background-color": "" })
                // Clear the previous error text. Calling .text() without an
                // argument only reads it, leaving a stale message behind.
                $('.ui.error.message').text('')
                $('.ui.error.message').css('display', 'none')
            }
        })
        .modal('show')
}
// Submits the "create model" form via AJAX. Without a selected model file the
// file input's container is marked as erroneous and nothing is sent. On
// success the form is cleared and the browser is sent to the model readme
// template page; on failure the server's response text is shown in the
// form's error message area.
function createModel() {
    const $fileInput = $('input#modelSelectedFile');
    if (!$fileInput.val()) {
        $fileInput.parent().addClass('error');
        return;
    }

    const endpoint = `/${userName}/${repoPath}/modelmanage/create_new_model`;
    let payload = $("#formId").serialize();
    // The visibility radios are only rendered for public repositories; when
    // they are absent the model is submitted as private.
    const privacyRadios = document.getElementsByName("isPrivate");
    if (privacyRadios == null || privacyRadios.length == 0) {
        payload += "&isPrivate=true";
    }

    // Show the loading mask above everything else while the request runs.
    $("#mask").css({ "display": "block", "z-index": "9999" });

    $.ajax({
        url: endpoint,
        type: 'POST',
        data: payload,
        success: function (res) {
            // Read the name before the form is cleared.
            const modelName = $('#formId #name').val();
            ["engine_name", "engine", "jobId", "label", "description"].forEach(function (field) {
                $('input[name="' + field + '"]').val("");
            });
            $("#modelSelectedFile").attr("value", "");
            document.getElementById("formId").reset();
            location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(modelName)}`;
            $('.ui.modal.second').modal('hide');
        },
        error: function (xhr) {
            // Only runs when the request fails (status code other than 200).
            const $errorBox = $('.ui.error.message');
            $errorBox.text(xhr.responseText);
            $errorBox.css('display', 'block');
        },
        complete: function (xhr) {
            // Hide the loading mask whether the request succeeded or failed.
            $("#mask").css({ "display": "none", "z-index": "1" });
        }
    });
}
// Pre-fills the model name with "<repo>_model_<random suffix>" and the model
// version with "0.0.1". The repository name is the third "/"-separated piece
// of the current path; the suffix is up to four base-36 characters.
function createModelName() {
    const repoName = location.pathname.split('/')[2];
    const suffix = Math.random().toString(36).slice(2, 6);
    $('#name').val(`${repoName}_model_${suffix}`);
    $('#version').val("0.0.1");
}

// Activate tab behaviour on menu items that already exist at this point.
$('.menu .item').tab()

// Once the DOM is ready: accordions toggle via their icon only ...
$(function () {
    var accordionOptions = { selector: { trigger: '.icon' } };
    $('.ui.accordion').accordion(accordionOptions);
});
// ... and the secondary menus inside each version panel become tabs.
$(function () {
    $('.secondary.menu .item').tab();
});

;(function() {
var SPEC = {{ .Spec }};
var showPoint = false;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>
{{ template "repo/cloudbrain/cloudbraindetail" .}}

+ 746
- 0
templates/repo/cloudbrain/trainjob/show_ori.tmpl View File

@@ -0,0 +1,746 @@
{{template "base/head" .}}
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css">
<style>
.model_file_bread {
margin-bottom: -0.5rem !important;
padding-left: 1rem;
padding-top: 0.5rem;
}
.menuContent{
position: absolute;
background: #ffffff;
left: 0;
right: 26px;
top: 36px;
z-index:999;
border: 1px solid #96c8da;
border-top: 0;
border-bottom-right-radius: 4px;
border-bottom-left-radius: 4px;
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%);
}
</style>
<div id="mask">
<div id="loadingPage">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<h4 class="ui header" id="vertical-segment">
<div class="ui breadcrumb">
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all">
{{.i18n.Tr "repo.cloudbrain"}}
</a>
<div class="divider"> / </div>
<a class="section" href="{{$.RepoLink}}/modelarts/train-job">
{{$.i18n.Tr "repo.modelarts.train_job"}}
</a>
<div class="divider"> / </div>
<div class="active section">{{.displayJobName}}</div>
</div>
</h4>
{{range $k ,$v := .version_list_task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}">
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}">
<div class="{{if eq $k 0}}active{{end}} title padding0">
<div class="according-panel-heading">
<div class="accordion-panel-title">
<i class="dropdown icon"></i>
<span class="accordion-panel-title-content">
<span>
<div style="float: right;">
{{if and ($.canDownload) (ne .Status "WAITING") }}
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model"
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{end}}
{{if and ($.canDownload) (ne .Status "WAITING") }}
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model">
{{$.i18n.Tr "repo.export_result_to_dataset"}}
<div class="export-popup" id="{{.VersionName}}-popup">
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div>
</div>
</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a>
{{end}}
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>

<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon"
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
<span
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span>
<span class="cti-mgRight-sm uc-accordionTitle-black"
id="{{.VersionName}}-duration-span">{{$.duration}}</span>
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}"><i
class="redo icon redo-color"></i></span>

</div>
</span>
</span>
</div>
</div>
</div>
<div class="{{if eq $k 0}}active{{end}} content">
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item detail-log-tab" data-tab="third{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item load-model-file" data-tab="four{{$k}}" data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
<div class="tab_2_content">
<div class="ac-grid ac-grid-col2">
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_task"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DisplayJobName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.status"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-status">
{{.Status}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_creator"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
{{.User.Name}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" id="{{.VersionName}}-startTime">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
--
{{end}}
</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
id="{{.VersionName}}-duration">
{{$.duration}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.model_name"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelconvert.modelversion"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div>
</td>
</tr>
</tbody>
</table>
</div>
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "cloudbrain.mirror"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn" style="cursor:pointer"
data-clipboard-text="{{.Image}}"
data-success="{{$.i18n.Tr "repo.copy_link_success"}}"
data-error="{{$.i18n.Tr "repo.copy_link_error"}}"
data-content="{{$.i18n.Tr "repo.copy_link"}}"
data-variation="inverted tiny"
>
<span title="{{.Image}}">{{.Image}}</span>
</span>
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.code_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BranchName}}
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span>
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BootFile}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Parameters}}">
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}}
</td>

<td class="ti-text-form-content resorce_type">
<div class="text-span text-span-w"></div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content spec">
<div class="text-span text-span-w"></div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.description"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Description}}">
{{if .Description}}{{.Description}}{{else}}--{{end}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<div style="clear:both">
{{if $.datasetDownload}}
<table style="border:none" class="ui fixed small stackable table">
<thead>
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th>
</tr></thead>
<tbody>
{{range $m ,$n := $.datasetDownload}}
<tr>
<td style="word-wrap: break-word;word-break: break-all;">
{{if eq .IsDelete true}}
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}})
{{else}}
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a>
{{end}}
</td>
</tr>
{{end}}

</tbody>
</table>
{{end}}
</div>
</div>

</div>
</div>
<div class="ui tab" data-tab="third{{$k}}">
<div class="detail-log-content detail-log-content-{{.VersionName}}"
data-multiplenode=""
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }}
</div>
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;">
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div>
<div style="padding:0 50px 10px 30px;height:100%">
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;"
data-multiplenode=""
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }}
</div>
</div>
</div>
</div>
<div class="ui tab" data-tab="four{{$k}}">
<input type="hidden" name="model{{.VersionName}}" value="-1">
<input type="hidden" name="modelback{{.VersionName}}" value="-1">
<div style="display: flex;justify-content: space-between;">
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'>
<div class="active section">result</div>
<div class="divider"> / </div>
</div>
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info'
href="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}&jobName={{.JobName}}">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span>
</a>
</div>
<div id="dir_list{{.VersionName}}">
</div>
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;">
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i>
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span>
</div>
</div>
</div>
</div>
</div>
<!-- {{template "custom/max_log" .}} -->
{{end}} {{template "base/paginate" .}}
</div>
<!-- Confirmation modal -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> 删除任务
</div>

<div class="content">
<p>你确认删除该任务么?此任务一旦删除不可恢复。</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> 取消操作
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> 确定操作
</div>
</div>
</div>
</div>
<!--
<div id="menuContent" class="menuContent" style="display:none; position: absolute;z-index:9999">
<ul id="treeDemo" class="ztree" style="margin-top:0; width: 83%; height: 100%;"></ul>
</div> -->
<!-- Create model -->
<div id="newmodel">
<div class="ui modal second">
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);">
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4>
</div>
<div class="content content-padding">
<form id="formId" method="POST" class="ui form">
<div class="ui error message">
</div>
{{$.CsrfTokenHtml}}
<input type="hidden" name="trainTaskCreate" value="true">

<div class="required inline field">
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label>
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required>
<input type="hidden" id="versionName" name="versionName" value="V0001">
<input style="width: 45%;" id="JobName" readonly required>
</div>

<div class="required inline field" id="modelname">
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label>
<input style="width: 45%;" id="name" name="name" required maxlength="25"
onkeyup="this.value=this.value.replace(/[, ]/g,'')">
</div>
<div class="required inline field" id="verionname">
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label>
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255">
</div>
<div class="unite min_title inline field required">
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label>
<div class="ui dropdown selection search width70" id="choice_Engine">
<input type="hidden" id="engine" name="engine" required>
<div class="default text">{{.i18n.Tr "repo.model.manage.select.engine"}}</div>
<i class="dropdown icon"></i>
<div class="menu" id="job-Engine">
<option class="active item" data-value="0">PyTorch</option>
<option class="item" data-value="1">TensorFlow</option>
<option class="item" data-value="4">PaddlePaddle</option>
<option class="item" data-value="5">OneFlow</option>
<option class="item" data-value="6">MXNet</option>
<option class="item" data-value="3">Other</option>
</div>
</div>

</div>
<div class="unite min_title inline fields required">
<div class="field required">
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label>
</div>
<div class="thirteen wide field" style="position:relative">
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile">
<div id="menuContent" class="menuContent" style="display:none;">
<ul id="treeDemo" class="ztree"></ul>
</div>
</div>
</div>
<div class="inline field">
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label>
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255"
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'>
</div>
{{if eq $.Repository.IsPrivate false}}
<div class="inline fields">
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}}&nbsp;&nbsp;&nbsp;</label>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" checked="checked" value="false">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label>
</div>
</div>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" value="true">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label>
</div>
</div>
</div>
{{end}}
<div class="inline field">
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label>
<!-- Model description: every handler truncates to the same 255-character limit as maxlength. -->
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3"
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}'
onchange="this.value=this.value.substring(0, 255)"
onkeydown="this.value=this.value.substring(0, 255)"
onkeyup="this.value=this.value.substring(0, 255)"></textarea>
</div>

<div class="inline field" style="margin-left: 75px;">
<button onclick="createModel()" type="button" class="ui create_train_job green button"
style="position: absolute;">
{{.i18n.Tr "repo.model.manage.sava_model"}}
</button>
</div>
</form>
<div class="actions" style="display: inline-block;margin-left: 180px;">
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button>
</div>
</div>
</div>
</div>
{{template "custom/export_dataset" .}}
</div>
{{template "base/footer" .}}
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script>
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<script>
// Path pieces of the current page, used below to build
// `/${userName}/${repoPath}/modelmanage/...` request URLs.
// They stay undefined until the DOM-ready handler has run.
var userName;
var repoPath;
$(document).ready(function () {
    // Split the full page URL on "/" and pick the 5th- and 4th-from-last pieces.
    var segments = window.location.href.split('/');
    var lastFive = segments.slice(-5);
    var lastFour = segments.slice(-4);
    userName = lastFive[0];
    repoPath = lastFour[0];
});
// Options object passed to $.fn.zTree.init for the "#treeDemo" model-file tree.
var setting = {
// Show a checkbox on every node.
// NOTE(review): "ps" for both "Y" (check) and "N" (uncheck) presumably makes a
// change cascade to parent and child nodes — confirm against the zTree docs.
check: {
enable: true,
chkboxType: {"Y":"ps", "N":"ps"}
},
view: {
dblClickExpand: false
},
// Handlers defined below in this script (function declarations are hoisted).
callback: {
beforeClick: beforeClick,
onCheck: onCheck
}
};

/**
 * zTree `beforeClick` callback: a click on a node flips that node's checkbox.
 *
 * @param {string} treeId   Id of the tree that fired the event (unused).
 * @param {Object} treeNode The clicked node; its `checked` flag is inverted.
 * @returns {boolean} Always false.
 */
function beforeClick(treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    var nextChecked = !treeNode.checked;
    // NOTE(review): the trailing `true` presumably asks zTree to fire the
    // check callbacks (onCheck) for this programmatic change — confirm.
    tree.checkNode(treeNode, nextChecked, null, true);
    return false;
}
/**
 * zTree `onCheck` callback: mirrors the set of checked files into the
 * "#modelSelectedFile" input.
 *
 * Every checked node that is not flagged `isParent` contributes its full path
 * (node names from the root joined with "/"); the paths are joined with ";".
 *
 * @param {Event}  e        The triggering event (unused).
 * @param {string} treeId   Id of the tree (unused).
 * @param {Object} treeNode The node whose check state changed (unused).
 */
function onCheck(e, treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    var selectedPaths = tree.getCheckedNodes(true)
        .filter(function (node) {
            return !node.isParent;
        })
        .map(function (node) {
            return node.getPath()
                .map(function (step) {
                    return String(step.name);
                })
                .join("/");
        });
    $("#modelSelectedFile").attr("value", selectedPaths.join(";"));
}
/**
 * Opens the model-file tree dropdown ("#menuContent") and starts listening
 * for mouse presses on the body so that onBodyDown can close it again.
 */
function showMenu() {
    $("#menuContent").slideDown("fast");
    $("body").bind("mousedown", onBodyDown);
}
/**
 * Closes the model-file tree dropdown and removes the body mousedown
 * listener that showMenu installed.
 */
function hideMenu() {
    var menu = $("#menuContent");
    var page = $("body");
    menu.fadeOut("fast");
    page.unbind("mousedown", onBodyDown);
}
/**
 * Body mousedown handler: hides the file-tree dropdown unless the press
 * landed on the trigger input, on the dropdown itself, or inside it.
 *
 * @param {Event} event The mousedown event.
 */
function onBodyDown(event) {
    var pressed = event.target;
    var keepOpenIds = ["menuBtn", "modelSelectedFile", "menuContent"];
    if (keepOpenIds.indexOf(pressed.id) !== -1) {
        return;
    }
    if ($(pressed).parents("#menuContent").length > 0) {
        return;
    }
    hideMenu();
}
// Marker key placed in the map of an otherwise empty directory so that
// convertToNode can render it as a folder rather than as a file.
let dirKey="isOnlyDir--:&";

/**
 * Requests the file list of a training job and renders it as the zTree in
 * "#treeDemo".
 *
 * The response is read as an array of objects with a `FileName` string
 * ("/"-separated; a trailing "/" denotes a directory entry).
 *
 * @param {Object} trainJob Job descriptor; `JobName` and `VersionName` are
 *                          sent as query parameters. Nothing is requested
 *                          when it is null or "".
 */
function loadSelectedModelFile(trainJob){
    $('#choice_file').dropdown('clear')
    $("#model-file").empty()
    if (trainJob == null || trainJob == "") {
        return;
    }
    // Encode the values so names containing "&", "#", spaces, etc. cannot
    // break or truncate the query string.
    const query = "jobName=" + encodeURIComponent(trainJob.JobName)
        + "&type=0&VersionName=" + encodeURIComponent(trainJob.VersionName);
    $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?${query}`, (data) => {
        const total = data.length;
        const nodesMap = {};

        // Pass 1: build a nested map with one level per path segment.
        for (let i = 0; i < total; i++) {
            let cursor = nodesMap;
            const parts = data[i].FileName.split("/");
            for (let j = 0; j < parts.length; j++) {
                if (parts[j] == "") {
                    break;
                }
                if (cursor[parts[j]] == null) {
                    cursor[parts[j]] = {};
                }
                cursor = cursor[parts[j]];
            }
        }

        // Pass 2: tag directories that ended up with no children. This must
        // run after pass 1 because emptiness depends on every entry.
        for (let i = 0; i < total; i++) {
            const fileName = data[i].FileName;
            const parts = fileName.split("/");
            let cursor = nodesMap;
            for (let j = 0; j < parts.length; j++) {
                if (parts[j] == "") {
                    const isDirEntry = fileName[fileName.length - 1] == "/";
                    if (isDirEntry && Object.keys(cursor).length == 0) {
                        cursor[dirKey] = "true";
                    }
                    break;
                }
                cursor = cursor[parts[j]];
            }
        }

        const zNodes = [];
        convertToNode(zNodes, nodesMap);
        $.fn.zTree.init($("#treeDemo"), setting, zNodes);
    })
}

/**
 * Converts one level of the nested path map into zTree node objects and
 * appends them to `nodeList`, recursing into sub-directories.
 *
 * - An entry whose map is empty becomes a plain node (`{name}`).
 * - An entry whose map carries the `dirKey` marker becomes a closed,
 *   childless parent node (`open: false, isParent: true`).
 * - Any other entry becomes a node with `children`; only the first such
 *   node on each level is created with `open: true`.
 *
 * Names are ordered with a natural, numeric-aware string comparison.
 *
 * @param {Array<Object>} nodeList Output array; nodes are pushed onto it.
 * @param {Object} nodesMap Map of name -> nested map for this level.
 */
function convertToNode(nodeList,nodesMap){
    var names = Object.keys(nodesMap);
    // Subtracting strings gives NaN for ordinary file names, which leaves the
    // list unsorted; compare as text instead, treating digit runs as numbers.
    names.sort(function (a, b) {
        return a.localeCompare(b, undefined, { numeric: true });
    });
    var isFirst = true;
    for (var i = 0; i < names.length; i++) {
        var name = names[i];
        // The marker is bookkeeping, not a file; its value is a string, so
        // descending into it would never terminate.
        if (name === dirKey) {
            continue;
        }
        var childMap = nodesMap[name];
        var node = { name: name };
        nodeList.push(node);
        if (childMap == null || Object.keys(childMap).length === 0) {
            continue;
        }
        if (childMap[dirKey] != null) {
            node["open"] = false;
            node["isParent"] = true;
            continue;
        }
        node["children"] = [];
        if (isFirst) {
            node["open"] = true;
            isFirst = false;
        }
        convertToNode(node["children"], childMap);
    }
}
/**
 * Opens the "import new model" modal for a training job.
 *
 * When the modal is shown, the form is pre-filled from `obj`, the engine
 * dropdown is set to PyTorch (value 0), a model name and version are
 * generated, and the job's file tree is loaded. When it is hidden, the
 * dimmer colour is restored and the error message is emptied and hidden.
 *
 * @param {Object} obj Training job; `DisplayJobName` and `JobID` are written
 *                     into the form and the whole object is passed on to
 *                     loadSelectedModelFile.
 */
function showcreate(obj) {
    $('.ui.modal.second')
        .modal({
            centered: false,
            onShow: function () {
                $('input[name="version"]').addClass('model_disabled')
                $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
                $('input[name="jobId"]').val(obj.JobID)
                $('input[name="versionName"]').val("V0001")
                $('#choice_Engine .default.text').text("PyTorch");
                $('#choice_Engine input[name="engine"]').val(0)
                $('#choice_Engine .default.text').css({ "color": "rgb(0, 0, 0,0.87)" })
                $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
                createModelName();
                loadSelectedModelFile(obj);
            },
            onHide: function () {
                $('.ui.dimmer').css({ "background-color": "" })
                // .text() with no argument only reads; pass '' to actually
                // clear the previous error before the next open.
                $('.ui.error.message').text('')
                $('.ui.error.message').css('display', 'none')
            }
        })
        .modal('show')
}
/**
 * Submits the "import new model" form ("#formId") with an AJAX POST to
 * `/${userName}/${repoPath}/modelmanage/create_new_model`.
 *
 * Nothing is sent while no model file is selected; the file field is marked
 * with the `error` class instead. If the page has no `isPrivate` radio
 * buttons, `isPrivate=true` is appended to the payload. On success the form
 * is cleared and the browser navigates to the model readme template page;
 * on failure the response text is shown in the form's error message.
 * The "#mask" overlay is shown for the duration of the request.
 */
function createModel() {
    var fileInput = $('input#modelSelectedFile');
    if (!fileInput.val()) {
        fileInput.parent().addClass('error')
        return
    }
    // A file is selected now: drop the highlight left by an earlier attempt.
    fileInput.parent().removeClass('error')
    let url_href = `/${userName}/${repoPath}/modelmanage/create_new_model`
    let data = $("#formId").serialize()
    var radio = document.getElementsByName("isPrivate");
    if(radio == null || radio.length == 0){
        data +="&isPrivate=true";
    }
    $("#mask").css({ "display": "block", "z-index": "9999" })
    $.ajax({
        url: url_href,
        type: 'POST',
        data: data,
        success: function (res) {
            // Read the name before the form is reset below.
            const modelName = $('#formId #name').val();
            $('input[name="engine_name"]').val("");
            $('input[name="engine"]').val("");
            $('input[name="jobId"]').val("");
            $('input[name="label"]').val("");
            $('input[name="description"]').val("");
            var cityObj = $("#modelSelectedFile");
            cityObj.attr("value", "");
            document.getElementById("formId").reset();
            location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(modelName)}`
            $('.ui.modal.second').modal('hide')
        },
        error: function (xhr) {
            // Only runs when the request fails (status code other than 200).
            $('.ui.error.message').text(xhr.responseText)
            $('.ui.error.message').css('display', 'block')
        },
        complete: function (xhr) {
            // Hide the loading overlay whether the request succeeded or not.
            $("#mask").css({ "display": "none", "z-index": "1" })
        }
    })

}
/**
 * Pre-fills the model form: "#name" gets `<repo>_model_<suffix>`, where
 * <repo> is the third "/"-separated piece of location.pathname and <suffix>
 * is up to four base-36 characters of a random number; "#version" gets
 * "0.0.1".
 */
function createModelName() {
    const repoName = location.pathname.split('/')[2];
    const suffix = Math.random().toString(36).slice(2, 6);
    $('#name').val(`${repoName}_model_${suffix}`);
    $('#version').val("0.0.1");
}

// Activate Semantic UI tabs on the menu items present at this point.
$('.menu .item').tab();

// After the DOM is ready: accordions toggle when their icon is clicked.
$(function () {
    var accordionOptions = { selector: { trigger: '.icon' } };
    $('.ui.accordion').accordion(accordionOptions);
});
// After the DOM is ready: activate tabs on the secondary menus as well.
$(function () {
    $('.secondary.menu .item').tab();
});

// Fills the "spec" and "resource type" cells of the task detail table.
// SPEC and the label strings are injected by the server-side template.
;(function() {
var SPEC = {{ .Spec }};
var showPoint = false;
// NOTE(review): window.renderSpecStr is presumably provided by specsuse.js,
// which is loaded just above this script — confirm.
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
});
$('td.ti-text-form-content.spec div').text(specStr);
// Only look up the accelerator card type when a spec was actually provided.
SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script>

+ 1
- 257
templates/repo/grampus/trainjob/gcu/new.tmpl View File

@@ -1,257 +1 @@
{{template "base/head" .}}
<style>
.min_title{
font-size: 14px !important;
margin-bottom: 2rem !important;
}
.width{
width:100% !important;
}
.width80{
width: 80.7% !important;
margin-left: 10px;
}
.width85{
width: 85% !important;
margin-left: 10.5rem !important;
align-items: center;
}
.width81{
width: 81% !important;
}
.width48 {
width: 48.5% !important;
}
.add{font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 0px 5px 5px 0px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}
.min{
font-size: 18px;
padding: 0.5rem;
border: 1px solid rgba(187, 187, 187, 100);
border-radius: 5px 0px 0px 5px;
line-height: 21px;
text-align: center;
color: #C2C7CC;
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="image" value="">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<!-- Inline icon for the cluster tab; the namespace attribute is spelled "xmlns". -->
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if.NPUEnabled}}npu{{else}}gpu{{end}}/create">
<!-- Inline icon for the cluster tab; the namespace attribute is spelled "xmlns". -->
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
GCU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="min_title inline field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<div class="required unite min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!-- {{template "custom/select_model" .}} -->
<div class="required min_title inline field" id="engine_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
<select class="ui dropdown cloudbrain_image width81" id="trainjob_images" name="image_id">
{{if .image_id}}
{{range .images}}
{{if eq $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{range .images}}
{{if ne $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{else}}
{{range .images}}
<option name="image_id" value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
</select>
</div>

<div class="inline min_title field required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GCU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
<span><i class="question circle icon link"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link" data-position="right center" data-variation="mini"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>

<div class="ui labeled input" style="width: 5%;">

<!-- Hidden compute-node count submitted as work_server_number (fixed at 1). -->
<input style="border-radius: 0;text-align: center;" type="hidden" name="work_server_number" id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1" readonly>
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_id">
<option name="server_id" value="1">1</option>
</select>
</div>

</div>
</div>

<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
// Populates the "#__specs__" resource-spec dropdown and wires the cancel
// button's target. SPECS, showPoint and the label strings are injected by
// the server-side template.
;(function() {
var SPECS = {{ .Specs }};
var showPoint = {{ .CloudBrainPaySwitch }};
// NOTE(review): window.renderSpecsSelect is presumably provided by
// specsuse.js, which is loaded just above this script — confirm.
window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
});
// When the page URL carries a "backurl" query parameter, the cancel button
// links there instead of its default href.
var backUrl = new URLSearchParams(window.location.search).get("backurl");
if (backUrl) {
$('.__btn-cancel-back__').attr('href', backUrl);
}
})();
</script>
{{ template "repo/cloudbrain/cloudbraincreate" .}}

+ 257
- 0
templates/repo/grampus/trainjob/gcu/new_ori.tmpl View File

@@ -0,0 +1,257 @@
{{template "base/head" .}}
<style>
/* Page-local sizing helpers for the train-job creation form. */
.min_title {
  font-size: 14px !important;
  margin-bottom: 2rem !important;
}
.width {
  width: 100% !important;
}
.width80 {
  width: 80.7% !important;
  margin-left: 10px;
}
.width85 {
  width: 85% !important;
  margin-left: 10.5rem !important;
  align-items: center;
}
.width81 {
  width: 81% !important;
}
.width48 {
  width: 48.5% !important;
}
/* .add and .min are bordered boxes rounded on the right and left side respectively.
   The alpha channel is written as 1: the previous value of 100 is out of range and
   is clamped to fully opaque anyway. */
.add {
  font-size: 18px;
  padding: 0.5rem;
  border: 1px solid rgba(187, 187, 187, 1);
  border-radius: 0px 5px 5px 0px;
  line-height: 21px;
  text-align: center;
  color: #C2C7CC;
}
.min {
  font-size: 18px;
  padding: 0.5rem;
  border: 1px solid rgba(187, 187, 187, 1);
  border-radius: 5px 0px 0px 5px;
  line-height: 21px;
  text-align: center;
  color: #C2C7CC;
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="image" value="">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<!-- Resource-cluster switch: the first item links to the cloudbrain create page, the active item to the grampus one (npu when .NPUEnabled, otherwise gpu). -->
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if .NPUEnabled}}npu{{else}}gpu{{end}}/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
GCU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="min_title inline field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<!-- Code-version (branch) selector, submitted as "branch_name".
     When .branch_name is set it is emitted as the first option, followed by every
     other entry of .Branches; otherwise .branchName takes that first slot. The
     "ne" guard keeps the leading value from being listed twice. -->
<div class="required unite min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!-- {{template "custom/select_model" .}} -->
<!-- Image selector, submitted as "image_id".
     When .image_id is set, the entry of .images whose ID equals it is emitted first
     and the remaining entries follow in a second pass; otherwise .images is listed
     in its given order. -->
<div class="required min_title inline field" id="engine_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
<select class="ui dropdown cloudbrain_image width81" id="trainjob_images" name="image_id">
{{if .image_id}}
{{range .images}}
{{if eq $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{range .images}}
{{if ne $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{else}}
{{range .images}}
<option name="image_id" value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
</select>
</div>

<div class="inline min_title field required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GCU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
<span><i class="question circle icon link"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link" data-position="right center" data-variation="mini"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<!-- Compute-node count field: a hidden "work_server_number" input fixed at 1 plus a single-option select. -->
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>

<div class="ui labeled input" style="width: 5%;">

<!-- Attributes must be whitespace-separated; style and type were previously run together. -->
<input style="border-radius: 0;text-align: center;" type="hidden" name="work_server_number" id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1" readonly>
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_id">
<option name="server_id" value="1">1</option>
</select>
</div>

</div>
</div>

<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
  // Values injected by the server-side template.
  var SPECS = {{ .Specs }};
  var showPoint = {{ .CloudBrainPaySwitch }};

  // Populate the resource-spec dropdown; the last argument carries the translated labels.
  window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
    gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
    free: {{$.i18n.Tr "cloudbrain.free"}},
    point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
    memory: {{$.i18n.Tr "cloudbrain.memory"}},
    shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
    no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
  });

  // "backurl" comes from the query string and is therefore untrusted. Only use it
  // for the cancel link when it resolves to this site, so that javascript: URLs and
  // links to other hosts are ignored and the default href stays in place.
  var backUrl = new URLSearchParams(window.location.search).get("backurl");
  if (backUrl) {
    var target = null;
    try {
      target = new URL(backUrl, window.location.href);
    } catch (e) {
      target = null; // unparsable value: keep the default cancel link
    }
    if (target && target.origin === window.location.origin) {
      $('.__btn-cancel-back__').attr('href', target.href);
    }
  }
})();
</script>

+ 1
- 261
templates/repo/grampus/trainjob/gpu/new.tmpl View File

@@ -1,261 +1 @@
{{template "base/head" .}}
<style>
/* Page-local sizing helpers for the train-job creation form. */
.min_title {
  font-size: 14px !important;
  margin-bottom: 2rem !important;
}
.width {
  width: 100% !important;
}
.width80 {
  width: 80.7% !important;
  margin-left: 10px;
}
.width806 {
  width: 80.6% !important;
  margin-left: -2px;
}
.width85 {
  width: 85% !important;
  margin-left: 10.5rem !important;
  align-items: center;
}
.width81 {
  width: 81% !important;
}
.width48 {
  width: 48.5% !important;
}
/* .add and .min are bordered boxes rounded on the right and left side respectively.
   Alpha channels in this sheet are written as 1: the previous value of 100 is out
   of range and is clamped to fully opaque anyway. */
.add {
  font-size: 18px;
  padding: 0.5rem;
  border: 1px solid rgba(187, 187, 187, 1);
  border-radius: 0px 5px 5px 0px;
  line-height: 21px;
  text-align: center;
  color: #C2C7CC;
}
.min {
  font-size: 18px;
  padding: 0.5rem;
  border: 1px solid rgba(187, 187, 187, 1);
  border-radius: 5px 0px 0px 5px;
  line-height: 21px;
  text-align: center;
  color: #C2C7CC;
}
/* Fixed-width, right-aligned form labels. */
.label-fix-width {
  width: 140px !important;
  text-align: right;
  font-family: SourceHanSansSC-medium !important;
  color: rgba(16, 16, 16, 1) !important;
  font-size: 14px !important;
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-image="2" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}{{if not .IsCreate}}?compute_resource=CPU/GPU{{end}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_name" value="">
<input type="hidden" id="ai_flavor_name" name="flavor_name" value="">
<input type="hidden" id="ai_image_name" value="{{.image}}">

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<!-- Resource-cluster switch: the first item links to the cloudbrain create page, the active item to the grampus one (npu when .NPUEnabled, otherwise gpu). -->
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if .NPUEnabled}}npu{{else}}gpu{{end}}/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
GCU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="min_title inline field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<!-- Code-version (branch) selector, submitted as "branch_name".
     When .branch_name is set it is emitted as the first option, followed by every
     other entry of .Branches; otherwise .branchName takes that first slot. The
     "ne" guard keeps the leading value from being listed twice. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>
<div id="images-new-grampus">

<div id="images-new-cb">
</div>

<div class="inline min_title field required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>


<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<!-- {{if not .IsCreate}}
<div class="inline min_title field" >
<label class="label-fix-width"></label>
<div class="ui checkbox" style="margin-right:1rem">
<input type="checkbox" name="is_continue" value="true">
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label>
</div>
<span >
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/continue" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{end}} -->
<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
  // Values injected by the server-side template.
  var SPECS = {{ .Specs }};
  var showPoint = {{ .CloudBrainPaySwitch }};

  // Populate the resource-spec dropdown; the last argument carries the translated labels.
  window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
    gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
    free: {{$.i18n.Tr "cloudbrain.free"}},
    point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
    memory: {{$.i18n.Tr "cloudbrain.memory"}},
    shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
    no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
  });

  // "backurl" comes from the query string and is therefore untrusted. Only use it
  // for the cancel link when it resolves to this site, so that javascript: URLs and
  // links to other hosts are ignored and the default href stays in place.
  var backUrl = new URLSearchParams(window.location.search).get("backurl");
  if (backUrl) {
    var target = null;
    try {
      target = new URL(backUrl, window.location.href);
    } catch (e) {
      target = null; // unparsable value: keep the default cancel link
    }
    if (target && target.origin === window.location.origin) {
      $('.__btn-cancel-back__').attr('href', target.href);
    }
  }
})();
</script>
{{ template "repo/cloudbrain/cloudbraincreate" .}}

+ 261
- 0
templates/repo/grampus/trainjob/gpu/new_ori.tmpl View File

@@ -0,0 +1,261 @@
{{template "base/head" .}}
<style>
  /* Spacing and font size applied to every form row. */
  .min_title {
    font-size: 14px !important;
    margin-bottom: 2rem !important;
  }

  /* Width helpers used by inputs and dropdowns. */
  .width {
    width: 100% !important;
  }
  .width80 {
    width: 80.7% !important;
    margin-left: 10px;
  }
  .width806 {
    width: 80.6% !important;
    margin-left: -2px;
  }
  .width85 {
    width: 85% !important;
    margin-left: 10.5rem !important;
    align-items: center;
  }
  .width81 {
    width: 81% !important;
  }
  .width48 {
    width: 48.5% !important;
  }

  /* ".add" and ".min" share everything except which corners are rounded. */
  .add,
  .min {
    font-size: 18px;
    padding: 0.5rem;
    border: 1px solid rgba(187, 187, 187, 100);
    line-height: 21px;
    text-align: center;
    color: #C2C7CC;
  }
  .add {
    border-radius: 0px 5px 5px 0px;
  }
  .min {
    border-radius: 5px 0px 0px 5px;
  }

  /* Fixed-width, right-aligned labels so the form controls line up. */
  .label-fix-width {
    width: 140px !important;
    text-align: right;
    font-family: SourceHanSansSC-medium !important;
    color: rgba(16, 16, 16, 100) !important;
    font-size: 14px !important;
  }
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-image="2" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}{{if not .IsCreate}}?compute_resource=CPU/GPU{{end}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_name" value="">
<input type="hidden" id="ai_flavor_name" name="flavor_name" value="">
<input type="hidden" id="ai_image_name" value="{{.image}}">

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<!-- Resource cluster switch: OpenI cluster vs. C2Net (the active entry on this page). -->
<div class="required min_title inline field">
  <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
  <div class="ui blue mini menu compact selectcloudbrain">
    <a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
      <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
      {{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
    </a>
    <!-- The C2Net link goes to the NPU form when .NPUEnabled is set, otherwise to the GPU form. -->
    <a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if .NPUEnabled}}npu{{else}}gpu{{end}}/create">
      <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
      {{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
    </a>
  </div>
</div>
<!-- Compute resource switch: links to the GPU, NPU and GCU creation forms; CPU/GPU is the active entry here. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
GCU</a>
</div>
</div>
<!-- Renders the shared "custom/task_wait_count" partial followed by a tip whose text is filled with the /tmp/code, /tmp/dataset, ckpt_url and /tmp/output placeholders. -->
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<!-- The translated string is piped through Safe, so it is emitted without HTML escaping. -->
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span>
</div>
</div>
<!-- Required job name (max 36 characters); the keyup handler strips commas and spaces as the user types. -->
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
  <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
  <!-- The placeholder is quoted so a translation containing spaces cannot split the attribute. -->
  <input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder="{{.i18n.Tr "repo.modelarts.train_job.job_name"}}" value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<!-- Optional description, limited to 255 characters. -->
<div class="min_title inline field">
  <label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
  {{if .description}}
  <!-- Pre-filled variant, used when a description value is already available. -->
  <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder="{{.i18n.Tr "repo.modelarts.train_job.new_place"}}">{{.description}}</textarea>
  {{else}}
  <!-- Empty variant; the inline handlers additionally truncate the value to 255 characters. -->
  <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder="{{.i18n.Tr "repo.modelarts.train_job.new_place"}}" onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
  {{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<!-- Code version (branch) dropdown. The first option is .branch_name when it is set, otherwise .branchName; the remaining entries of .Branches follow, skipping the one already listed first. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<!-- Model selection: the data attributes carry the previously chosen model values; the empty element with id select-multi-model is presumably filled by client-side script (confirm against the page scripts). -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>
<!-- NOTE(review): the element with id images-new-grampus opened below has no matching closing tag anywhere in this template, so the browser closes it implicitly and the remaining form rows end up nested inside it. Confirm whether it should be closed right after it opens or removed in favour of images-new-cb. -->
<div id="images-new-grampus">

<div id="images-new-cb">
</div>

<!-- Required start (boot) file path, up to 255 characters, with a help tooltip and a link to a sample script. -->
<div class="inline min_title field required">
  <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
  {{if .boot_file}}
  <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
  {{else}}
  <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
  {{end}}
  <span>
    <!-- data-content is quoted so a translation containing spaces cannot split the attribute. -->
    <i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}}" data-position="right center" data-variation="mini"></i>
  </span>
  <a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>


<!-- Empty placeholder for the dataset picker; presumably filled by client-side script (confirm against the page scripts). -->
<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<!-- Run parameters: an "add parameter" trigger, a hidden run_para_list input submitted with the form, and an empty container that carries the previous parameters and the translated field labels as data attributes. -->
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<!-- Resource specification dropdown. The select is rendered empty; the inline script at the end of this template passes it to window.renderSpecsSelect. -->
<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<!-- NOTE(review): the attribute is spelled "blance"; it is presumably read under that name by the spec-rendering script, so verify before renaming. -->
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
<!-- Point balance hint, shown only when .CloudBrainPaySwitch is set. -->
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<!-- {{if not .IsCreate}}
<div class="inline min_title field" >
<label class="label-fix-width"></label>
<div class="ui checkbox" style="margin-right:1rem">
<input type="checkbox" name="is_continue" value="true">
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label>
</div>
<span >
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/continue" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{end}} -->
<!-- Form actions. The create button gets the "disabled" class when .NotStopTaskCount equals 1. The cancel link defaults to the train-job list; the inline script at the end of this template may replace its href with the "backurl" query parameter. -->
<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
  // Page bootstrap: renders the resource-spec dropdown and, when a safe
  // "backurl" query parameter is present, points the cancel link at it.
  ;(function() {
    // Values injected by the server-side template.
    var SPECS = {{ .Specs }};
    var showPoint = {{ .CloudBrainPaySwitch }};
    window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
      gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
      free: {{$.i18n.Tr "cloudbrain.free"}},
      point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
      memory: {{$.i18n.Tr "cloudbrain.memory"}},
      shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
      no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
    });
    // "backurl" is attacker-controllable (it comes from the query string), so
    // only accept it when it resolves to this site's origin. This rejects
    // "javascript:" / "data:" URLs and redirects to other hosts.
    var backUrl = new URLSearchParams(window.location.search).get("backurl");
    if (backUrl) {
      var sameOrigin = false;
      try {
        sameOrigin = new URL(backUrl, window.location.href).origin === window.location.origin;
      } catch (e) {
        // Unparseable value: keep the default cancel target.
        sameOrigin = false;
      }
      if (sameOrigin) {
        $('.__btn-cancel-back__').attr('href', backUrl);
      }
    }
  })();
</script>

+ 1
- 247
templates/repo/grampus/trainjob/npu/new.tmpl View File

@@ -1,247 +1 @@
{{template "base/head" .}}
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}{{if not .IsCreate}}?compute_resource=NPU{{end}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_name" value="">
<input type="hidden" id="ai_flavor_name" name="flavor_name" value="">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<!-- Resource cluster switch: OpenI cluster vs. C2Net (the active entry on this page). -->
<div class="required min_title inline field">
  <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
  <div class="ui blue mini menu compact selectcloudbrain">
    <a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
      <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
      {{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
    </a>
    <!-- The C2Net link goes to the NPU form when .NPUEnabled is set, otherwise to the GPU form. -->
    <a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if .NPUEnabled}}npu{{else}}gpu{{end}}/create">
      <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
      {{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
    </a>
  </div>
</div>
<!-- Compute resource switch: links to the GPU, NPU and GCU creation forms; Ascend NPU is the active entry here. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
GCU</a>
</div>
</div>
<!-- Renders the shared "custom/task_wait_count" partial followed by the dataset path rule tip. -->
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<!-- The translated string is piped through Safe, so it is emitted without HTML escaping. -->
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.train_dataset_path_rule_1" | Safe}}</span>
</div>
</div>
<!-- Required job name (max 36 characters); the keyup handler strips commas and spaces as the user types. -->
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
  <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
  <!-- The placeholder is quoted so a translation containing spaces cannot split the attribute. -->
  <input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder="{{.i18n.Tr "repo.modelarts.train_job.job_name"}}" value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<!-- Optional description, limited to 255 characters. -->
<div class="min_title inline field">
  <label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
  {{if .description}}
  <!-- Pre-filled variant, used when a description value is already available. -->
  <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder="{{.i18n.Tr "repo.modelarts.train_job.new_place"}}">{{.description}}</textarea>
  {{else}}
  <!-- Empty variant; the inline handlers additionally truncate the value to 255 characters. -->
  <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder="{{.i18n.Tr "repo.modelarts.train_job.new_place"}}" onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
  {{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<!-- Code version (branch) dropdown. The first option is .branch_name when it is set, otherwise .branchName; the remaining entries of .Branches follow, skipping the one already listed first. -->
<div class="required unite min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<!-- Model selection: the data attributes carry the previously chosen model values (data-multiple is set to true on this form); the empty element with id select-multi-model is presumably filled by client-side script (confirm against the page scripts). -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" data-multiple="true"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>
<!-- Image dropdown built from .images. When .image_id is set, the matching image is emitted first and the others after it; otherwise every image is listed in its original order. -->
<div class="required min_title inline field" id="engine_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
<select class="ui dropdown width48" id="trainjob_images" name="image_id">
{{if .image_id}}
{{range .images}}
{{if eq $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{range .images}}
{{if ne $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{else}}
{{range .images}}
<option name="image_id" value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
</select>
</div>

<!-- Required start (boot) file path, up to 255 characters, with a help tooltip and a link to a sample script. -->
<div class="inline min_title field required">
  <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
  {{if .boot_file}}
  <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
  {{else}}
  <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
  {{end}}
  <span>
    <!-- data-content is quoted so a translation containing spaces cannot split the attribute. -->
    <i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}}" data-position="right center" data-variation="mini"></i>
  </span>
  <a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_Example/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

<!-- Empty placeholder for the dataset picker; presumably filled by client-side script (confirm against the page scripts). -->
<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<!-- Run parameters: an "add parameter" trigger, a hidden run_para_list input submitted with the form, and an empty container that carries the previous parameters and the translated field labels as data attributes. -->
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<!-- Resource specification dropdown. The select is rendered empty; the inline script at the end of this template passes it to window.renderSpecsSelect. -->
<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<!-- NOTE(review): the attribute is spelled "blance"; it is presumably read under that name by the spec-rendering script, so verify before renaming. -->
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
<!-- Point balance hint, shown only when .CloudBrainPaySwitch is set. -->
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<!-- Number of compute nodes. Options come from .WorkNode; the entry equal to .work_server_number (when that value is set) is marked selected. If .WorkNode is empty, a single option with value 1 is rendered. -->
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>

<div class="ui labeled input" style="width: 5%;">
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_number">
{{if .WorkNode}}
{{range .WorkNode}}
{{if $.work_server_number}}
{{if eq . $.work_server_number }}
<option name="server_id" selected value="{{.}}">{{.}}</option>
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{end}}
{{else}}
<option name="server_id" value="1">1</option>
{{end}}
</select>
</div>

</div>
</div>
<!-- {{if not .IsCreate}}
<div class="inline min_title field" >
<label class="label-fix-width"></label>
<div class="ui checkbox" style="margin-right:1rem">
<input type="checkbox" name="is_continue" value="true">
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label>
</div>
<span >
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/continue" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{end}} -->
<!-- Form actions. The create button gets the "disabled" class when .NotStopTaskCount equals 1. The cancel link defaults to the train-job list; the inline script at the end of this template may replace its href with the "backurl" query parameter. -->
<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
  // Page bootstrap: renders the resource-spec dropdown and, when a safe
  // "backurl" query parameter is present, points the cancel link at it.
  ;(function() {
    // Values injected by the server-side template.
    var SPECS = {{ .Specs }};
    var showPoint = {{ .CloudBrainPaySwitch }};
    window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
      gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
      free: {{$.i18n.Tr "cloudbrain.free"}},
      point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
      memory: {{$.i18n.Tr "cloudbrain.memory"}},
      shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
      no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
    });
    // "backurl" is attacker-controllable (it comes from the query string), so
    // only accept it when it resolves to this site's origin. This rejects
    // "javascript:" / "data:" URLs and redirects to other hosts.
    var backUrl = new URLSearchParams(window.location.search).get("backurl");
    if (backUrl) {
      var sameOrigin = false;
      try {
        sameOrigin = new URL(backUrl, window.location.href).origin === window.location.origin;
      } catch (e) {
        // Unparseable value: keep the default cancel target.
        sameOrigin = false;
      }
      if (sameOrigin) {
        $('.__btn-cancel-back__').attr('href', backUrl);
      }
    }
  })();
</script>
{{ template "repo/cloudbrain/cloudbraincreate" .}}

+ 247
- 0
templates/repo/grampus/trainjob/npu/new_ori.tmpl View File

@@ -0,0 +1,247 @@
{{template "base/head" .}}
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}{{if not .IsCreate}}?compute_resource=NPU{{end}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_name" value="">
<input type="hidden" id="ai_flavor_name" name="flavor_name" value="">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if.NPUEnabled}}npu{{else}}gpu{{end}}/create">
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
Ascend NPU</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
GCU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.train_dataset_path_rule_1" | Safe}}</span>
</div>
</div>
<div class="required min_title inline field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="min_title inline field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<div class="required unite min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" data-multiple="true"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>
<div class="required min_title inline field" id="engine_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
<select class="ui dropdown width48" id="trainjob_images" name="image_id">
{{if .image_id}}
{{range .images}}
{{if eq $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{range .images}}
{{if ne $.image_id .ID}}
<option value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
{{else}}
{{range .images}}
<option name="image_id" value="{{.ID}}">{{.Name}}</option>
{{end}}
{{end}}
</select>
</div>

<div class="inline min_title field required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_Example/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">

</div>
</div>

<div class="required min_title inline field" id="flavor_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>

<div class="ui labeled input" style="width: 5%;">
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_number">
{{if .WorkNode}}
{{range .WorkNode}}
{{if $.work_server_number}}
{{if eq . $.work_server_number }}
<option name="server_id" selected value="{{.}}">{{.}}</option>
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{end}}
{{else}}
<option name="server_id" value="1">1</option>
{{end}}
</select>
</div>

</div>
</div>
<!-- {{if not .IsCreate}}
<div class="inline min_title field" >
<label class="label-fix-width"></label>
<div class="ui checkbox" style="margin-right:1rem">
<input type="checkbox" name="is_continue" value="true">
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label>
</div>
<span >
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/continue" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>
{{end}} -->
<div class="inline min_title field">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- Modal dialog -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
var SPECS = {{ .Specs }};
var showPoint = {{ .CloudBrainPaySwitch }};
window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
});
var backUrl = new URLSearchParams(window.location.search).get("backurl");
if (backUrl) {
$('.__btn-cancel-back__').attr('href', backUrl);
}
})();
</script>

+ 1
- 861
templates/repo/grampus/trainjob/show.tmpl View File

@@ -1,861 +1 @@
{{template "base/head" .}}
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css">
<style>
.model_file_bread {
margin-bottom: -0.5rem !important;
padding-left: 1rem;
padding-top: 0.5rem;
}
.menuContent{
position: absolute;
background: #ffffff;
left: 0;
right: 26px;
top: 36px;
z-index:999;
border: 1px solid #96c8da;
border-top: 0;
border-bottom-right-radius: 4px;
border-bottom-left-radius: 4px;
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%);
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<h4 class="ui header" id="vertical-segment">
<div class="ui breadcrumb">
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all">
{{.i18n.Tr "repo.cloudbrain"}}
</a>
<div class="divider"> / </div>
<a class="section" href="{{$.RepoLink}}/modelarts/train-job">
{{$.i18n.Tr "repo.modelarts.train_job"}}
</a>
<div class="divider"> / </div>
<div class="active section">{{.displayJobName}}</div>
</div>
</h4>
{{range $k ,$v := .version_list_task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/grampus/train-job" data-jobid="{{.JobID}}"
data-version="{{.VersionName}}">
<div class="{{if eq $k 0}}active{{end}} title padding0">
<div class="according-panel-heading">
<div class="accordion-panel-title">
<i class="dropdown icon"></i>
<span class="accordion-panel-title-content">
<span>
<div style="float: right;">
{{$.CsrfTokenHtml}}
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span class="cti-mgRight-sm">
{{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span>
<span class="cti-mgRight-sm">
{{$.i18n.Tr "repo.modelarts.parent_version"}}:{{.PreVersionName}}</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon"
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
<span
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span>
<span class="cti-mgRight-sm uc-accordionTitle-black"
id="{{.VersionName}}-duration-span">{{.TrainJobDuration}}</span>
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}">
<i class="redo icon redo-color"></i>
</span>
</div>
<div style="float: right;">
{{if and ($.canDownload) (ne .Status "WAITING")}}
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model"
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}},EngineName:{{.EngineName}},ComputeResource:{{.ComputeResource}},Type:{{.Type}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{end}}
{{if and ($.canDownload) (ne .Status "WAITING")}}
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model">
{{$.i18n.Tr "repo.export_result_to_dataset"}}
<div class="export-popup" id="{{.VersionName}}-popup">
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div>
</div>
</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a>
{{end}}
</div>
</span>
</span>
</div>
</div>
</div>
<div class="{{if eq $k 0}}active{{end}} content">
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">

<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item detail-log-tab" data-tab="second{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
{{if ne .ComputeResource "GCU"}}
<a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}" data-path="{{$.RepoRelPath}}/grampus/train-job/{{.JobID}}/metrics">{{$.i18n.Tr "cloudbrain.resource_use"}}</a>
{{end}}
{{if eq .ComputeResource "CPU/GPU"}}
<a class="item run_info" data-tab="five{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
{{end}}
<a class="item load-model-file" data-tab="third{{$k}}" data-can-reschedule="{{$.canReschedule}}" data-retry-path="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/model/reschedule" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
<div class="tab_2_content">
<div class="ac-grid ac-grid-col2">
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_task"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DisplayJobName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.status"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-status">
{{.Status}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_creator"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
{{.User.Name}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.run_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{if .VersionName}}{{.VersionName}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" id="{{.VersionName}}-startTime">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
--
{{end}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
id="{{.VersionName}}-duration">
{{.TrainJobDuration}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.model_name"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelconvert.modelversion"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div>
</td>
</tr>
</tbody>
</table>
</div>
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
{{ if eq $.Spec.ComputeResource "GPU"}}
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "cloudbrain.mirror"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn-image" style="cursor:pointer"
data-clipboard-text="{{.EngineName}}"
data-success="{{$.i18n.Tr "repo.copy_link_success"}}"
data-error="{{$.i18n.Tr "repo.copy_link_error"}}"
data-content="{{$.i18n.Tr "repo.copy_link"}}"
data-variation="inverted tiny"
>
<span title="{{.EngineName}}">{{.EngineName}}</span>
</span>
</div>
</td>
</tr>
{{else}}
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.AI_driver"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.EngineName}}
</div>
</td>
</tr>
{{end}}
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.code_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BranchName}}
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BootFile}}
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Parameters}}">
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.grampus.train_job.ai_center"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-ai_center">
{{if $.ai_center}}{{$.ai_center}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content spec">
<div class="text-span text-span-w"></div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.compute_node"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.WorkServerNumber}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.description"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
title="{{.Description}}">
{{if .Description}}{{.Description}}{{else}}--{{end}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div style="clear:both">
<table style="border:none" class="ui fixed small stackable table">
<thead>
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th>
</tr></thead>
<tbody>
{{range $m ,$n := $.datasetDownload}}
<tr>
<td style="word-wrap: break-word;word-break: break-all;">
{{if eq .IsDelete true}}
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}})
{{else}}
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a>
{{end}}
</td>
</tr>
{{end}}
</tbody>
</table>
</div>
</div>
</div>
<div class="ui tab" data-tab="second{{$k}}">
<div class="detail-log-content detail-log-content-{{.VersionName}}"
{{if eq .ComputeResource "NPU"}}
data-multiplenode="true"
{{end}}
data-noscroll="true"
data-logapiurl="{{$.RepoLink}}/grampus/train-job/{{.JobID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/grampus/train-job/{{.JobID}}/download_log"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }}
</div>
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;">
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div>
<div style="padding:0 50px 10px 30px;height:100%">
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;"
{{if eq .ComputeResource "NPU"}}
data-multiplenode="true"
{{end}}
data-noscroll="true"
data-logapiurl="{{$.RepoLink}}/grampus/train-job/{{.JobID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/grampus/train-job/{{.JobID}}/download_log"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }}
</div>
</div>
</div>
</div>
<div class="ui tab" data-tab="five{{$k}}">
<div style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<div class="ui attached info" id="info{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<span class="info_text">
</span>
</div>

</div>

</div>
<div class="ui tab" data-tab="four{{$k}}" style="position: relative;">
<i class="ri-refresh-line metric_chart"
style="position: absolute;right: 25%;color:#3291f8;z-index:99;cursor: pointer;"
data-version="{{.VersionName}}"></i>
<div class="node-select" style="display:none;">
<select class="ui dropdown"></select>
</div>
<div class="ui inverted active dimmer" style="display: none;">
<div class="ui loader"></div>
</div>
<div id="metric-{{.VersionName}}" style="height: 260px;width: 870px;"
{{if eq .ComputeResource "NPU"}}
data-multiplenode="true"
{{end}}
data-workservernumber="{{.WorkServerNumber}}"
></div>
</div>
<div class="ui tab" data-tab="third{{$k}}">
<input type="hidden" name="model{{.VersionName}}" value="-1">
<input type="hidden" name="modelback{{.VersionName}}" value="-1">
<div style="display: flex;justify-content: space-between;">
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'>
<div class="active section">{{.VersionName}}</div>
<div class="divider"> / </div>
</div>
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info'
href="{{$.RepoLink}}/grampus/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span>
</a>
</div>
<div id="dir_list{{.VersionName}}">
</div>
{{if eq .ComputeResource "CPU/GPU"}}
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;">
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i>
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span>
</div>
{{end}}
</div>

</div>
</div>
</div>
<!-- {{template "custom/max_log" .}} -->
{{end}} {{template "base/paginate" .}}
</div>
<!-- Confirmation modal -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> {{.i18n.Tr "cloudbrain.delete_task"}}
</div>

<div class="content">
<p>{{.i18n.Tr "cloudbrain.task_delete_confirm"}}</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> {{.i18n.Tr "cloudbrain.operate_cancel"}}
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> {{.i18n.Tr "cloudbrain.operate_confirm"}}
</div>
</div>
</div>
</div>
<!-- Create model -->
<div id="newmodel">
<div class="ui modal second">
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);">
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4>
</div>
<div class="content content-padding">
<form id="formId" method="POST" class="ui form">
<div class="ui error message">
</div>
{{$.CsrfTokenHtml}}
<input type="hidden" name="trainTaskCreate" value="true">

<div class="required inline field">
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label>
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required>
<input type="hidden" id="versionName" name="versionName" value="V0001">
<input style="width: 45%;" id="JobName" readonly required>
</div>

<div class="required inline field" id="modelname">
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label>
<input style="width: 45%;" id="name" name="name" required maxlength="25"
onkeyup="this.value=this.value.replace(/[, ]/g,'')">
</div>
<div class="required inline field" id="verionname">
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label>
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255">
</div>
<div class="unite min_title inline field required">
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label>
<div class="ui dropdown selection search width70" id="choice_Engine">
<input type="hidden" id="engine" name="engine" required>
<div class="default text">{{.i18n.Tr "repo.model.manage.select.engine"}}</div>
<i class="dropdown icon"></i>
<div class="menu" id="job-Engine">
<option class="active item" data-value="0">PyTorch</option>
<option class="item" data-value="1">TensorFlow</option>
<option class="item" data-value="2">MindSpore</option>
<option class="item" data-value="4">PaddlePaddle</option>
<option class="item" data-value="5">OneFlow</option>
<option class="item" data-value="6">MXNet</option>
<option class="item" data-value="3">Other</option>
</div>
</div>

</div>
<div class="unite min_title inline fields required">
<div class="field required">
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label>
</div>
<div class="thirteen wide field" style="position:relative">
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile" >
<div id="menuContent" class="menuContent" style="display:none;">
<ul id="treeDemo" class="ztree"></ul>
</div>
</div>
</div>
<div class="inline field">
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label>
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255"
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'>
</div>
{{if eq $.Repository.IsPrivate false}}
<div class="inline fields">
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}}&nbsp;&nbsp;&nbsp;</label>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" checked="checked" value="false">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label>
</div>
</div>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" value="true">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label>
</div>
</div>
</div>
{{end}}
<div class="inline field">
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label>
<!-- Model description: every inline handler truncates to the same 255-character limit as maxlength. -->
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3"
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}'
onchange="this.value=this.value.substring(0, 255)"
onkeydown="this.value=this.value.substring(0, 255)"
onkeyup="this.value=this.value.substring(0, 255)"></textarea>
</div>

<div class="inline field" style="margin-left: 75px;">
<button onclick="createModel()" type="button" class="ui create_train_job green button"
style="position: absolute;">
{{.i18n.Tr "repo.model.manage.sava_model"}}
</button>
</div>
</form>
<div class="actions" style="display: inline-block;margin-left: 180px;">
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button>
</div>
</div>


</div>
</div>
{{template "custom/export_dataset" .}}
</div>
{{template "base/footer" .}}
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script>
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
var SPEC = {{ .Spec }};
var showPoint = false;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
});
$('td.ti-text-form-content.spec').text(specStr);
})();
// zTree settings used when the model-file tree (#treeDemo) is initialised.
var setting = {
  callback: {
    // Clicking a node label toggles its checkbox (see beforeClick).
    beforeClick: beforeClick,
    // Checked files are written into #modelSelectedFile (see onCheck).
    onCheck: onCheck
  },
  view: {
    dblClickExpand: false
  },
  check: {
    enable: true,
    // NOTE(review): per the zTree docs "ps" propagates (un)checking to parent and child nodes - confirm.
    chkboxType: { "Y": "ps", "N": "ps" }
  }
};

/**
 * zTree beforeClick callback: flips the checkbox of the clicked node.
 *
 * @param {string} treeId   Id of the tree that raised the event (unused).
 * @param {Object} treeNode Node that was clicked.
 * @returns {boolean} Always false.
 */
function beforeClick(treeId, treeNode) {
  var tree = $.fn.zTree.getZTreeObj("treeDemo");
  var nextState = !treeNode.checked;
  tree.checkNode(treeNode, nextState, null, true);
  return false;
}
/**
 * zTree onCheck callback: writes the checked files into #modelSelectedFile.
 *
 * Each checked non-parent node contributes its path (node names joined with
 * "/"); the paths are joined with ";" and stored in the input's value attribute.
 */
function onCheck(e, treeId, treeNode) {
  var tree = $.fn.zTree.getZTreeObj("treeDemo");
  var selectedPaths = tree.getCheckedNodes(true)
    .filter(function (checkedNode) {
      // Parent (directory) nodes are skipped.
      return !checkedNode.isParent;
    })
    .map(function (checkedNode) {
      return checkedNode.getPath()
        .map(function (pathNode) { return pathNode.name; })
        .join("/");
    });
  $("#modelSelectedFile").attr("value", selectedPaths.join(";"));
}
/**
 * Opens the model-file dropdown (#menuContent) and starts listening for
 * mousedown events on the body so onBodyDown can close it again.
 */
function showMenu() {
  $("#menuContent").slideDown("fast");
  $("body").bind("mousedown", onBodyDown);
}
/**
 * Closes the model-file dropdown and removes the body mousedown listener
 * that showMenu registered.
 */
function hideMenu() {
  var dropdown = $("#menuContent");
  dropdown.fadeOut("fast");
  var body = $("body");
  body.unbind("mousedown", onBodyDown);
}
/**
 * Body mousedown handler: closes the dropdown unless the press happened on
 * the menu button, the file input, or inside the dropdown itself.
 */
function onBodyDown(event) {
  var target = event.target;
  var keepOpenIds = ["menuBtn", "modelSelectedFile", "menuContent"];
  var hitKnownElement = keepOpenIds.some(function (id) {
    return target.id == id;
  });
  if (hitKnownElement) {
    return;
  }
  // Presses on any descendant of the dropdown also keep it open.
  if ($(target).parents("#menuContent").length > 0) {
    return;
  }
  hideMenu();
}
// Marker key stored in a directory's map when the directory has no entries;
// convertToNode renders such a map as a closed parent node without children.
let dirKey="isOnlyDir--:&";

/**
 * Requests the result files of a training job and rebuilds the file tree
 * shown in #treeDemo.
 *
 * @param {Object} trainJob Job descriptor; Type, ComputeResource, JobName and
 *   VersionName are read. Nothing is requested when it is null or empty.
 */
function loadSelectedModelFile(trainJob) {
  $('#choice_file').dropdown('clear')
  $("#model-file").empty()
  if (trainJob == null || trainJob == "") {
    return;
  }
  // Jobs of type 2 are queried as type 1 when they ran on NPU, otherwise as type 0.
  let type = trainJob.Type;
  if (type == 2) {
    type = trainJob.ComputeResource == "NPU" ? 1 : 0;
  }
  // Encode every value so characters such as "&" or "#" cannot break the query string.
  const query = 'jobName=' + encodeURIComponent(trainJob.JobName) +
    '&type=' + encodeURIComponent(type) +
    '&VersionName=' + encodeURIComponent(trainJob.VersionName);
  $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?${query}`, (data) => {
    // Anything other than an array is treated as "no files" instead of throwing.
    const files = Array.isArray(data) ? data : [];
    // Prototype-less maps keep segments like "constructor" from hitting Object.prototype.
    const nodesMap = Object.create(null);

    // Pass 1: create a nested map entry for every path segment.
    files.forEach(function (file) {
      let current = nodesMap;
      for (const segment of file.FileName.split("/")) {
        if (segment == "") {
          break;
        }
        if (current[segment] == null) {
          current[segment] = Object.create(null);
        }
        current = current[segment];
      }
    });

    // Pass 2: once all entries exist, flag directories ("name/") that stayed empty.
    files.forEach(function (file) {
      const fileName = file.FileName;
      let current = nodesMap;
      for (const segment of fileName.split("/")) {
        if (segment == "") {
          if (fileName[fileName.length - 1] == "/" && Object.keys(current).length == 0) {
            current[dirKey] = "true";
          }
          break;
        }
        current = current[segment];
      }
    });

    const zNodes = [];
    convertToNode(zNodes, nodesMap);
    $.fn.zTree.init($("#treeDemo"), setting, zNodes);
  })
}

/**
 * Converts a nested name -> children map into zTree node objects.
 *
 * @param {Array}  nodeList Output array; one node per key of nodesMap is appended.
 * @param {Object} nodesMap Map from entry name to the map of its children. A
 *   child map containing dirKey marks an empty directory.
 */
function convertToNode(nodeList, nodesMap) {
  // Names are strings, so subtracting them yields NaN and never sorts;
  // compare them as text, with embedded numbers in numeric order.
  var keyList = Object.keys(nodesMap).sort(function (a, b) {
    return a.localeCompare(b, undefined, { numeric: true });
  });
  var isFirst = true;
  for (var i = 0; i < keyList.length; i++) {
    var name = keyList[i];
    var childMap = nodesMap[name];
    var node = { name: name };
    nodeList.push(node);
    if (childMap == null || Object.keys(childMap).length === 0) {
      // Plain file: no further properties.
      continue;
    }
    if (childMap[dirKey] != null) {
      // Empty directory: shown as a closed parent without children.
      node["open"] = false;
      node["isParent"] = true;
    } else {
      node["children"] = [];
      // Only the first directory on each level starts expanded.
      if (isFirst) {
        node["open"] = true;
        isFirst = false;
      }
      convertToNode(node["children"], childMap);
    }
  }
}
/**
 * Opens the "create model" modal pre-filled from a training job.
 *
 * @param {Object} obj Job descriptor; DisplayJobName, JobID, ComputeResource
 *   and EngineName are read here, and the whole object is passed on to
 *   loadSelectedModelFile.
 */
function showcreate(obj) {
  $('.ui.modal.second')
    .modal({
      centered: false,
      onShow: function () {
        $('input[name="version"]').addClass('model_disabled')
        $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
        $('input[name="jobId"]').val(obj.JobID)
        // The version name submitted with the form is always "V0001".
        $('input[name="versionName"]').val("V0001")
        if (obj.ComputeResource == "NPU") {
          if (obj.EngineName != null && obj.EngineName != "") {
            // The engine family is the part of the engine name before the first "-".
            // Declared locally so it no longer leaks as an implicit global.
            let srcEngine = obj.EngineName.split('-')[0].trim().toLowerCase();
            if (srcEngine == 'tensorflow') {
              $('#choice_Engine .default.text').text("TensorFlow");
              $('#choice_Engine input[name="engine"]').val(1)
            }
            if (srcEngine == 'mindspore') {
              $('#choice_Engine .default.text').text("MindSpore");
              $('#choice_Engine input[name="engine"]').val(2)
            }
          }
        } else {
          // Every non-NPU job is preset to PyTorch.
          $('#choice_Engine .default.text').text("PyTorch");
          $('#choice_Engine input[name="engine"]').val(0)
        }
        $('#choice_Engine .default.text').css({ "color": "rgb(0, 0, 0,0.87)" })
        $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
        createModelName();
        loadSelectedModelFile(obj);
      },
      onHide: function () {
        $('.ui.dimmer').css({ "background-color": "" })
        // Clear the previous error; calling .text() without an argument only reads it.
        $('.ui.error.message').text('')
        $('.ui.error.message').css('display', 'none')
      }
    })
    .modal('show')
}
/**
 * Submits the "create model" form with AJAX.
 *
 * Requires a selected model file. On success the form is cleared and the
 * browser navigates to the new model's readme template page; on failure the
 * response text is shown in the form's error message.
 */
function createModel() {
  var fileInput = $('input#modelSelectedFile');
  if (!fileInput.val()) {
    // No model file chosen yet: flag the field and stop.
    $('input#modelSelectedFile').parent().addClass('error');
    return;
  }
  var payload = $("#formId").serialize();
  // The access radios are only rendered for public repositories; when they
  // are absent the model is submitted as private.
  var accessRadios = document.getElementsByName("isPrivate");
  if (accessRadios == null || accessRadios.length == 0) {
    payload += "&isPrivate=true";
  }
  // Show the global mask while the request is in flight.
  $("#mask").css({ "display": "block", "z-index": "9999" });
  $.ajax({
    url: `/${userName}/${repoPath}/modelmanage/create_new_model`,
    type: 'POST',
    data: payload,
    success: function (res) {
      // Read the name before the form is cleared.
      const modelName = $('#formId #name').val();
      ['engine_name', 'engine', 'jobId', 'label', 'description'].forEach(function (field) {
        $('input[name="' + field + '"]').val("");
      });
      $("#modelSelectedFile").attr("value", "");
      document.getElementById("formId").reset();
      location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(modelName)}`;
      $('.ui.modal.second').modal('hide');
    },
    error: function (xhr) {
      // Only runs when the request fails (status code is not 200).
      var errorBox = $('.ui.error.message');
      errorBox.text(xhr.responseText);
      $('.ui.error.message').css('display', 'block');
    },
    complete: function (xhr) {
      // Hide the loading mask again.
      $("#mask").css({ "display": "none", "z-index": "1" });
    }
  });
}
/**
 * Pre-fills the model name with "<repo>_model_<suffix>" and the version with "0.0.1".
 * The repository name is the third "/"-separated piece of location.pathname;
 * the suffix is up to four base-36 characters derived from Math.random().
 */
function createModelName() {
  var pathParts = location.pathname.split('/');
  var randomSuffix = Math.random().toString(36).slice(2, 6);
  $('#name').val(pathParts[2] + '_model_' + randomSuffix);
  $('#version').val("0.0.1");
}

$('.menu .item').tab()
$(document).ready(function () {
$('.ui.accordion').accordion({ selector: { trigger: '.icon' } });
});
$(document).ready(function () {
$('.secondary.menu .item').tab();
});

let userName
let repoPath
let jobID
let downlaodFlag = {{ $.canDownload }}
$(document).ready(function () {
let url = window.location.href;
let urlArr = url.split('/')
userName = urlArr.slice(-5)[0]
repoPath = urlArr.slice(-4)[0]
jobID = urlArr.slice(-1)[0]
})
/**
 * Stops an event from bubbling; window.event takes precedence over the
 * argument when it is set.
 */
function stopBubbling(e) {
  var evt = window.event || e;
  if (!evt.stopPropagation) {
    evt.cancelBubble = true; // legacy IE fallback
    return;
  }
  evt.stopPropagation(); // stop the event from bubbling up
}

/**
 * Formats a byte count as a whole number followed by a unit (1024-based).
 *
 * @param {number|string} value Size in bytes.
 * @returns {string} For example "500Bytes" or "2KB"; "0 Bytes" when the value
 *   is missing, not a number, zero or negative.
 */
function renderSize(value) {
  var unitArr = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
  var size = parseFloat(value);
  // Covers null, "", "0", non-numeric text and negative numbers, which
  // previously produced output such as "NaNundefined".
  if (!isFinite(size) || size <= 0) {
    return "0 Bytes";
  }
  // Repeated division is exact for powers of 1024 (a log ratio is not) and
  // keeps the index inside the unit table for tiny and huge values alike.
  var index = 0;
  while (size >= 1024 && index < unitArr.length - 1) {
    size /= 1024;
    index++;
  }
  return size.toFixed(0) + unitArr[index]; // no decimal places are kept
}
</script>
{{ template "repo/cloudbrain/cloudbraindetail" .}}

+ 861
- 0
templates/repo/grampus/trainjob/show_ori.tmpl View File

@@ -0,0 +1,861 @@
{{template "base/head" .}}
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css">
<style>
.model_file_bread {
margin-bottom: -0.5rem !important;
padding-left: 1rem;
padding-top: 0.5rem;
}
.menuContent{
position: absolute;
background: #ffffff;
left: 0;
right: 26px;
top: 36px;
z-index:999;
border: 1px solid #96c8da;
border-top: 0;
border-bottom-right-radius: 4px;
border-bottom-left-radius: 4px;
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%);
}
</style>
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<h4 class="ui header" id="vertical-segment">
<div class="ui breadcrumb">
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all">
{{.i18n.Tr "repo.cloudbrain"}}
</a>
<div class="divider"> / </div>
<a class="section" href="{{$.RepoLink}}/modelarts/train-job">
{{$.i18n.Tr "repo.modelarts.train_job"}}
</a>
<div class="divider"> / </div>
<div class="active section">{{.displayJobName}}</div>
</div>
</h4>
{{range $k ,$v := .version_list_task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/grampus/train-job" data-jobid="{{.JobID}}"
data-version="{{.VersionName}}">
<div class="{{if eq $k 0}}active{{end}} title padding0">
<div class="according-panel-heading">
<div class="accordion-panel-title">
<i class="dropdown icon"></i>
<span class="accordion-panel-title-content">
<span>
<div style="float: right;">
{{$.CsrfTokenHtml}}
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span class="cti-mgRight-sm">
{{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span>
<span class="cti-mgRight-sm">
{{$.i18n.Tr "repo.modelarts.parent_version"}}:{{.PreVersionName}}</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon"
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
<span
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span>
<span class="cti-mgRight-sm uc-accordionTitle-black"
id="{{.VersionName}}-duration-span">{{.TrainJobDuration}}</span>
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}">
<i class="redo icon redo-color"></i>
</span>
</div>
<div style="float: right;">
{{if and ($.canDownload) (ne .Status "WAITING")}}
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model"
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}},EngineName:{{.EngineName}},ComputeResource:{{.ComputeResource}},Type:{{.Type}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{end}}
{{if and ($.canDownload) (ne .Status "WAITING")}}
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model">
{{$.i18n.Tr "repo.export_result_to_dataset"}}
<div class="export-popup" id="{{.VersionName}}-popup">
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div>
</div>
</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a>
{{end}}
</div>
</span>
</span>
</div>
</div>
</div>
<div class="{{if eq $k 0}}active{{end}} content">
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">

<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item detail-log-tab" data-tab="second{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
{{if ne .ComputeResource "GCU"}}
<a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}" data-path="{{$.RepoRelPath}}/grampus/train-job/{{.JobID}}/metrics">{{$.i18n.Tr "cloudbrain.resource_use"}}</a>
{{end}}
{{if eq .ComputeResource "CPU/GPU"}}
<a class="item run_info" data-tab="five{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
{{end}}
<a class="item load-model-file" data-tab="third{{$k}}" data-can-reschedule="{{$.canReschedule}}" data-retry-path="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/model/reschedule" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
<div class="tab_2_content">
<div class="ac-grid ac-grid-col2">
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_task"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DisplayJobName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.status"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-status">
{{.Status}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_creator"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
{{.User.Name}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.run_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{if .VersionName}}{{.VersionName}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" id="{{.VersionName}}-startTime">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
--
{{end}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
id="{{.VersionName}}-duration">
{{.TrainJobDuration}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.model_name"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelconvert.modelversion"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div>
</td>
</tr>
</tbody>
</table>
</div>
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
{{ if eq $.Spec.ComputeResource "GPU"}}
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "cloudbrain.mirror"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn-image" style="cursor:pointer"
data-clipboard-text="{{.EngineName}}"
data-success="{{$.i18n.Tr "repo.copy_link_success"}}"
data-error="{{$.i18n.Tr "repo.copy_link_error"}}"
data-content="{{$.i18n.Tr "repo.copy_link"}}"
data-variation="inverted tiny"
>
<span title="{{.EngineName}}">{{.EngineName}}</span>
</span>
</div>
</td>
</tr>
{{else}}
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.AI_driver"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.EngineName}}
</div>
</td>
</tr>
{{end}}
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.code_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BranchName}}
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BootFile}}
</div>
</td>
</tr>

<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Parameters}}">
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.grampus.train_job.ai_center"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-ai_center">
{{if $.ai_center}}{{$.ai_center}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content spec">
<div class="text-span text-span-w"></div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.compute_node"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.WorkServerNumber}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.description"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
title="{{.Description}}">
{{if .Description}}{{.Description}}{{else}}--{{end}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div style="clear:both">
<table style="border:none" class="ui fixed small stackable table">
<thead>
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th>
</tr></thead>
<tbody>
{{range $m ,$n := $.datasetDownload}}
<tr>
<td style="word-wrap: break-word;word-break: break-all;">
{{if eq .IsDelete true}}
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}})
{{else}}
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a>
{{end}}
</td>
</tr>
{{end}}
</tbody>
</table>
</div>
</div>
</div>
<div class="ui tab" data-tab="second{{$k}}">
<div class="detail-log-content detail-log-content-{{.VersionName}}"
{{if eq .ComputeResource "NPU"}}
data-multiplenode="true"
{{end}}
data-noscroll="true"
data-logapiurl="{{$.RepoLink}}/grampus/train-job/{{.JobID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/grampus/train-job/{{.JobID}}/download_log"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }}
</div>
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;">
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div>
<div style="padding:0 50px 10px 30px;height:100%">
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;"
{{if eq .ComputeResource "NPU"}}
data-multiplenode="true"
{{end}}
data-noscroll="true"
data-logapiurl="{{$.RepoLink}}/grampus/train-job/{{.JobID}}/log"
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/grampus/train-job/{{.JobID}}/download_log"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }}
</div>
</div>
</div>
</div>
<div class="ui tab" data-tab="five{{$k}}">
<div style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<div class="ui attached info" id="info{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<span class="info_text">
</span>
</div>

</div>

</div>
<div class="ui tab" data-tab="four{{$k}}" style="position: relative;">
<i class="ri-refresh-line metric_chart"
style="position: absolute;right: 25%;color:#3291f8;z-index:99;cursor: pointer;"
data-version="{{.VersionName}}"></i>
<div class="node-select" style="display:none;">
<select class="ui dropdown"></select>
</div>
<div class="ui inverted active dimmer" style="display: none;">
<div class="ui loader"></div>
</div>
<div id="metric-{{.VersionName}}" style="height: 260px;width: 870px;"
{{if eq .ComputeResource "NPU"}}
data-multiplenode="true"
{{end}}
data-workservernumber="{{.WorkServerNumber}}"
></div>
</div>
<div class="ui tab" data-tab="third{{$k}}">
<input type="hidden" name="model{{.VersionName}}" value="-1">
<input type="hidden" name="modelback{{.VersionName}}" value="-1">
<div style="display: flex;justify-content: space-between;">
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'>
<div class="active section">{{.VersionName}}</div>
<div class="divider"> / </div>
</div>
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info'
href="{{$.RepoLink}}/grampus/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span>
</a>
</div>
<div id="dir_list{{.VersionName}}">
</div>
{{if eq .ComputeResource "CPU/GPU"}}
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;">
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i>
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span>
</div>
{{end}}
</div>

</div>
</div>
</div>
<!-- {{template "custom/max_log" .}} -->
{{end}} {{template "base/paginate" .}}
</div>
<!-- 确认模态框 -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> {{.i18n.Tr "cloudbrain.delete_task"}}
</div>

<div class="content">
<p>{{.i18n.Tr "cloudbrain.task_delete_confirm"}}</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> {{.i18n.Tr "cloudbrain.operate_cancel"}}
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> {{.i18n.Tr "cloudbrain.operate_confirm"}}
</div>
</div>
</div>
</div>
<!-- 创建模型 -->
<div id="newmodel">
<div class="ui modal second">
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);">
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4>
</div>
<div class="content content-padding">
<form id="formId" method="POST" class="ui form">
<div class="ui error message">
</div>
{{$.CsrfTokenHtml}}
<input type="hidden" name="trainTaskCreate" value="true">

<div class="required inline field">
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label>
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required>
<input type="hidden" id="versionName" name="versionName" value="V0001">
<input style="width: 45%;" id="JobName" readonly required>
</div>

<div class="required inline field" id="modelname">
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label>
<input style="width: 45%;" id="name" name="name" required maxlength="25"
onkeyup="this.value=this.value.replace(/[, ]/g,'')">
</div>
<div class="required inline field" id="verionname">
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label>
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255">
</div>
<div class="unite min_title inline field required">
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label>
<div class="ui dropdown selection search width70" id="choice_Engine">
<input type="hidden" id="engine" name="engine" required>
<div class="default text">{{.i18n.Tr "repo.model.manage.select.engine"}}</div>
<i class="dropdown icon"></i>
<div class="menu" id="job-Engine">
<option class="active item" data-value="0">PyTorch</option>
<option class="item" data-value="1">TensorFlow</option>
<option class="item" data-value="2">MindSpore</option>
<option class="item" data-value="4">PaddlePaddle</option>
<option class="item" data-value="5">OneFlow</option>
<option class="item" data-value="6">MXNet</option>
<option class="item" data-value="3">Other</option>
</div>
</div>

</div>
<div class="unite min_title inline fields required">
<div class="field required">
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label>
</div>
<div class="thirteen wide field" style="position:relative">
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile" >
<div id="menuContent" class="menuContent" style="display:none;">
<ul id="treeDemo" class="ztree"></ul>
</div>
</div>
</div>
<div class="inline field">
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label>
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255"
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'>
</div>
{{if eq $.Repository.IsPrivate false}}
<div class="inline fields">
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}}&nbsp;&nbsp;&nbsp;</label>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" checked="checked" value="false">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label>
</div>
</div>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" value="true">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label>
</div>
</div>
</div>
{{end}}
<div class="inline field">
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label>
{{/* Model description: every handler truncates to the same 255-character limit as maxlength. */}}
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3"
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}'
onchange="this.value=this.value.substring(0, 255)"
onkeydown="this.value=this.value.substring(0, 255)"
onkeyup="this.value=this.value.substring(0, 255)"></textarea>
</div>

<div class="inline field" style="margin-left: 75px;">
<button onclick="createModel()" type="button" class="ui create_train_job green button"
style="position: absolute;">
{{.i18n.Tr "repo.model.manage.sava_model"}}
</button>
</div>
</form>
<div class="actions" style="display: inline-block;margin-left: 180px;">
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button>
</div>
</div>


</div>
</div>
{{template "custom/export_dataset" .}}
</div>
{{template "base/footer" .}}
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script>
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
;(function() {
var SPEC = {{ .Spec }};
var showPoint = false;
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
});
$('td.ti-text-form-content.spec').text(specStr);
})();
// zTree settings used when the model-file tree (#treeDemo) is initialised.
var setting = {
  callback: {
    // Clicking a node label toggles its checkbox (see beforeClick).
    beforeClick: beforeClick,
    // Checked files are written into #modelSelectedFile (see onCheck).
    onCheck: onCheck
  },
  view: {
    dblClickExpand: false
  },
  check: {
    enable: true,
    // NOTE(review): per the zTree docs "ps" propagates (un)checking to parent and child nodes - confirm.
    chkboxType: { "Y": "ps", "N": "ps" }
  }
};

/**
 * zTree beforeClick callback: flips the checkbox of the clicked node.
 *
 * @param {string} treeId   Id of the tree that raised the event (unused).
 * @param {Object} treeNode Node that was clicked.
 * @returns {boolean} Always false.
 */
function beforeClick(treeId, treeNode) {
  var tree = $.fn.zTree.getZTreeObj("treeDemo");
  var nextState = !treeNode.checked;
  tree.checkNode(treeNode, nextState, null, true);
  return false;
}
/**
 * zTree onCheck callback: writes the checked files into #modelSelectedFile.
 *
 * Each checked non-parent node contributes its path (node names joined with
 * "/"); the paths are joined with ";" and stored in the input's value attribute.
 */
function onCheck(e, treeId, treeNode) {
  var tree = $.fn.zTree.getZTreeObj("treeDemo");
  var selectedPaths = tree.getCheckedNodes(true)
    .filter(function (checkedNode) {
      // Parent (directory) nodes are skipped.
      return !checkedNode.isParent;
    })
    .map(function (checkedNode) {
      return checkedNode.getPath()
        .map(function (pathNode) { return pathNode.name; })
        .join("/");
    });
  $("#modelSelectedFile").attr("value", selectedPaths.join(";"));
}
/**
 * Opens the model-file dropdown (#menuContent) and starts listening for
 * mousedown events on the body so onBodyDown can close it again.
 */
function showMenu() {
  $("#menuContent").slideDown("fast");
  $("body").bind("mousedown", onBodyDown);
}
/**
 * Closes the model-file dropdown and removes the body mousedown listener
 * that showMenu registered.
 */
function hideMenu() {
  var dropdown = $("#menuContent");
  dropdown.fadeOut("fast");
  var body = $("body");
  body.unbind("mousedown", onBodyDown);
}
/**
 * Body mousedown handler: closes the dropdown unless the press happened on
 * the menu button, the file input, or inside the dropdown itself.
 */
function onBodyDown(event) {
  var target = event.target;
  var keepOpenIds = ["menuBtn", "modelSelectedFile", "menuContent"];
  var hitKnownElement = keepOpenIds.some(function (id) {
    return target.id == id;
  });
  if (hitKnownElement) {
    return;
  }
  // Presses on any descendant of the dropdown also keep it open.
  if ($(target).parents("#menuContent").length > 0) {
    return;
  }
  hideMenu();
}
// Marker key stored in a directory's map when the directory has no entries;
// convertToNode renders such a map as a closed parent node without children.
let dirKey="isOnlyDir--:&";

/**
 * Requests the result files of a training job and rebuilds the file tree
 * shown in #treeDemo.
 *
 * @param {Object} trainJob Job descriptor; Type, ComputeResource, JobName and
 *   VersionName are read. Nothing is requested when it is null or empty.
 */
function loadSelectedModelFile(trainJob) {
  $('#choice_file').dropdown('clear')
  $("#model-file").empty()
  if (trainJob == null || trainJob == "") {
    return;
  }
  // Jobs of type 2 are queried as type 1 when they ran on NPU, otherwise as type 0.
  let type = trainJob.Type;
  if (type == 2) {
    type = trainJob.ComputeResource == "NPU" ? 1 : 0;
  }
  // Encode every value so characters such as "&" or "#" cannot break the query string.
  const query = 'jobName=' + encodeURIComponent(trainJob.JobName) +
    '&type=' + encodeURIComponent(type) +
    '&VersionName=' + encodeURIComponent(trainJob.VersionName);
  $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?${query}`, (data) => {
    // Anything other than an array is treated as "no files" instead of throwing.
    const files = Array.isArray(data) ? data : [];
    // Prototype-less maps keep segments like "constructor" from hitting Object.prototype.
    const nodesMap = Object.create(null);

    // Pass 1: create a nested map entry for every path segment.
    files.forEach(function (file) {
      let current = nodesMap;
      for (const segment of file.FileName.split("/")) {
        if (segment == "") {
          break;
        }
        if (current[segment] == null) {
          current[segment] = Object.create(null);
        }
        current = current[segment];
      }
    });

    // Pass 2: once all entries exist, flag directories ("name/") that stayed empty.
    files.forEach(function (file) {
      const fileName = file.FileName;
      let current = nodesMap;
      for (const segment of fileName.split("/")) {
        if (segment == "") {
          if (fileName[fileName.length - 1] == "/" && Object.keys(current).length == 0) {
            current[dirKey] = "true";
          }
          break;
        }
        current = current[segment];
      }
    });

    const zNodes = [];
    convertToNode(zNodes, nodesMap);
    $.fn.zTree.init($("#treeDemo"), setting, zNodes);
  })
}

/**
 * Converts a nested name -> children map into zTree node objects.
 *
 * @param {Array}  nodeList Output array; one node per key of nodesMap is appended.
 * @param {Object} nodesMap Map from entry name to the map of its children. A
 *   child map containing dirKey marks an empty directory.
 */
function convertToNode(nodeList, nodesMap) {
  // Names are strings, so subtracting them yields NaN and never sorts;
  // compare them as text, with embedded numbers in numeric order.
  var keyList = Object.keys(nodesMap).sort(function (a, b) {
    return a.localeCompare(b, undefined, { numeric: true });
  });
  var isFirst = true;
  for (var i = 0; i < keyList.length; i++) {
    var name = keyList[i];
    var childMap = nodesMap[name];
    var node = { name: name };
    nodeList.push(node);
    if (childMap == null || Object.keys(childMap).length === 0) {
      // Plain file: no further properties.
      continue;
    }
    if (childMap[dirKey] != null) {
      // Empty directory: shown as a closed parent without children.
      node["open"] = false;
      node["isParent"] = true;
    } else {
      node["children"] = [];
      // Only the first directory on each level starts expanded.
      if (isFirst) {
        node["open"] = true;
        isFirst = false;
      }
      convertToNode(node["children"], childMap);
    }
  }
}
/**
 * Opens the "create model" modal pre-filled from a training job.
 *
 * @param {Object} obj Job descriptor; DisplayJobName, JobID, ComputeResource
 *   and EngineName are read here, and the whole object is passed on to
 *   loadSelectedModelFile.
 */
function showcreate(obj) {
  $('.ui.modal.second')
    .modal({
      centered: false,
      onShow: function () {
        $('input[name="version"]').addClass('model_disabled')
        $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
        $('input[name="jobId"]').val(obj.JobID)
        // The version name submitted with the form is always "V0001".
        $('input[name="versionName"]').val("V0001")
        if (obj.ComputeResource == "NPU") {
          if (obj.EngineName != null && obj.EngineName != "") {
            // The engine family is the part of the engine name before the first "-".
            // Declared locally so it no longer leaks as an implicit global.
            let srcEngine = obj.EngineName.split('-')[0].trim().toLowerCase();
            if (srcEngine == 'tensorflow') {
              $('#choice_Engine .default.text').text("TensorFlow");
              $('#choice_Engine input[name="engine"]').val(1)
            }
            if (srcEngine == 'mindspore') {
              $('#choice_Engine .default.text').text("MindSpore");
              $('#choice_Engine input[name="engine"]').val(2)
            }
          }
        } else {
          // Every non-NPU job is preset to PyTorch.
          $('#choice_Engine .default.text').text("PyTorch");
          $('#choice_Engine input[name="engine"]').val(0)
        }
        $('#choice_Engine .default.text').css({ "color": "rgb(0, 0, 0,0.87)" })
        $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
        createModelName();
        loadSelectedModelFile(obj);
      },
      onHide: function () {
        $('.ui.dimmer').css({ "background-color": "" })
        // Clear the previous error; calling .text() without an argument only reads it.
        $('.ui.error.message').text('')
        $('.ui.error.message').css('display', 'none')
      }
    })
    .modal('show')
}
/**
 * Submits the "create model" form via AJAX.
 *
 * Aborts (and flags the field as erroneous) when no model file is selected.
 * When the form has no `isPrivate` control the model is submitted as private.
 * On success the form is reset and the browser navigates to the README
 * template page of the new model; on failure the server response text is
 * shown in the error banner. The loading mask is shown for the duration of
 * the request.
 */
function createModel() {
  const $selectedFile = $('input#modelSelectedFile')
  if (!$selectedFile.val()) {
    $('input#modelSelectedFile').parent().addClass('error')
    return
  }

  const endpoint = `/${userName}/${repoPath}/modelmanage/create_new_model`
  let payload = $("#formId").serialize()
  const privacyInputs = document.getElementsByName("isPrivate");
  const hasPrivacyInput = !(privacyInputs == null || privacyInputs.length == 0)
  if (!hasPrivacyInput) {
    payload += "&isPrivate=true";
  }

  $("#mask").css({ "display": "block", "z-index": "9999" })

  const onSuccess = function (res) {
    // Read the name before the form is wiped below.
    const createdName = $('#formId #name').val();
    ['engine_name', 'engine', 'jobId', 'label', 'description'].forEach(function (field) {
      $('input[name="' + field + '"]').val("");
    });
    $("#modelSelectedFile").attr("value", "");
    document.getElementById("formId").reset();
    location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(createdName)}`
    $('.ui.modal.second').modal('hide')
  }

  const onError = function (xhr) {
    // Hide the loading state.
    // Only runs when the request fails (status code is not 200).
    const $banner = $('.ui.error.message')
    $banner.text(xhr.responseText)
    $('.ui.error.message').css('display', 'block')
  }

  const onComplete = function (xhr) {
    $("#mask").css({ "display": "none", "z-index": "1" })
  }

  $.ajax({
    url: endpoint,
    type: 'POST',
    data: payload,
    success: onSuccess,
    error: onError,
    complete: onComplete
  })
}
/**
 * Pre-fills the model form with a generated name and the initial version.
 *
 * The name is "<repo>_model_<suffix>", where <repo> is the third
 * slash-separated piece of the current path and <suffix> is up to four
 * random base-36 characters. The version field is set to "0.0.1".
 */
function createModelName() {
  const pathPieces = location.pathname.split('/')
  // Drop the leading "0." of the base-36 fraction and keep at most 4 characters.
  const suffix = Math.random().toString(36).slice(2, 6)
  $('#name').val(pathPieces[2] + '_model_' + suffix)
  $('#version').val("0.0.1")
}

// Enable tab behaviour on menu items immediately.
$('.menu .item').tab()
// Once the DOM is ready, initialise accordions so that only the icon toggles them.
$(document).ready(function () {
$('.ui.accordion').accordion({ selector: { trigger: '.icon' } });
});
// Once the DOM is ready, enable tab behaviour on secondary-menu items as well.
$(document).ready(function () {
$('.secondary.menu .item').tab();
});

// Page-level state shared by the functions in this script; userName and
// repoPath are used by createModel() to build request URLs.
let userName
let repoPath
let jobID
// Download-permission flag rendered by the server-side template (name kept as spelled).
let downlaodFlag = {{ $.canDownload }}
// Derive owner, repository and job id from the current URL by position:
// the 5th-from-last, 4th-from-last and last "/"-separated pieces.
// NOTE(review): the whole href is split, so a query string or fragment would
// end up inside jobID — confirm this page is never opened with one.
$(document).ready(function () {
let url = window.location.href;
let urlArr = url.split('/')
userName = urlArr.slice(-5)[0]
repoPath = urlArr.slice(-4)[0]
jobID = urlArr.slice(-1)[0]
})
/**
 * Stops an event from bubbling up to ancestor elements.
 *
 * Prefers the global `window.event` when it is set and otherwise uses the
 * event passed in.
 *
 * @param {Event} e - event to stop when `window.event` is not available.
 */
function stopBubbling(e) {
  var evt = window.event || e;
  if (!evt.stopPropagation) {
    // Legacy IE fallback.
    evt.cancelBubble = true;
    return;
  }
  // Prevent the event from propagating (bubbling) further.
  evt.stopPropagation();
}

/**
 * Formats a byte count as a rounded, human-readable size such as "3MB".
 *
 * Missing, empty, non-numeric, zero or negative input yields "0 Bytes".
 * Otherwise the value is scaled by powers of 1024, rounded to a whole number
 * and suffixed (without a space) by the matching unit. Values beyond the
 * largest unit stay expressed in that unit.
 *
 * @param {number|string|null|undefined} value - size in bytes.
 * @returns {string} formatted size.
 */
function renderSize(value) {
  if (null == value || value == '') {
    return "0 Bytes";
  }
  var unitArr = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
  var size = parseFloat(value);
  // Guard against NaN, "0" and negative input, which previously produced
  // "NaNundefined" because the logarithm-based index was not a valid position.
  if (!isFinite(size) || size <= 0) {
    return "0 Bytes";
  }
  // Repeated division by 1024 is exact in floating point and keeps the index
  // inside the unit table (sub-byte values stay in "Bytes", huge ones in "YB").
  var index = 0;
  while (size >= 1024 && index < unitArr.length - 1) {
    size /= 1024;
    index++;
  }
  // Number of decimal places kept: none.
  return size.toFixed(0) + unitArr[index];
}
</script>

+ 3
- 288
templates/repo/modelarts/trainjob/index.tmpl View File

@@ -1,293 +1,8 @@
<!-- 头部导航栏 -->
{{template "base/head" .}}

<style>
.padding0{
padding: 0 !important;
}
</style>

<!-- 弹窗 -->
<div id="mask">
<div id="loadingPage">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>

<!-- 提示框 -->
<div class="alert"></div>

<link rel="stylesheet" href="{{StaticUrlPrefix}}/css/vp-cloudbrain-list.css?v={{MD5 AppVer}}" />
<div class="repository release dataset-list view">
{{template "repo/header" .}}
<!-- 列表容器 -->
<div class="ui container">
{{template "base/alert" .}}
<div class="ui two column stackable grid ">
<div class="column">
<div class="ui blue small menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/debugjob?debugListType=all">{{$.i18n.Tr "repo.modelarts.notebook"}}</a>
<!-- Active "train job" tab: links to the unfiltered list (listType=all) -->
<a class="active item" href="{{.RepoLink}}/modelarts/train-job?listType=all">{{$.i18n.Tr "repo.modelarts.train_job"}}</a>
<a class="item" href="{{.RepoLink}}/modelarts/inference-job">{{$.i18n.Tr "repo.modelarts.infer_job"}}</a>
<a class="item" href="{{.RepoLink}}/cloudbrain/benchmark">{{$.i18n.Tr "repo.modelarts.evaluate_job"}}</a>
<a class="item" href="{{.RepoLink}}/grampus/onlineinfer">{{$.i18n.Tr "repo.modelarts.online_infer"}}</a>
{{if MLOPS}}
<a class="item" href="{{MlopsHost}}/AIStudio/mlops/deploy-online/edge-inference?reponame={{.Repository.Name}}&repoId={{.Repository.ID}}" target="_blank">
在线推理1
</a>
<a class="item" href="{{MlopsHost}}/AIStudio/mlops/pipeline/list?reponame={{.Repository.Name}}&repoId={{.Repository.ID}}" target="_blank">
流水线
</a>
{{end}}
</div>
</div>
<div class="column right aligned">
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;">
{{svg "octicon-server" 16}}
<div class="default text" style="color: rgba(0,0,0,.87);"></div>
<i class="dropdown icon"></i>
<div class="menu">
<div class="item" data-value="all">{{$.i18n.Tr "repo.gpu_type_all"}}</div>
<div class="item" data-value="CPU/GPU">CPU/GPU</div>
<div class="item" data-value="NPU">NPU</div>
<div class="item" data-value="GCU">GCU</div>
</div>
</div>
{{if .Permission.CanWrite $.UnitTypeCloudBrain}}
<a class="ui green button" href="{{.RepoLink}}/cloudbrain/train-job/create">{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
{{else}}
<a class="ui disabled button" >{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
{{end}}
</div>
</div>
{{if eq 0 (len .Tasks)}}
<div class="ui placeholder segment bgtask-none">
<div class="ui icon header bgtask-header-pic"></div>
<div class="bgtask-content-header">{{$.i18n.Tr "repo.train_task_not_created"}}</div>
<div class="bgtask-content">
{{if $.RepoIsEmpty}}
<div class="bgtask-content-txt">{{$.i18n.Tr "repo.repo_not_initialized" .RepoLink | Safe}}</div>
{{end}}
<div class="bgtask-content-txt">{{$.i18n.Tr "repo.dataset_desc"}}</div>
<div class="bgtask-content-txt">{{$.i18n.Tr "repo.platform_instructions" "https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/train" | Safe}}</div>
</div>
</div>
{{else}}
<!-- 中下列表展示区 -->
<div class="ui grid">
<div class="row">
<div class="ui sixteen wide column">
<!-- 任务展示 -->
<div class="dataset list">

<!-- 表头 -->
<div class="ui grid stackable" style="background: #f0f0f0;;">
<div class="row">
<div class="three wide column padding0">
<span style="margin:0 6px">{{$.i18n.Tr "repo.cloudbrain_task"}}</span>
</div>
<div class="one wide column text center padding0">
<span style="margin:0 6px">{{$.i18n.Tr "repo.modelarts.version_nums"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.modelarts.status"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.modelarts.createtime"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.cloudbrain_status_runtime"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.modelarts.cluster.computing_resources"}}</span>
</div>
<div class="one wide column text center padding0">
<span>{{$.i18n.Tr "repo.cloudbrain_creator"}}</span>
</div>
<div class="three wide column text center padding0">
<span>{{$.i18n.Tr "repo.cloudbrain_operate"}}</span>
</div>
</div>
</div>

{{range .Tasks}}
<div class="ui grid stackable item">
<div class="row">
<!-- 任务名 -->
<div class="three wide column padding0">
<a class="title" href='{{if eq .Cloudbrain.Type 1 }}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}' title="{{.DisplayJobName}}" style="font-size: 14px;">

<span class="fitted" style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span>
</a>
</div>
<!-- 版本数量 -->
<div class="one wide column text center padding0">
<span style="font-size: 12px;">{{.VersionCount}} </span>
</div>
<!-- 任务状态 -->
<div class="two wide column padding0" style="padding-left: 2.2rem !important;">
<span class="job-status" id="{{.JobID}}" data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
<span><i id="{{.JobID}}-icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="{{.JobID}}-text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
</div>
<!-- 任务创建时间 -->
<div class="two wide column text center padding0">
<span style="font-size: 12px;" class="">{{TimeSinceUnix .Cloudbrain.CreatedUnix $.Lang}}</span>
</div>
<!-- 任务运行时间 -->
<div class="two wide column text center padding0">
<span style="font-size: 12px;" id="duration-{{.JobID}}">{{.TrainJobDuration}}</span>
</div>
<!-- 计算资源 -->
<div class="two wide column text center padding0">
<span style="font-size: 12px;">
{{if eq .Cloudbrain.Type 2}}
{{$.i18n.Tr "cloudbrain.resource_cluster_c2net_simple"}}
{{else}}
{{$.i18n.Tr "cloudbrain.resource_cluster_openi_simple"}}
{{end}}
{{.ComputeResource}}</span>
</div>
<!-- 创建者 -->
<div class="one wide column text center padding0">
{{if .User.Name}}
<a href="{{AppSubUrl}}/{{.User.Name}}" title="{{.User.Name}}"><img class="ui avatar image" src="{{.User.RelAvatarLink}}"></a>
{{else}}
<a title="Ghost"><img class="ui avatar image" src="{{AppSubUrl}}/user/avatar/Ghost/-1"></a>
{{end}}
</div>

<div class="three wide column text center padding0">
<!-- 停止任务 -->
<div class="ui compact buttons">
{{$.CsrfTokenHtml}}
{{if .CanDel}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{.JobID}}" class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button" data-repopath='{{$.RepoRelPath}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}' data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
{{$.i18n.Tr "repo.stop"}}
</a>
{{else}}
<a class="ui basic disabled button">
{{$.i18n.Tr "repo.stop"}}
</a>
{{end}}

</div>
<!-- 修改任务 -->
<div class="ui compact buttons">
{{$.CsrfTokenHtml}}
{{if and .CanModify (not .FineTune)}}
<a style="padding: 0.5rem 1rem;" class="ui basic blue button __btn_edit__" href="{{if eq .Cloudbrain.Type 1}}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}/create_version{{if .VersionName}}?version_name={{.VersionName}}{{end}}">
{{$.i18n.Tr "repo.modelarts.modify"}}
</a>
{{else}}
<a class="ui basic disabled button" style="{{if .FineTune}}display:none;{{end}}">
{{$.i18n.Tr "repo.modelarts.modify"}}
</a>
{{end}}

</div>
<!-- 删除任务 -->
<form class="ui compact buttons" id="delForm-{{.JobID}}" action='{{if eq .Cloudbrain.Type 1}}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}/del' method="post">
<input type="hidden" name="listType" value="{{$.ListType}}">
{{$.CsrfTokenHtml}}
{{if .CanDel}}
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{.JobID}}" class="ui basic ai_delete blue button" style="border-radius: .28571429rem;">
{{$.i18n.Tr "repo.delete"}}
</a>
{{else}}
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" class="ui basic button disabled" style="border-radius: .28571429rem;">
{{$.i18n.Tr "repo.delete"}}
</a>
{{end}}
</form>
</div>
</div>
</div>
{{end}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[10]"
:page-size="10"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>
{{template "custom/save_most_time" .}}
</div>
</div>
</div>
{{end}}

</div>

</div>
</div>

</div>

<!-- 确认模态框 -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> {{.i18n.Tr "cloudbrain.delete_task"}}
</div>

<div class="content">
<p>{{.i18n.Tr "cloudbrain.task_delete_confirm"}}</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> {{.i18n.Tr "cloudbrain.operate_cancel"}}
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> {{.i18n.Tr "cloudbrain.operate_confirm"}}
</div>
</div>
</div>
</div>
<div id="__vue-root"></div>
</div>
<script src="{{StaticUrlPrefix}}/js/vp-cloudbrain-list.js?v={{MD5 AppVer}}"></script>
{{template "base/footer" .}}

<script>
// Repository link and the localized "all" label, rendered by the server-side template.
let url = {{$.RepoLink}};
let all = {{$.i18n.Tr "repo.gpu_type_all"}}
// Once the DOM is ready: show the current listType filter in the dropdown
// and reload the list whenever another filter is chosen.
$(document).ready(function(){
const params = new URLSearchParams(location.search)
// No query string at all: show the "all" label.
if(!location.search){
$('.default.text').text(all)
}else{
// A missing listType or listType=all also shows the "all" label.
if(!params.has('listType') || params.get('listType')=='all'){
$('.default.text').text(all)
}
else{
// Otherwise display the raw listType value from the URL.
$('.default.text').text(params.get('listType'))
}
}

// Selecting an entry navigates to the train-job list filtered by that value.
$('.ui.selection.dropdown').dropdown({
onChange:function(value){
location.href = `${url}/modelarts/train-job?listType=${value}`
}
})
})
// After the DOM has loaded, tag every edit button with a "backurl" query
// parameter holding the current page address.
document.addEventListener('DOMContentLoaded', function() {
  var returnTo = encodeURIComponent(window.location.href);
  $('.__btn_edit__').each(function () {
    var $button = $(this);
    var href = $button.attr('href');
    // Use '&' when the link already carries a query string, '?' otherwise.
    var separator = href.split('?').length > 1 ? '&' : '?';
    $button.attr('href', href + separator + 'backurl=' + returnTo);
  });
});
</script>

+ 292
- 0
templates/repo/modelarts/trainjob/index_ori.tmpl View File

@@ -0,0 +1,292 @@
<!-- 头部导航栏 -->
{{template "base/head" .}}

<style>
.padding0{
padding: 0 !important;
}
</style>

<!-- 弹窗 -->
<div id="mask">
<div id="loadingPage">
<div class="rect1"></div>
<div class="rect2"></div>
<div class="rect3"></div>
<div class="rect4"></div>
<div class="rect5"></div>
</div>
</div>

<!-- 提示框 -->
<div class="alert"></div>

<div class="repository release dataset-list view">
{{template "repo/header" .}}
<!-- 列表容器 -->
<div class="ui container">
{{template "base/alert" .}}
<div class="ui two column stackable grid ">
<div class="column">
<div class="ui blue small menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/debugjob?debugListType=all">{{$.i18n.Tr "repo.modelarts.notebook"}}</a>
<!-- Active "train job" tab: links to the unfiltered list (listType=all) -->
<a class="active item" href="{{.RepoLink}}/modelarts/train-job?listType=all">{{$.i18n.Tr "repo.modelarts.train_job"}}</a>
<a class="item" href="{{.RepoLink}}/modelarts/inference-job">{{$.i18n.Tr "repo.modelarts.infer_job"}}</a>
<a class="item" href="{{.RepoLink}}/cloudbrain/benchmark">{{$.i18n.Tr "repo.modelarts.evaluate_job"}}</a>
{{if MLOPS}}
<a class="item" href="{{MlopsHost}}/AIStudio/mlops/deploy-online/edge-inference?reponame={{.Repository.Name}}&repoId={{.Repository.ID}}" target="_blank">
在线推理
</a>
<a class="item" href="{{MlopsHost}}/AIStudio/mlops/pipeline/list?reponame={{.Repository.Name}}&repoId={{.Repository.ID}}" target="_blank">
流水线
</a>
{{end}}
</div>
</div>
<div class="column right aligned">
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;">
{{svg "octicon-server" 16}}
<div class="default text" style="color: rgba(0,0,0,.87);"></div>
<i class="dropdown icon"></i>
<div class="menu">
<div class="item" data-value="all">{{$.i18n.Tr "repo.gpu_type_all"}}</div>
<div class="item" data-value="CPU/GPU">CPU/GPU</div>
<div class="item" data-value="NPU">NPU</div>
<div class="item" data-value="GCU">GCU</div>
</div>
</div>
{{if .Permission.CanWrite $.UnitTypeCloudBrain}}
<a class="ui green button" href="{{.RepoLink}}/cloudbrain/train-job/create">{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
{{else}}
<a class="ui disabled button" >{{$.i18n.Tr "repo.modelarts.train_job.new_train"}}</a>
{{end}}
</div>
</div>
{{if eq 0 (len .Tasks)}}
<div class="ui placeholder segment bgtask-none">
<div class="ui icon header bgtask-header-pic"></div>
<div class="bgtask-content-header">{{$.i18n.Tr "repo.train_task_not_created"}}</div>
<div class="bgtask-content">
{{if $.RepoIsEmpty}}
<div class="bgtask-content-txt">{{$.i18n.Tr "repo.repo_not_initialized" .RepoLink | Safe}}</div>
{{end}}
<div class="bgtask-content-txt">{{$.i18n.Tr "repo.dataset_desc"}}</div>
<!-- NOTE(review): URL argument added to match how index.tmpl calls this same key; confirm the locale string expects it -->
<div class="bgtask-content-txt">{{$.i18n.Tr "repo.platform_instructions" "https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/train" | Safe}}</div>
</div>
</div>
{{else}}
<!-- 中下列表展示区 -->
<div class="ui grid">
<div class="row">
<div class="ui sixteen wide column">
<!-- 任务展示 -->
<div class="dataset list">

<!-- 表头 -->
<div class="ui grid stackable" style="background: #f0f0f0;;">
<div class="row">
<div class="three wide column padding0">
<span style="margin:0 6px">{{$.i18n.Tr "repo.cloudbrain_task"}}</span>
</div>
<div class="one wide column text center padding0">
<span style="margin:0 6px">{{$.i18n.Tr "repo.modelarts.version_nums"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.modelarts.status"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.modelarts.createtime"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.cloudbrain_status_runtime"}}</span>
</div>
<div class="two wide column text center padding0">
<span>{{$.i18n.Tr "repo.modelarts.cluster.computing_resources"}}</span>
</div>
<div class="one wide column text center padding0">
<span>{{$.i18n.Tr "repo.cloudbrain_creator"}}</span>
</div>
<div class="three wide column text center padding0">
<span>{{$.i18n.Tr "repo.cloudbrain_operate"}}</span>
</div>
</div>
</div>

{{range .Tasks}}
<div class="ui grid stackable item">
<div class="row">
<!-- 任务名 -->
<div class="three wide column padding0">
<a class="title" href='{{if eq .Cloudbrain.Type 1 }}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}' title="{{.DisplayJobName}}" style="font-size: 14px;">

<span class="fitted" style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span>
</a>
</div>
<!-- 版本数量 -->
<div class="one wide column text center padding0">
<span style="font-size: 12px;">{{.VersionCount}} </span>
</div>
<!-- 任务状态 -->
<div class="two wide column padding0" style="padding-left: 2.2rem !important;">
<span class="job-status" id="{{.JobID}}" data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
<span><i id="{{.JobID}}-icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="{{.JobID}}-text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
</div>
<!-- 任务创建时间 -->
<div class="two wide column text center padding0">
<span style="font-size: 12px;" class="">{{TimeSinceUnix .Cloudbrain.CreatedUnix $.Lang}}</span>
</div>
<!-- 任务运行时间 -->
<div class="two wide column text center padding0">
<span style="font-size: 12px;" id="duration-{{.JobID}}">{{.TrainJobDuration}}</span>
</div>
<!-- 计算资源 -->
<div class="two wide column text center padding0">
<span style="font-size: 12px;">
{{if eq .Cloudbrain.Type 2}}
{{$.i18n.Tr "cloudbrain.resource_cluster_c2net_simple"}}
{{else}}
{{$.i18n.Tr "cloudbrain.resource_cluster_openi_simple"}}
{{end}}
{{.ComputeResource}}</span>
</div>
<!-- 创建者 -->
<div class="one wide column text center padding0">
{{if .User.Name}}
<a href="{{AppSubUrl}}/{{.User.Name}}" title="{{.User.Name}}"><img class="ui avatar image" src="{{.User.RelAvatarLink}}"></a>
{{else}}
<a title="Ghost"><img class="ui avatar image" src="{{AppSubUrl}}/user/avatar/Ghost/-1"></a>
{{end}}
</div>

<div class="three wide column text center padding0">
<!-- 停止任务 -->
<div class="ui compact buttons">
{{$.CsrfTokenHtml}}
{{if .CanDel}}
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{.JobID}}" class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button" data-repopath='{{$.RepoRelPath}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}' data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
{{$.i18n.Tr "repo.stop"}}
</a>
{{else}}
<a class="ui basic disabled button">
{{$.i18n.Tr "repo.stop"}}
</a>
{{end}}

</div>
<!-- 修改任务 -->
<div class="ui compact buttons">
{{$.CsrfTokenHtml}}
{{if and .CanModify (not .FineTune)}}
<a style="padding: 0.5rem 1rem;" class="ui basic blue button __btn_edit__" href="{{if eq .Cloudbrain.Type 1}}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}/create_version{{if .VersionName}}?version_name={{.VersionName}}{{end}}">
{{$.i18n.Tr "repo.modelarts.modify"}}
</a>
{{else}}
<a class="ui basic disabled button" style="{{if .FineTune}}display:none;{{end}}">
{{$.i18n.Tr "repo.modelarts.modify"}}
</a>
{{end}}

</div>
<!-- 删除任务 -->
<form class="ui compact buttons" id="delForm-{{.JobID}}" action='{{if eq .Cloudbrain.Type 1}}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}/del' method="post">
<input type="hidden" name="listType" value="{{$.ListType}}">
{{$.CsrfTokenHtml}}
{{if .CanDel}}
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{.JobID}}" class="ui basic ai_delete blue button" style="border-radius: .28571429rem;">
{{$.i18n.Tr "repo.delete"}}
</a>
{{else}}
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" class="ui basic button disabled" style="border-radius: .28571429rem;">
{{$.i18n.Tr "repo.delete"}}
</a>
{{end}}
</form>
</div>
</div>
</div>
{{end}}
<div id="app" style="margin-top: 2rem;">
<div class="center">
<el-pagination
background
@current-change="handleCurrentChange"
:current-page="page"
:page-sizes="[10]"
:page-size="10"
layout="total, sizes, prev, pager, next, jumper"
:total="{{.Page.Paginater.Total}}">
</el-pagination>
</div>
</div>
</div>
{{template "custom/save_most_time" .}}
</div>
</div>
</div>
{{end}}

</div>

</div>
</div>

</div>

<!-- 确认模态框 -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> {{.i18n.Tr "cloudbrain.delete_task"}}
</div>

<div class="content">
<p>{{.i18n.Tr "cloudbrain.task_delete_confirm"}}</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> {{.i18n.Tr "cloudbrain.operate_cancel"}}
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> {{.i18n.Tr "cloudbrain.operate_confirm"}}
</div>
</div>
</div>
</div>
</div>
{{template "base/footer" .}}

<script>
// Repository link and the localized "all" label, rendered by the server-side template.
let url = {{$.RepoLink}};
let all = {{$.i18n.Tr "repo.gpu_type_all"}}
// Once the DOM is ready: show the current listType filter in the dropdown
// and reload the list whenever another filter is chosen.
$(document).ready(function(){
const params = new URLSearchParams(location.search)
// No query string at all: show the "all" label.
if(!location.search){
$('.default.text').text(all)
}else{
// A missing listType or listType=all also shows the "all" label.
if(!params.has('listType') || params.get('listType')=='all'){
$('.default.text').text(all)
}
else{
// Otherwise display the raw listType value from the URL.
$('.default.text').text(params.get('listType'))
}
}

// Selecting an entry navigates to the train-job list filtered by that value.
$('.ui.selection.dropdown').dropdown({
onChange:function(value){
location.href = `${url}/modelarts/train-job?listType=${value}`
}
})
})
// After the DOM has loaded, tag every edit button with a "backurl" query
// parameter holding the current page address.
document.addEventListener('DOMContentLoaded', function() {
  var returnTo = encodeURIComponent(window.location.href);
  $('.__btn_edit__').each(function () {
    var $button = $(this);
    var href = $button.attr('href');
    // Use '&' when the link already carries a query string, '?' otherwise.
    var separator = href.split('?').length > 1 ? '&' : '?';
    $button.attr('href', href + separator + 'backurl=' + returnTo);
  });
});
</script>

+ 1
- 269
templates/repo/modelarts/trainjob/new.tmpl View File

@@ -1,269 +1 @@
{{template "base/head" .}}
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_names" value="">
<input type="hidden" id="ai_flaver_name" name="flaver_names" value="">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<!-- Globe icon; the namespace attribute was misspelled "sxmlns" -->
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<!-- Globe icon; the namespace attribute was misspelled "sxmlns" -->
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>

<div class="required inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
CPU/GPU
</a>
<a class="active item" href="{{.RepoLink}}/modelarts/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.train_dataset_path_rule" | Safe}}</span>
</div>
</div>
<div class="required inline min_title field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<div class="required inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" data-multiple="true"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>

<div class="required inline min_title fields" style="width: 92.5%;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.AI_driver"}}</label>
<div class="field" style="flex: 1.5;">
<select class="ui dropdown width" id="trainjob_engines">
{{range .engines}}
<option value="{{.Value}}">{{.Value}}</option>
{{end}}
</select>
</div>

<div class="field" style="flex: 2;" id="engine_name">
<select class="ui dropdown width" id="trainjob_engine_versions" name="engine_id">
{{if .engine_id}}
{{range .engine_versions}}
{{if eq $.engine_id .ID}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
{{end}}
{{range .engine_versions}}
{{if ne $.engine_id .ID}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
{{end}}
{{else}}
{{range .engine_versions}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
{{end}}
</select>

</div>

</div>

<div class="inline field min_title required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MINIST_Example" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">
</div>
</div>

<div class="required min_title field " style="display: none;">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.resource_pool"}}</label>
<select class="ui dropdown" id="trainjob_resource_pool" style='width:385px' name="pool_id">
{{range .resource_pools}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
</select>
</div>

<div class="required grouped fields" style="display: none;">
<label style="font-weight: normal;"
for="resource_type">{{.i18n.Tr "repo.modelarts.train_job.resource_type"}}</label>
<div class="field">
<div class="ui grid">
<div class="column">
<div class="ui radio checkbox">
<input type="radio" name="resource_type" checked="" tabindex="0">
</div>
</div>
<div class="three wide column">train-private-1</div>
<div class="three wide column">{{svg "octicon-verified" 16}} 运行中</div>
<div class="three wide column"> CPU:192 核 2048GiB</div>
</div>
</div>
</div>
<div class="required inline min_title field" id="flaver_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}} style="color:red"></select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>

<div class="ui labeled input" style="width: 5%;">
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_number">
{{if .WorkNode}}
{{range .WorkNode}}

{{if $.work_server_number}}
{{if eq . $.work_server_number }}
<option name="server_id" selected value="{{.}}">{{.}}</option>
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{end}}

{{else}}
<option name="server_id" value="1">1</option>
{{end}}
</select>
</div>

</div>
</div>
<div class="inline field" style="padding: 1rem 0;">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- 模态框 -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
// Fill the resource-specification dropdown (#__specs__) using the spec list and
// localized labels rendered into this page by the server.
;(function() {
var specLabels = {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
};
var specList = {{.Specs}};
var pointsEnabled = {{ .CloudBrainPaySwitch }};
var $specSelect = $('#__specs__');
window.renderSpecsSelect($specSelect, specList, pointsEnabled, specLabels);
})();
</script>
{{ template "repo/cloudbrain/cloudbraincreate" .}}

+ 269
- 0
templates/repo/modelarts/trainjob/new_ori.tmpl View File

@@ -0,0 +1,269 @@
{{template "base/head" .}}
{{template "custom/global_mask" .}}
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
{{if eq .NotStopTaskCount 0}}
{{template "base/alert" .}}
{{end}}
{{template "custom/alert_cb" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
</h4>
<div class="ui attached segment">
<!-- equal width -->
<form id="form_id" class="ui form" action="{{.Link}}" method="post">
{{.CsrfTokenHtml}}
<input type="hidden" name="action" value="update">
<input type="hidden" id="ai_engine_name" name="engine_names" value="">
<input type="hidden" id="ai_flaver_name" name="flaver_names" value="">
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
<!-- Resource cluster switcher: two links (OpenI cluster, marked active here, and C2Net).
     The inline SVG icons use the standard xmlns attribute name. -->
<div class="required min_title inline field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>

<div class="required inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
CPU/GPU
</a>
<a class="active item" href="{{.RepoLink}}/modelarts/train-job/create">
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16"
height="16">
<path fill="none" d="M0 0h24v24H0z" />
<path
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" />
</svg>
Ascend NPU</a>
</div>
</div>
<div class="min_title inline field" style="margin-top:-10px;">
<label class="label-fix-width" style="font-weight: normal;"></label>
{{template "custom/task_wait_count" .}}
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.train_dataset_path_rule" | Safe}}</span>
</div>
</div>
<!-- Job name field: required, at most 36 characters; commas and spaces are stripped on keyup.
     The placeholder is quoted so an empty translation renders as an empty attribute value
     instead of the escaper's failsafe placeholder. -->
<div class="required inline min_title field" style="margin-bottom: 0rem !important;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder="{{.i18n.Tr "repo.modelarts.train_job.job_name"}}" value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
<!-- Optional job description (max 255 characters). When a previous description exists it is
     rendered as the textarea content; otherwise an empty textarea with extra length-trimming
     handlers is rendered. Placeholders are quoted for consistency with the other attributes
     and so an empty translation cannot trigger the escaper's failsafe placeholder. -->
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
{{if .description}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder="{{.i18n.Tr "repo.modelarts.train_job.new_place"}}">{{.description}}</textarea>
{{else}}
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder="{{.i18n.Tr "repo.modelarts.train_job.new_place"}}" onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
{{end}}
</div>
<div class="ui divider"></div>

<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>


<div class="required inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
<select class="ui dropdown width80 left2" id="code_version" name="branch_name">
{{if .branch_name}}
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branch_name }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{else}}
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
{{range $k, $v :=.Branches}}
{{ if ne $v $.branchName }}
<option name="branch_name" value="{{$v}}">{{$v}}</option>
{{end}}
{{end}}
{{end}}
</select>
</div>
<!--{{template "custom/select_model" .}} -->
<div>
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" data-multiple="true"
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div>
<div id="select-multi-model"></div>
</div>

<div class="required inline min_title fields" style="width: 92.5%;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.AI_driver"}}</label>
<div class="field" style="flex: 1.5;">
<select class="ui dropdown width" id="trainjob_engines">
{{range .engines}}
<option value="{{.Value}}">{{.Value}}</option>
{{end}}
</select>
</div>

<div class="field" style="flex: 2;" id="engine_name">
<select class="ui dropdown width" id="trainjob_engine_versions" name="engine_id">
{{if .engine_id}}
{{range .engine_versions}}
{{if eq $.engine_id .ID}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
{{end}}
{{range .engine_versions}}
{{if ne $.engine_id .ID}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
{{end}}
{{else}}
{{range .engine_versions}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
{{end}}
</select>

</div>

</div>

<!-- Boot (start) file field: a required text input, prefilled with the previously submitted
     value when one is available, followed by a help popup icon and a link to a sample project.
     The data-content value is quoted on purpose: in an unquoted attribute the template escaper
     substitutes its failsafe placeholder when the translated text is empty. -->
<div class="inline field min_title required">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
{{if .boot_file}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
{{else}}
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
{{end}}
<span>
<i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}}" data-position="right center" data-variation="mini"></i>
</span>
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MINIST_Example" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
</div>

<div id="select-multi-dataset">

</div>
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
<div class="inline min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
<input id="store_run_para" type="hidden" name="run_para_list">
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">
</div>
</div>

<div class="required min_title field " style="display: none;">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.resource_pool"}}</label>
<select class="ui dropdown" id="trainjob_resource_pool" style='width:385px' name="pool_id">
{{range .resource_pools}}
<option value="{{.ID}}">{{.Value}}</option>
{{end}}
</select>
</div>

<div class="required grouped fields" style="display: none;">
<label style="font-weight: normal;"
for="resource_type">{{.i18n.Tr "repo.modelarts.train_job.resource_type"}}</label>
<div class="field">
<div class="ui grid">
<div class="column">
<div class="ui radio checkbox">
<input type="radio" name="resource_type" checked="" tabindex="0">
</div>
</div>
<div class="three wide column">train-private-1</div>
<div class="three wide column">{{svg "octicon-verified" 16}} 运行中</div>
<div class="three wide column"> CPU:192 核 2048GiB</div>
</div>
</div>
</div>
<div class="required inline min_title field" id="flaver_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
<select class="ui dropdown width48" id="__specs__" name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}} style="color:red"></select>
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span>
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
{{if .CloudBrainPaySwitch}}
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
<span style="float:right;">
<i class="question circle icon link"></i>
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
</span>
</div>
{{end}}
</div>
<div class="inline required min_title field">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>

<div class="ui labeled input" style="width: 5%;">
<div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
<select class="ui dropdown width" style='width: 100%;' name="work_server_number">
{{if .WorkNode}}
{{range .WorkNode}}

{{if $.work_server_number}}
{{if eq . $.work_server_number }}
<option name="server_id" selected value="{{.}}">{{.}}</option>
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{else}}
<option name="server_id" value="{{.}}">{{.}}</option>
{{end}}
{{end}}

{{else}}
<option name="server_id" value="1">1</option>
{{end}}
</select>
</div>

</div>
</div>
<div class="inline field" style="padding: 1rem 0;">
<label class="label-fix-width"></label>
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
{{.i18n.Tr "repo.cloudbrain.new"}}
</button>
<a class="ui button" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
</div>

<!-- 模态框 -->

</form>
</div>
</div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
// Fill the resource-specification dropdown (#__specs__) using the spec list and
// localized labels rendered into this page by the server.
;(function() {
var specLabels = {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
};
var specList = {{.Specs}};
var pointsEnabled = {{ .CloudBrainPaySwitch }};
var $specSelect = $('#__specs__');
window.renderSpecsSelect($specSelect, specList, pointsEnabled, specLabels);
})();
</script>

+ 1
- 1
templates/repo/modelarts/trainjob/para_manage.tmpl View File

@@ -112,7 +112,7 @@
if (obj.style.color == "rgb(204, 204, 204)") {
return
} else {
var delId = obj.parentNode.id
var delId = obj.parentNode.getAttribute('id')
flag = 1;
$('.ui.basic.modal')
.modal({


+ 1
- 870
templates/repo/modelarts/trainjob/show.tmpl View File

@@ -1,870 +1 @@
{{template "base/head" .}}
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css">
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>

<style>
.model_file_bread {
margin-bottom: -0.5rem !important;
padding-left: 1rem;
padding-top: 0.5rem;
}
.menuContent{
position: absolute;
background: #ffffff;
left: 0;
right: 26px;
top: 36px;
z-index:999;
border: 1px solid #96c8da;
border-top: 0;
border-bottom-right-radius: 4px;
border-bottom-left-radius: 4px;
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%);
}
</style>
{{template "custom/global_mask" .}}
<div class="alert"></div>
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<h4 class="ui header" id="vertical-segment">
<div class="ui breadcrumb">
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all">
{{.i18n.Tr "repo.cloudbrain"}}
</a>
<div class="divider"> / </div>
<a class="section" href="{{$.RepoLink}}/modelarts/train-job">
{{$.i18n.Tr "repo.modelarts.train_job"}}
</a>
<div class="divider"> / </div>
<div class="active section">{{.displayJobName}}</div>
</div>
</h4>
{{range $k ,$v := .version_list_task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{.JobID}}"
data-version="{{.VersionName}}">
<div class="{{if eq $k 0}}active{{end}} title padding0">
<div class="according-panel-heading">
<div class="accordion-panel-title">
<i class="dropdown icon"></i>
<span class="accordion-panel-title-content">
<span>
<div style="float: right;">
{{$.CsrfTokenHtml}}
{{if and (.CanModify) (ne .Status "WAITING")}}
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model"
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}},EngineID:{{.EngineID}},EngineName:{{.EngineName}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a>
{{end}}
{{if and (.CanModify) (ne .Status "WAITING")}}
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model">
{{$.i18n.Tr "repo.export_result_to_dataset"}}
<div class="export-popup" id="{{.VersionName}}-popup">
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div>
</div>
</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a>
{{end}}
{{if and .CanModify (not .FineTune)}}
<a class="ti-action-menu-item"
href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/create_version?version_name={{.VersionName}}&path=show">{{$.i18n.Tr "repo.modelarts.modify"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" style="{{if .FineTune}}display:none;{{end}}"
href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/create_version?version_name={{.VersionName}}">{{$.i18n.Tr "repo.modelarts.modify"}}</a>
{{end}}
{{if .CanDel}}
<a class="ti-action-menu-item stop-show-version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{end}}"
id="{{.VersionName}}-stop"
data-jobid="{{.JobID}}"
data-repopath="{{$.RepoRelPath}}/modelarts/train-job"
data-version = "{{.VersionName}}"
>{{$.i18n.Tr "repo.stop"}}</a>
{{else}}
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-stop">{{$.i18n.Tr "repo.stop"}}</a>
{{end}}


{{if .CanDel}}
<a class='ti-action-menu-item delete-show-version {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING" "RUNNING" "KILLING" "INIT"}}disabled {{end}}'
id="{{.VersionName}}-delete"
data-jobid="{{.JobID}}"
data-repopath="{{$.RepoRelPath}}/modelarts/train-job"
data-version = "{{.VersionName}}"
style="color: #FF4D4F;">{{$.i18n.Tr "repo.delete"}}</a>
{{else}}
<a class="ti-action-menu-item disabled"
style="color: #FF4D4F;">{{$.i18n.Tr "repo.delete"}}</a>
{{end}}
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">

<span class="cti-mgRight-sm">
{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}
</span>
<span class="cti-mgRight-sm">
{{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span>
<span class="cti-mgRight-sm">
{{$.i18n.Tr "repo.modelarts.parent_version"}}:{{.PreVersionName}}</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon"
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text"
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
</span>
<span
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span>
<span class="cti-mgRight-sm uc-accordionTitle-black"
id="{{.VersionName}}-duration-span">{{.TrainJobDuration}}</span>
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}">
<i class="redo icon redo-color"></i>
</span>
</div>
</span>
</span>
</div>
</div>
</div>
<div class="{{if eq $k 0}}active{{end}} content">
<div class="content-pad">
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">

<a class="active item"
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item detail-log-tab" data-tab="second{{$k}}"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}" data-path="{{$.RepoRelPath}}/modelarts/train-job/{{.JobID}}/metric_statistics?version_name={{.VersionName}}&statistic_type=each&metrics=">{{$.i18n.Tr "cloudbrain.resource_use"}}</a>
<a class="item load-model-file" data-tab="third{{$k}}" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
<div class="tab_2_content">
<div class="ac-grid ac-grid-col2">
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_task"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.DisplayJobName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.status"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-status">
{{.Status}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.cloudbrain_creator"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" id="{{.VersionName}}-mirror">
{{.User.Name}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.run_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.VersionName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" id="{{.VersionName}}-startTime">
{{if not (eq .Cloudbrain.StartTime 0)}}
{{TimeSinceUnix1 .Cloudbrain.StartTime}}
{{else}}
--
{{end}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
id="{{.VersionName}}-duration">
{{.TrainJobDuration}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.model_name"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelconvert.modelversion"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div>
</td>
</tr>
</tbody>
</table>
</div>
<div class="ac-grid-col">
<table class="ti-form">
<tbody class="ti-text-form">
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.AI_driver"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.EngineName}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.code_version"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BranchName}}
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span>
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.BootFile}}
</div>
</td>
</tr>
<!-- Run parameters row: shows the recorded parameters (also exposed as the hover title),
     or "--" when none were recorded. -->
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w" title="{{.Parameters}}">
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.standard"}}
</td>

<td class="ti-text-form-content spec{{$k}}">
<div class="text-span text-span-w"></div>
</td>
<script>
// Renders the resource specification of this version into its spec cell.
// The IIFE keeps SPEC / showPoint / specStr out of the global scope, since this
// script is emitted once per rendered version.
;(function() {
// Specification object injected by the template for the current version.
var SPEC = {{ .Spec }};
// Passed as the second argument of renderSpecStr; always false here.
var showPoint = false;
// window.renderSpecStr is defined outside this chunk; the third argument
// supplies the translated labels it uses when building the string.
var specStr = window.renderSpecStr(SPEC, showPoint, {
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
free: {{$.i18n.Tr "cloudbrain.free"}},
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
memory: {{$.i18n.Tr "cloudbrain.memory"}},
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
});
// Written with .text() so the string is inserted as plain text, not HTML.
$('td.ti-text-form-content.spec{{$k}} div').text(specStr);
})();
</script>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.compute_node"}}
</td>
<td class="ti-text-form-content">
<div class="text-span text-span-w">
{{.WorkServerNumber}}
</div>
</td>
</tr>
<tr class="ti-no-ng-animate">
<td class="ti-no-ng-animate ti-text-form-label text-width80">
{{$.i18n.Tr "repo.modelarts.train_job.description"}}
</td>

<td class="ti-text-form-content">
<div class="text-span text-span-w"
title="{{.Cloudbrain.Description}}">
{{if .Cloudbrain.Description}}{{.Cloudbrain.Description}}{{else}}--{{end}}
</div>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<div style="clear:both">
<table style="border:none" class="ui fixed small stackable table">
<thead>
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th>
</tr></thead>
<tbody>
{{range $m ,$n := $.datasetList}}
{{if eq $k $m}}
{{range $f ,$g := $n}}
<tr>
<td style="word-wrap: break-word;word-break: break-all;">
{{if eq .IsDelete true}}
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}})
{{else}}
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a>
{{end}}
</td>
</tr>
{{end}}
{{end}}
{{end}}
</tbody>
</table>
</div>
</div>

</div>
</div>
<div class="ui tab" data-tab="second{{$k}}">
<!--
<div class="file-info">
<a id="{{.VersionName}}-log-down"
class='{{if and (.CanModify) (eq .Status "KILLED" "FAILED" "START_FAILED" "STOPPED" "COMPLETED") }}ti-download-file{{else}}disabled{{end}} file-info'
href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file?version_name={{.VersionName}}">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
<div class="file-info full-log-dialog" data-version="{{.VersionName}}" data-log="{{$.i18n.Tr "repo.modelarts.log_file"}}"
data-exit="{{$.i18n.Tr "repo.modelarts.exit_full_screen"}}" style="margin-left: 1rem;color:#0366d6;cursor: pointer;"
data-log-down="{{$.i18n.Tr "repo.modelarts.download_log"}}" data-href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file?version_name={{.VersionName}}"
data-scroll-top="{{$.i18n.Tr "repo.log_scroll_start"}}" data-scroll-bottom="{{$.i18n.Tr "repo.log_scroll_end"}}">
<i class="ri-aspect-ratio-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.fullscreen_log_file"}}</span>
</div>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="{{$.i18n.Tr "repo.log_scroll_start"}}" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="{{$.i18n.Tr "repo.log_scroll_end"}}" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="end_line-max" value>
<input type="hidden" name="start_line-max" value>
<input type="hidden" name="start_line-max-copy" value>
<input type="hidden" name="start_line" value>
<input type="hidden" name="init_log" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>

</div>
-->
<div class="detail-log-content detail-log-content-{{.VersionName}}"
data-multiplenode=""
data-logapiurl="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/log"
data-logdownloadurl="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }}
</div>
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;">
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div>
<div style="padding:0 50px 10px 30px;height:100%">
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;"
data-multiplenode=""
data-logapiurl="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/log"
data-logdownloadurl="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file"
data-workservernumber="{{.WorkServerNumber}}"
data-version="{{.VersionName}}">
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }}
</div>
</div>
</div>
</div>
<div class="ui tab" data-tab="four{{$k}}" style="position: relative;">
<i class="ri-refresh-line metric_chart"
style="position: absolute;right: 25%;color:#3291f8;z-index:99;cursor: pointer;"
data-version="{{.VersionName}}"></i>
<div class="ui inverted active dimmer" style="display: none;">
<div class="ui loader"></div>
</div>
<div id="metric-{{.VersionName}}" style="height: 260px;width: 870px;">
</div>
</div>
<div class="ui tab" data-tab="third{{$k}}">
<input type="hidden" name="model{{.VersionName}}" value="-1">
<input type="hidden" name="modelback{{.VersionName}}" value="-1">
<div style="display: flex;justify-content: space-between;">
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'>
<div class="active section">{{.VersionName}}</div>
<div class="divider"> / </div>
</div>
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info'
href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span>
</a>
</div>
<div id="dir_list{{.VersionName}}" style="max-height: 500px;overflow:auto;">

</div>
</div>

</div>
</div>
</div>
<!-- {{template "custom/max_log" .}} -->
{{end}} {{template "base/paginate" .}}
</div>
<!-- 确认模态框 -->
<div id="deletemodel">
<div class="ui basic modal">
<div class="ui icon header">
<i class="trash icon"></i> {{.i18n.Tr "cloudbrain.delete_task"}}
</div>

<div class="content">
<p>{{.i18n.Tr "cloudbrain.task_delete_confirm"}}</p>
</div>
<div class="actions">
<div class="ui red basic inverted cancel button">
<i class="remove icon"></i> {{.i18n.Tr "cloudbrain.operate_cancel"}}
</div>
<div class="ui green basic inverted ok button">
<i class="checkmark icon"></i> {{.i18n.Tr "cloudbrain.operate_confirm"}}
</div>
</div>
</div>
</div>
<!--
<div id="menuContent" class="menuContent" style="display:none; position: absolute;z-index:9999">
<ul id="treeDemo" class="ztree" style="margin-top:0; width: 83%; height: 100%;"></ul>
</div> -->
<!-- 创建模型 -->
<div id="newmodel">
<div class="ui modal second">
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);">
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4>
</div>
<div class="content content-padding">
<form id="formId" method="POST" class="ui form">
<div class="ui error message">
</div>
{{$.CsrfTokenHtml}}
<input type="hidden" name="trainTaskCreate" value="true">

<div class="two inline fields ">
<div class="required ten wide field">
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label>&nbsp;
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required>
<input class="width83" id="JobName" readonly required>

</div>
<!-- Version column: "six wide" pairs with the "ten wide" job-name field in the same row. -->
<div class="required six wide field">
<label>{{.i18n.Tr "repo.model.manage.version"}}</label>
<input class="width70" id="versionName" name="versionName" readonly required>
</div>
</div>

<div class="required inline field" id="modelname">
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label>
<input style="width: 45%;" id="name" name="name" required maxlength="25"
onkeyup="this.value=this.value.replace(/[, ]/g,'')">
</div>
<div class="required inline field" id="verionname">
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label>
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255">
</div>
<div class="unite min_title inline field required">
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label>
<input type="hidden" id="engine" name="engine" required>
<input style="width: 45%;" id="engine_name" name="engine_name" readonly required maxlength="255">
</div>
<div class="unite min_title inline fields required">
<div class="field required">
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label>
</div>
<div class="thirteen wide field" style="position:relative">
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile" >
<div id="menuContent" class="menuContent" style="display:none;">
<ul id="treeDemo" class="ztree"></ul>
</div>
</div>
</div>
<div class="inline field">
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label>
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255"
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'>
</div>
{{if eq $.Repository.IsPrivate false}}
<div class="inline fields">
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}}&nbsp;&nbsp;&nbsp;</label>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" checked="checked" value="false">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label>
</div>
</div>
<div class="field">
<div class="ui radio checkbox">
<input type="radio" name="isPrivate" value="true">
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label>
</div>
</div>
</div>
{{end}}
<!-- Model description; every handler clamps to the same 255-character limit as maxlength. -->
<div class="inline field">
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label>
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3"
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}'
onchange="this.value=this.value.substring(0, 255)"
onkeydown="this.value=this.value.substring(0, 255)"
onkeyup="this.value=this.value.substring(0, 255)"></textarea>
</div>

<div class="inline field" style="margin-left: 75px;">
<button onclick="createModel()" type="button" class="ui create_train_job green button"
style="position: absolute;">
{{.i18n.Tr "repo.model.manage.sava_model"}}
</button>
</div>
</form>
<div class="actions" style="display: inline-block;margin-left: 180px;">
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button>
</div>
</div>


</div>
</div>
{{template "custom/export_dataset" .}}
</div>
{{template "base/footer" .}}

<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script>
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script>
<script>
// zTree options for the model-file picker (#treeDemo).
var setting = {
  // Checkboxes are shown; checking or unchecking a node propagates to both
  // its parents ("p") and its children ("s").
  check: { enable: true, chkboxType: {"Y":"ps", "N":"ps"} },
  // Double-clicking a node does not expand or collapse it.
  view: { dblClickExpand: false },
  // Click toggles the checkbox; every check change refreshes the selected-file input.
  callback: { beforeClick: beforeClick, onCheck: onCheck }
};

/**
 * zTree beforeClick callback: a click on a node toggles its checkbox instead
 * of selecting the node.
 *
 * @param {string} treeId   id of the tree that fired the event (unused).
 * @param {Object} treeNode the clicked node.
 * @returns {boolean} always false, which cancels zTree's default click handling.
 */
function beforeClick(treeId, treeNode) {
  var toggled = !treeNode.checked;
  // Last argument true: fire the onCheck callback so the input is refreshed.
  $.fn.zTree.getZTreeObj("treeDemo").checkNode(treeNode, toggled, null, true);
  return false;
}
/**
 * zTree onCheck callback: rebuilds the value of #modelSelectedFile from the
 * currently checked nodes.
 *
 * Nodes flagged as parents are skipped. Each remaining node contributes its
 * path (node names from the root joined by "/"), and the paths are joined
 * with ";".
 */
function onCheck(e, treeId, treeNode) {
  var tree = $.fn.zTree.getZTreeObj("treeDemo");
  var checkedPaths = tree.getCheckedNodes(true)
    .filter(function (node) { return !node.isParent; })
    .map(function (node) {
      return node.getPath()
        .map(function (segment) { return segment.name; })
        .join("/");
    });
  $("#modelSelectedFile").attr("value", checkedPaths.join(";"));
}
/**
 * Opens the model-file tree dropdown and starts listening for clicks outside
 * of it so it can be closed again.
 *
 * The handler is unbound before it is bound: clicking the input repeatedly
 * while the menu is open would otherwise stack one extra mousedown listener
 * per click, and each of them would run hideMenu() on the next outside click.
 */
function showMenu() {
  $("#menuContent").slideDown("fast");
  $("body").unbind("mousedown", onBodyDown).bind("mousedown", onBodyDown);
}
/**
 * Closes the model-file tree dropdown and stops listening for outside clicks.
 */
function hideMenu() {
  var menu = $("#menuContent");
  menu.fadeOut("fast");
  var page = $("body");
  page.unbind("mousedown", onBodyDown);
}
/**
 * Body mousedown handler: hides the dropdown unless the press landed on the
 * menu button, the file input, the menu container or anything inside it.
 *
 * @param {Event} event the mousedown event.
 */
function onBodyDown(event) {
  var target = event.target;
  var keepsMenuOpen =
    target.id == "menuBtn" ||
    target.id == "modelSelectedFile" ||
    target.id == "menuContent" ||
    $(target).parents("#menuContent").length > 0;
  if (keepsMenuOpen) {
    return;
  }
  hideMenu();
}
// Marker key that loadSelectedModelFile stores in a directory's (otherwise
// empty) child map when the listing contains the directory itself (name ending
// in "/") but no entries below it; convertToNode checks for it to render the
// entry as a closed parent node instead of a leaf.
let dirKey="isOnlyDir--:&";
/**
 * Fetches the output files of a training job version and renders them as a
 * checkable zTree in #treeDemo.
 *
 * The response is expected to be an array of objects with a "/"-separated
 * FileName; a name ending in "/" denotes a directory entry.
 *
 * @param {Object} trainJob job record; JobName and VersionName are sent to the
 *                          query_train_model endpoint. Nothing is requested
 *                          when it is null or empty.
 */
function loadSelectedModelFile(trainJob){
  $('#choice_file').dropdown('clear')
  $("#model-file").empty()
  if(trainJob == null || trainJob == ""){
    console.log("trainJob is null");
    return;
  }
  // Encode the values so a name containing "&", "#", "+" or spaces cannot
  // corrupt the query string.
  const jobName = encodeURIComponent(trainJob.JobName);
  const versionName = encodeURIComponent(trainJob.VersionName);
  $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?jobName=${jobName}&type=1&VersionName=${versionName}`, (data) => {
    const n_length = data.length
    var zNodes = [];
    // Prototype-less maps: with plain {} a file called "constructor" or
    // "toString" would hit the inherited member and never be added.
    var nodesMap = Object.create(null);
    // Pass 1: build the nested name -> children map from every path.
    for (let i = 0; i < n_length; i++) {
      let parentNodeMap = nodesMap;
      const fileSplits = data[i].FileName.split("/");
      for (let j = 0; j < fileSplits.length; j++) {
        if (fileSplits[j] == "") {
          break;
        }
        if (parentNodeMap[fileSplits[j]] == null) {
          parentNodeMap[fileSplits[j]] = Object.create(null);
        }
        parentNodeMap = parentNodeMap[fileSplits[j]];
      }
    }
    // Pass 2: once the whole tree is known, flag directory entries (names
    // ending in "/") that ended up without any children.
    for (let i = 0; i < n_length; i++) {
      let parentNodeMap = nodesMap;
      const fileName = data[i].FileName;
      const fileSplits = fileName.split("/");
      for (let j = 0; j < fileSplits.length; j++) {
        if (fileSplits[j] == "") {
          if (fileName[fileName.length - 1] == "/" && Object.keys(parentNodeMap).length == 0) {
            parentNodeMap[dirKey] = "true";
          }
          break;
        }
        parentNodeMap = parentNodeMap[fileSplits[j]];
      }
    }
    convertToNode(zNodes, nodesMap);
    $.fn.zTree.init($("#treeDemo"), setting, zNodes);
  })
}

/**
 * Recursively converts a nested name -> children map into zTree node objects
 * and appends them to nodeList.
 *
 * - An entry with no children becomes a leaf: { name }.
 * - An entry whose child map carries the dirKey marker becomes a closed,
 *   childless parent: { name, open: false, isParent: true }.
 * - Any other entry becomes a folder with a children array; the first such
 *   folder on each level is rendered expanded.
 *
 * @param {Array}  nodeList array that receives the nodes of this level.
 * @param {Object} nodesMap map of entry name to the map of its children.
 */
function convertToNode(nodeList,nodesMap){
  var keyList = Object.keys(nodesMap);
  // Keys are file names, so "a - b" would be NaN and leave the order to the
  // engine. Compare as text, with embedded numbers ordered by value
  // ("9" before "10").
  keyList.sort(function(a,b){
    return a.localeCompare(b, undefined, { numeric: true });
  });
  var isFirst = true;
  for(var i=0; i<keyList.length;i++){
    var childMap = nodesMap[keyList[i]];
    var node = {};
    node["name"] = keyList[i];
    nodeList.push(node);
    if(childMap != null && Object.keys(childMap).length >0){
      if(childMap[dirKey] != null){
        node["open"] = false;
        node["isParent"] = true;
      }else{
        node["children"]=[];
        if(isFirst){
          node["open"] = true;
          isFirst= false;
        }
        convertToNode(node["children"],childMap);
      }
    }
  }
}

// Page initialisation: activate the tab and accordion plugins on the matching
// elements. The first .tab() call runs immediately, the others once the DOM is ready.
$('.menu .item').tab()
$(document).ready(function () {
// Only the icon inside an accordion title toggles it.
$('.ui.accordion').accordion({ selector: { trigger: '.icon' } });
});
$(document).ready(function () {
$('.secondary.menu .item').tab();
});

// Script-level values shared by the functions in this script; userName,
// repoPath and jobID are filled in by the ready handler below.
let userName
let repoPath
let jobID
// Download permission rendered by the template.
// NOTE(review): the name is misspelled ("downlaod"); it is not read in this
// chunk, so check other templates/scripts before renaming.
let downlaodFlag = {{ $.canDownload }}
$(document).ready(function () {
// Derive owner, repository and job id from the page URL by position counted
// from the end: 5th-last, 4th-last and last "/"-separated segment.
// NOTE(review): the full href is split, so a query string or fragment would
// end up inside jobID, and a trailing slash would shift all three values.
let url = window.location.href;
let urlArr = url.split('/')
userName = urlArr.slice(-5)[0]
repoPath = urlArr.slice(-4)[0]
jobID = urlArr.slice(-1)[0]
})
/**
 * Stops an event from bubbling to ancestor elements.
 *
 * Prefers the global window.event when it is set and falls back to the
 * event passed in by the caller.
 *
 * @param {Event} e the event handed to the handler.
 */
function stopBubbling(e) {
  var evt = window.event || e;
  if (!evt.stopPropagation) {
    // Legacy IE has no stopPropagation(); it uses the cancelBubble flag.
    evt.cancelBubble = true;
    return;
  }
  // Standard way to stop the event from bubbling up.
  evt.stopPropagation();
}

/**
 * Opens the "import new model" dialog pre-filled from a training job.
 *
 * @param {Object} obj training job record; DisplayJobName, JobID, VersionName,
 *                     EngineName and EngineID are read from it.
 */
function showcreate(obj) {
  $('.ui.modal.second')
    .modal({
      centered: false,
      onShow: function () {
        $('input[name="version"]').addClass('model_disabled')
        $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
        $('input[name="jobId"]').val(obj.JobID)
        $('input[name="versionName"]').val(obj.VersionName).addClass('model_disabled')

        // Map the job's engine to the engine label/value pair sent by the
        // form: MindSpore -> 2, TensorFlow (engine ids 121, 38) -> 1, else -> 3.
        var engineLabel = "Other";
        var engineValue = 3;
        if (isMindSporeEngine(obj)) {
          engineLabel = "MindSpore";
          engineValue = 2;
        } else if (obj.EngineID == 121 || obj.EngineID == 38) {
          engineLabel = "TensorFlow";
          engineValue = 1;
        }
        $('input[name="engine_name"]').val(engineLabel).addClass('model_disabled');
        $('input[name="engine"]').val(engineValue);

        $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
        createModelName();
        loadSelectedModelFile(obj);
      },
      onHide: function () {
        $('.ui.dimmer').css({ "background-color": "" })
        // .text() without an argument only reads the text; pass an empty
        // string so a previous error does not reappear the next time the
        // message box is shown.
        $('.ui.error.message').text('')
        $('.ui.error.message').css('display', 'none')
      }
    })
    .modal('show')
}
/**
 * Submits the "import new model" form via AJAX.
 *
 * Requires at least one selected model file. On success the form is cleared
 * and the browser is sent to the readme template page of the new model; on
 * failure the server's response text is shown in the form's error box.
 */
function createModel() {
  var fileInput = $('input#modelSelectedFile');
  if (!fileInput.val()) {
    fileInput.parent().addClass('error')
    return
  }
  // Drop the highlight left behind by an earlier attempt without a file.
  fileInput.parent().removeClass('error')

  let url_href = `/${userName}/${repoPath}/modelmanage/create_new_model`
  let data = $("#formId").serialize()
  // The visibility radios are not rendered for every repository (see the
  // template condition on Repository.IsPrivate); without them the model is
  // submitted as private.
  var radio = document.getElementsByName("isPrivate");
  if (radio == null || radio.length == 0) {
    data += "&isPrivate=true";
  }
  $("#mask").css({ "display": "block", "z-index": "9999" })
  $.ajax({
    url: url_href,
    type: 'POST',
    data: data,
    success: function (res) {
      // Read the name before the form is cleared below.
      const modelName = $('#formId #name').val();
      $('input[name="engine_name"]').val("");
      $('input[name="engine"]').val("");
      $('input[name="jobId"]').val("");
      $('input[name="label"]').val("");
      // The description is a <textarea>, so an input[...] selector never matches it.
      $('#formId [name="description"]').val("");
      fileInput.attr("value", "");
      document.getElementById("formId").reset();
      location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(modelName)}`
      $('.ui.modal.second').modal('hide')
    },
    error: function (xhr) {
      // Only runs when the request fails (non-success status): show the
      // server's message inside the form.
      $('.ui.error.message').text(xhr.responseText)
      $('.ui.error.message').css('display', 'block')
    },
    complete: function (xhr) {
      // Hide the loading mask whether the request succeeded or failed.
      $("#mask").css({ "display": "none", "z-index": "1" })
    }
  })
}

/**
 * Tells whether a training job ran on a MindSpore engine.
 *
 * A job matches when its EngineName starts with "mindspore" (case-insensitive)
 * or its EngineID loosely equals one of 122, 35, -1 or 37.
 *
 * @param {Object} obj job record with optional EngineName and EngineID.
 * @returns {boolean}
 */
function isMindSporeEngine(obj){
  var engineName = obj.EngineName;
  var namedMindSpore = engineName != null && engineName.toLowerCase().startsWith("mindspore");
  if (namedMindSpore) {
    return true;
  }
  // Loose equality on purpose: ids may arrive as numbers or numeric strings.
  return [122, 35, -1, 37].some(function (id) {
    return obj.EngineID == id;
  });
}

/**
 * Pre-fills the model name and version inputs of the import dialog.
 *
 * The name is "<third path segment>_model_<suffix>", where the suffix is up
 * to four base-36 characters taken from a random number; the version is
 * always "0.0.1".
 */
function createModelName() {
  const pathSegments = location.pathname.split('/');
  // Characters 2..5 of e.g. "0.k3f9x…": skips the leading "0.".
  const randomSuffix = Math.random().toString(36).slice(2, 6);
  $('#name').val(pathSegments[2] + '_model_' + randomSuffix);
  $('#version').val("0.0.1");
}



</script>
{{ template "repo/cloudbrain/cloudbraindetail" .}}

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save