@@ -1,33 +1,3 @@ | |||
<!-- NOTE: If your issue is a security concern, please send an email to security@gitea.io instead of opening a public issue --> | |||
## 场景描述 | |||
<!-- | |||
1. Please speak English, this is the language all maintainers can speak and write. | |||
2. Please ask questions or configuration/deploy problems on our Discord | |||
server (https://discord.gg/gitea) or forum (https://discourse.gitea.io). | |||
3. Please take a moment to check that your issue doesn't already exist. | |||
4. Please give all relevant information below for bug reports, because | |||
incomplete details will be handled as an invalid report. | |||
--> | |||
- Gitea version (or commit ref): | |||
- Git version: | |||
- Operating system: | |||
- Database (use `[x]`): | |||
- [ ] PostgreSQL | |||
- [ ] MySQL | |||
- [ ] MSSQL | |||
- [ ] SQLite | |||
- Can you reproduce the bug at https://try.gitea.io: | |||
- [ ] Yes (provide example URL) | |||
- [ ] No | |||
- [ ] Not relevant | |||
- Log gist: | |||
## Description | |||
... | |||
## Screenshots | |||
<!-- **If this issue involves the Web Interface, please include a screenshot** --> | |||
## 预期效果 |
@@ -1,7 +1,14 @@ | |||
package entity | |||
import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/storage" | |||
"encoding/json" | |||
"fmt" | |||
"io" | |||
"reflect" | |||
"strings" | |||
"sync" | |||
"code.gitea.io/gitea/models" | |||
@@ -11,7 +18,7 @@ import ( | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
//todo 暂时保留之前各种云脑属性的定义 | |||
// todo 暂时保留之前各种云脑属性的定义 | |||
type CreateReq struct { | |||
JobType models.JobType `json:"job_type" binding:"Required"` | |||
DisplayJobName string `json:"display_job_name" binding:"Required"` | |||
@@ -32,8 +39,12 @@ type CreateReq struct { | |||
Description string `json:"description"` | |||
LabelName string `json:"label_names"` | |||
DatasetUUIDStr string `json:"dataset_uuid_str"` | |||
Params string `json:"run_para_list"` | |||
Params string `json:"params"` | |||
BootFile string `json:"boot_file"` | |||
PoolId string `json:"pool_id"` | |||
IsContinueRequest bool `json:"is_continue"` | |||
SourceCloudbrainId int64 `json:"source_cloudbrain_id"` | |||
AppName string `json:"app_name"` | |||
ParamArray models.Parameters | |||
ComputeSource *models.ComputeSource | |||
ReqCommitID string | |||
@@ -45,16 +56,47 @@ type CreateReq struct { | |||
} | |||
type CreationResponse struct { | |||
Error error | |||
JobID string | |||
Status string //todo 考虑统一状态 | |||
CreateTime timeutil.TimeStamp | |||
Error error | |||
JobID string | |||
Status string //todo 考虑统一状态 | |||
CreateTime timeutil.TimeStamp | |||
VersionID int64 | |||
VersionName string | |||
} | |||
type JobIdAndVersionId struct { | |||
JobID string | |||
VersionID int64 | |||
} | |||
type QueryAITaskRes struct { | |||
Task *AITaskDetailInfo `json:"task"` | |||
EarlyVersionList []*AITaskDetailInfo `json:"early_version_list"` | |||
CanCreateVersion bool `json:"can_create_version"` | |||
CanDownload bool `json:"can_download"` | |||
} | |||
func (r *QueryAITaskRes) TryToRemoveDatasetAndModelInfo(currentUser *models.User) { | |||
if r.Task != nil { | |||
r.Task.TryToRemoveDatasets(currentUser) | |||
r.Task.TryToRemovePretrainModelList(currentUser) | |||
} | |||
if r.EarlyVersionList != nil { | |||
for _, t := range r.EarlyVersionList { | |||
t.TryToRemoveDatasets(currentUser) | |||
t.TryToRemovePretrainModelList(currentUser) | |||
} | |||
} | |||
} | |||
func (r *QueryAITaskRes) Tr(language string) { | |||
if r.Task != nil { | |||
r.Task.Tr(language) | |||
} | |||
if r.EarlyVersionList != nil { | |||
for _, t := range r.EarlyVersionList { | |||
t.Tr(language) | |||
} | |||
} | |||
} | |||
type AITaskDetailInfo struct { | |||
@@ -79,11 +121,13 @@ type AITaskDetailInfo struct { | |||
CodePath string `json:"code_path"` | |||
DatasetPath string `json:"dataset_path"` | |||
PretrainModelPath string `json:"pretrain_model_path"` | |||
PretrainModelUrl string `json:"pretrain_model_url"` | |||
OutputPath string `json:"output_path"` | |||
CodeUrl string `json:"code_url"` | |||
PretrainModelName string `json:"pretrain_model_name"` | |||
PretrainModelVersion string `json:"pretrain_model_version"` | |||
PretrainCkptName string `json:"pretrain_model_ckpt_name"` | |||
PretrainModelId string `json:"pretrain_model_id"` | |||
StartTime timeutil.TimeStamp `json:"start_time"` | |||
EndTime timeutil.TimeStamp `json:"end_time"` | |||
Description string `json:"description"` | |||
@@ -95,17 +139,23 @@ type AITaskDetailInfo struct { | |||
CreatorName string `json:"creator_name"` | |||
EngineName string `json:"engine_name"` | |||
FailedReason string `json:"failed_reason"` | |||
UserId int64 `json:"-"` | |||
AppName string `json:"app_name"` | |||
} | |||
func (a *AITaskDetailInfo) Tr(language string) { | |||
a.AICenter = getAiCenterShow(a.AICenter, language) | |||
} | |||
func (a *AITaskDetailInfo) RemoveDatasets() { | |||
a.DatasetList = []*models.DatasetDownload{} | |||
func (a *AITaskDetailInfo) TryToRemoveDatasets(currentUser *models.User) { | |||
if currentUser == nil || a.UserId == 0 || (!currentUser.IsAdmin && currentUser.ID != a.UserId) { | |||
a.DatasetList = []*models.DatasetDownload{} | |||
} | |||
} | |||
func (a *AITaskDetailInfo) RemovePretrainModelList() { | |||
a.PretrainModelList = []*models.ModelDownload{} | |||
func (a *AITaskDetailInfo) TryToRemovePretrainModelList(currentUser *models.User) { | |||
if currentUser == nil || a.UserId == 0 || (!currentUser.IsAdmin && currentUser.ID != a.UserId) { | |||
a.PretrainModelList = []*models.ModelDownload{} | |||
} | |||
} | |||
func getAiCenterShow(aiCenter string, language string) string { | |||
@@ -158,6 +208,8 @@ type AITaskBriefInfo struct { | |||
ComputeSource string `json:"compute_source"` | |||
AICenter string `json:"ai_center"` | |||
IsFileNotebook bool `json:"is_file_notebook"` | |||
IsFineTuneTask bool `json:"is_fine_tune_task"` | |||
APPName string `json:"app_name"` | |||
} | |||
func (a *AITaskBriefInfo) Tr(language string) { | |||
@@ -170,6 +222,7 @@ type AITaskListRes struct { | |||
PageSize int `json:"page_size"` | |||
Page int `json:"page"` | |||
CanCreateTask bool `json:"can_create_task"` | |||
IsRepoEmpty bool `json:"is_repo_empty"` | |||
} | |||
type AITaskInfo4List struct { | |||
Task *AITaskBriefInfo `json:"task"` | |||
@@ -179,11 +232,6 @@ type AITaskInfo4List struct { | |||
} | |||
func ConvertCloudbrainToAITaskBriefInfo(task *models.Cloudbrain) *AITaskBriefInfo { | |||
computeSource := "" | |||
c := models.GetComputeSourceInstance(task.ComputeResource) | |||
if c != nil { | |||
computeSource = c.Name | |||
} | |||
return &AITaskBriefInfo{ | |||
ID: task.ID, | |||
JobType: task.JobType, | |||
@@ -192,14 +240,225 @@ func ConvertCloudbrainToAITaskBriefInfo(task *models.Cloudbrain) *AITaskBriefInf | |||
CreatedUnix: task.CreatedUnix, | |||
FormattedDuration: task.TrainJobDuration, | |||
Cluster: GetClusterTypeFromCloudbrainType(task.Type).GetParentCluster(), | |||
ComputeSource: computeSource, | |||
ComputeSource: task.GetStandardComputeSource(), | |||
StartTime: task.StartTime, | |||
EndTime: task.EndTime, | |||
AICenter: task.AiCenter, | |||
IsFileNotebook: task.IsFileNoteBookTask(), | |||
IsFineTuneTask: task.FineTune, | |||
APPName: task.AppName, | |||
} | |||
} | |||
type NotebookDataset struct { | |||
DatasetUrl string `json:"dataset_url"` | |||
} | |||
type QueryLogOpts struct { | |||
CloudbrainId int64 | |||
BaseLine int64 | |||
Lines int64 | |||
Order Direction | |||
UserId int64 | |||
NodeId int | |||
LogFileName string | |||
} | |||
type GetLogDownloadInfoReq struct { | |||
CloudbrainId int64 | |||
NodeId int | |||
LogFileName string | |||
} | |||
type GetAllOutputReq struct { | |||
CloudbrainId int64 | |||
Suffix []string | |||
} | |||
type GetOutputDownloadInfoReq struct { | |||
CloudbrainId int64 | |||
FileName string | |||
ParentDir string | |||
} | |||
type Direction string | |||
const ( | |||
UP Direction = "up" | |||
DOWN Direction = "down" | |||
) | |||
func (o Direction) Reverse() Direction { | |||
switch o { | |||
case DOWN: | |||
return UP | |||
case UP: | |||
return DOWN | |||
} | |||
return "" | |||
} | |||
type FileReader struct { | |||
Reader io.ReadCloser | |||
Name string | |||
} | |||
type GetTaskListReq struct { | |||
models.ListOptions | |||
ComputeSource *models.ComputeSource | |||
JobTypes []string | |||
RepoID int64 | |||
Operator *models.User | |||
IsRepoOwner bool | |||
} | |||
type AITaskBaseConfig struct { | |||
ContainerSteps map[ContainerDataType]*ContainerBuildOpts `json:"container_configs"` | |||
ActionType models.ActionType `json:"action_type"` | |||
IsActionUseJobId bool `json:"is_action_use_job_id"` | |||
DatasetsLimitSizeGB int | |||
DatasetsMaxNum int | |||
} | |||
func GetAITaskConfigFromCloudbrainConfig(config *models.CloudbrainConfig) *AITaskBaseConfig { | |||
if config == nil { | |||
return nil | |||
} | |||
s := config.ConfigurationSnapshot | |||
c := &AITaskBaseConfig{} | |||
err := json.Unmarshal([]byte(s), c) | |||
if err != nil { | |||
log.Error("GetAITaskConfigFromCloudbrain err,config=%+v err=&v", config, err) | |||
return nil | |||
} | |||
return c | |||
} | |||
type AITaskDetailConfigInfo struct { | |||
BaseConfig *AITaskBaseConfig | |||
OutputObjectPrefix string | |||
OutputStorageType StorageType | |||
LogObjectPrefix string | |||
LogStorageType StorageType | |||
} | |||
func BuildAITaskDetailConfigInfo(config *models.CloudbrainConfig) *AITaskDetailConfigInfo { | |||
c := &AITaskBaseConfig{} | |||
json.Unmarshal([]byte(config.ConfigurationSnapshot), c) | |||
return &AITaskDetailConfigInfo{ | |||
BaseConfig: c, | |||
OutputObjectPrefix: config.OutputObjectPrefix, | |||
OutputStorageType: StorageType(config.OutputStorageType), | |||
LogObjectPrefix: config.LogObjectPrefix, | |||
LogStorageType: StorageType(config.LogStorageType), | |||
} | |||
} | |||
type AITaskConfigKey struct { | |||
ComputeSource string | |||
IsFileNoteBookRequest bool | |||
} | |||
func (opts AITaskConfigKey) GetKey() string { | |||
v := reflect.ValueOf(opts) | |||
t := v.Type() | |||
b := strings.Builder{} | |||
for i := 0; i < v.NumField(); i++ { | |||
field := v.Field(i) | |||
fieldName := t.Field(i).Name | |||
fieldValue := field.Interface() | |||
if !field.IsZero() { | |||
b.WriteString(fmt.Sprintf("%s:%v;", fieldName, fieldValue)) | |||
} | |||
} | |||
return b.String() | |||
} | |||
func (c *AITaskBaseConfig) GetContainerConfig(containerDataType ContainerDataType) *ContainerBuildOpts { | |||
containerConfigs := c.ContainerSteps | |||
if containerConfigs != nil { | |||
return containerConfigs[containerDataType] | |||
} | |||
return nil | |||
} | |||
func (c *AITaskBaseConfig) GetContainerPath(containerDataType ContainerDataType) string { | |||
config := c.GetContainerConfig(containerDataType) | |||
if config == nil { | |||
return "" | |||
} | |||
return config.ContainerPath | |||
} | |||
type AITaskConfigMap struct { | |||
mu sync.RWMutex | |||
ConfigMap map[string]*AITaskBaseConfig | |||
} | |||
func (h *AITaskConfigMap) Add(opts AITaskConfigKey, config *AITaskBaseConfig) *AITaskConfigMap { | |||
h.mu.Lock() | |||
defer h.mu.Unlock() | |||
if h.ConfigMap == nil { | |||
h.ConfigMap = make(map[string]*AITaskBaseConfig, 0) | |||
} | |||
h.ConfigMap[opts.GetKey()] = config | |||
return h | |||
} | |||
func (h *AITaskConfigMap) Default(config *AITaskBaseConfig) *AITaskConfigMap { | |||
h.mu.Lock() | |||
defer h.mu.Unlock() | |||
if h.ConfigMap == nil { | |||
h.ConfigMap = make(map[string]*AITaskBaseConfig, 0) | |||
} | |||
h.ConfigMap[AITaskConfigKey{}.GetKey()] = config | |||
return h | |||
} | |||
func (h AITaskConfigMap) Get(opts AITaskConfigKey) *AITaskBaseConfig { | |||
h.mu.RLock() | |||
defer h.mu.RUnlock() | |||
if h.ConfigMap == nil { | |||
return nil | |||
} | |||
key := opts.GetKey() | |||
if _, isOk := h.ConfigMap[key]; isOk { | |||
return h.ConfigMap[key] | |||
} | |||
return nil | |||
} | |||
func (h AITaskConfigMap) IsEmpty() bool { | |||
return h.ConfigMap == nil || len(h.ConfigMap) == 0 | |||
} | |||
type AITaskOutput struct { | |||
Status models.ModelMigrateStatus `json:"status"` | |||
Path string `json:"path"` | |||
FileList []storage.FileInfo `json:"file_list"` | |||
IsTaskTerminal bool `json:"is_task_terminal"` | |||
CanReschedule bool `json:"can_reschedule"` | |||
CanDownload bool `json:"can_download"` | |||
} | |||
type AllAITaskOutput struct { | |||
FileList []storage.FileInfo `json:"file_list"` | |||
} | |||
type GetResourceUsageOpts struct { | |||
CloudbrainId int64 | |||
NodeId int | |||
LogFileName string | |||
} | |||
type AITaskNodeInfo struct { | |||
ID int `json:"id"` | |||
LogFileName string `json:"log_file_name"` | |||
} | |||
type StorageObjectInfo struct { | |||
ObjectKey string | |||
StorageType StorageType | |||
Bucket string | |||
} |
@@ -1,35 +0,0 @@ | |||
package entity | |||
type AITaskConfig struct { | |||
ContainerSteps map[ContainerDataType]*ContainerBuildOpts `json:"container_configs"` | |||
DatasetMaxSize int | |||
} | |||
type ContainerConfig struct { | |||
Enable bool | |||
ContainerPath string | |||
ReadOnly bool | |||
AcceptStorageType []StorageType | |||
} | |||
type GetAITaskConfigOpts struct { | |||
ComputeSource string | |||
IsFileNoteBookRequest bool | |||
} | |||
func (c *AITaskConfig) GetContainerConfig(containerDataType ContainerDataType) *ContainerBuildOpts { | |||
containerConfigs := c.ContainerSteps | |||
if containerConfigs != nil { | |||
return containerConfigs[containerDataType] | |||
} | |||
return nil | |||
} | |||
func (c *AITaskConfig) GetContainerPath(containerDataType ContainerDataType) string { | |||
config := c.GetContainerConfig(containerDataType) | |||
if config == nil { | |||
return "" | |||
} | |||
return config.ContainerPath | |||
} |
@@ -1,12 +0,0 @@ | |||
package entity | |||
import "code.gitea.io/gitea/models" | |||
type GetTaskListReq struct { | |||
models.ListOptions | |||
ComputeSource *models.ComputeSource | |||
JobTypes []string | |||
RepoID int64 | |||
Operator *models.User | |||
IsRepoOwner bool | |||
} |
@@ -1,14 +1,14 @@ | |||
package entity | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"strconv" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"encoding/json" | |||
"fmt" | |||
"strconv" | |||
) | |||
type CreateNoteBookTaskRequest struct { | |||
@@ -54,8 +54,11 @@ type RestartNoteBookTaskResponse struct { | |||
} | |||
type CreateTrainTaskRequest struct { | |||
Name string | |||
Tasks []TrainTask | |||
Name string | |||
DisplayJobName string | |||
Description string | |||
TaskConfig *AITaskBaseConfig | |||
Tasks []TrainTask | |||
} | |||
type QueryTaskResponse struct { | |||
@@ -71,6 +74,7 @@ type QueryTaskResponse struct { | |||
DataUrl string `json:"data_url"` | |||
ContainerIP string `json:"container_ip"` | |||
ContainerID string `json:"container_id"` | |||
VersionId int64 `json:"version_id"` | |||
} | |||
func ConvertGrampusNotebookResponse(job models.GrampusNotebookInfo) *QueryTaskResponse { | |||
@@ -195,23 +199,33 @@ func ConvertCloudbrainOneNotebookResponse(input map[string]interface{}) (*QueryT | |||
} | |||
type ClusterLog struct { | |||
Content string `json:"content"` | |||
CanLogDownload bool `json:"can_log_download"` | |||
//云脑二返回的startline和baseline在前端会丢失精度。因此改为string类型 | |||
StartLine string `json:"start_line"` | |||
EndLine string `json:"end_line"` | |||
Lines int64 `json:"lines"` | |||
} | |||
type TrainTask struct { | |||
Command string `json:"command"` | |||
Name string `json:"name"` | |||
ImageId string `json:"imageId"` | |||
ImageUrl string `json:"imageUrl"` | |||
ResourceSpecId string `json:"resourceSpecId"` | |||
CenterID []string `json:"centerID"` | |||
ReplicaNum int `json:"replicaNum"` | |||
Datasets []ContainerData `json:"datasets"` | |||
Models []ContainerData `json:"models"` | |||
Code ContainerData `json:"code"` | |||
BootFile string `json:"bootFile"` | |||
OutPut ContainerData `json:"output"` | |||
Params models.Parameters | |||
Spec *models.Specification | |||
Command string `json:"command"` | |||
Name string `json:"name"` | |||
ImageId string `json:"imageId"` | |||
ImageUrl string `json:"imageUrl"` | |||
ResourceSpecId string `json:"resourceSpecId"` | |||
CenterID []string `json:"centerID"` | |||
ReplicaNum int `json:"replicaNum"` | |||
Datasets []ContainerData `json:"datasets"` | |||
PreTrainModel []ContainerData `json:"models"` | |||
Code []ContainerData `json:"code"` | |||
BootFile string `json:"bootFile"` | |||
OutPut []ContainerData `json:"output"` | |||
LogPath []ContainerData `json:"logPath"` | |||
PoolId string `json:"poolId"` | |||
Params models.Parameters | |||
Spec *models.Specification | |||
RepoName string | |||
WorkServerNumber int | |||
} | |||
type CreateTrainTaskResponse struct { | |||
@@ -225,6 +239,8 @@ type CreateTrainTaskResponse struct { | |||
Name string `json:"name"` | |||
Status string `json:"status"` | |||
UserID string `json:"userId"` | |||
VersionID int64 `json:"versionID"` | |||
VersionName string `json:"versionName"` //当前版本 | |||
} | |||
type ClusterType string | |||
@@ -270,3 +286,69 @@ func GetClusterTypeFromCloudbrainType(t int) ClusterType { | |||
} | |||
return "" | |||
} | |||
type ClusterLogOpts struct { | |||
JobId string | |||
BaseLine int64 | |||
Lines int64 | |||
Direction Direction | |||
ObjectKeyPrefix string | |||
StorageType StorageType | |||
VersionID int64 | |||
NodeId int | |||
LogFileName string | |||
WorkServerNum int | |||
} | |||
func (opts ClusterLogOpts) IsBottomRequest() bool { | |||
return opts.BaseLine == 0 && opts.Direction == UP | |||
} | |||
func (opts ClusterLogOpts) IsHeadRequest() bool { | |||
return opts.BaseLine == 0 && opts.Direction == DOWN | |||
} | |||
type ClusterLogDownloadInfoOpts struct { | |||
JobId string | |||
ObjectKeyPrefix string | |||
StorageType StorageType | |||
NodeId int | |||
LogFileName string | |||
WorkServerNum int | |||
JobName string | |||
DisplayJobName string | |||
} | |||
type ClusterOutputDownloadInfoOpts struct { | |||
JobId string | |||
Path string | |||
JobName string | |||
StorageType StorageType | |||
} | |||
type ClusterNodeInfoOpts struct { | |||
JobId string | |||
WorkServerNum int | |||
VersionId int64 | |||
} | |||
type ClusterResourceUsageOpts struct { | |||
JobId string | |||
NodeId int | |||
LogFileName string | |||
VersionID int64 | |||
StartTime int64 | |||
EndTime int64 | |||
ComputeSource string | |||
WorkServerNumber int | |||
} | |||
type ClusterOutputOpts struct { | |||
JobId string | |||
ObjectKeyPrefix string | |||
StorageType StorageType | |||
ParentDir string | |||
} | |||
type ClusterAITaskOutput struct { | |||
Status models.ModelMigrateStatus | |||
Path string | |||
FileList []storage.FileInfo | |||
} |
@@ -0,0 +1,56 @@ | |||
package entity | |||
import "strings" | |||
type Command struct { | |||
CommandStr string | |||
} | |||
func NewCommand(s ...string) *Command { | |||
var builder strings.Builder | |||
for i := 0; i < len(s); i++ { | |||
builder.WriteString(s[i] + " ") | |||
} | |||
r := strings.TrimSuffix(builder.String(), " ") | |||
return &Command{ | |||
CommandStr: r, | |||
} | |||
} | |||
func (c *Command) ToString() string { | |||
return c.CommandStr | |||
} | |||
type CommandBuilder struct { | |||
Commands []*Command | |||
} | |||
func (b *CommandBuilder) ToString() string { | |||
var builder strings.Builder | |||
for i := 0; i < len(b.Commands); i++ { | |||
builder.WriteString(b.Commands[i].ToString() + ";") | |||
} | |||
return builder.String() | |||
} | |||
func (b *CommandBuilder) Next(c *Command) *CommandBuilder { | |||
if b.Commands == nil { | |||
b.Commands = make([]*Command, 0) | |||
} | |||
b.Commands = append(b.Commands, c) | |||
return b | |||
} | |||
func (b *CommandBuilder) Add(another *CommandBuilder) *CommandBuilder { | |||
if b.Commands == nil { | |||
b.Commands = make([]*Command, 0) | |||
} | |||
if another == nil { | |||
return b | |||
} | |||
if another.Commands == nil { | |||
return b | |||
} | |||
b.Commands = append(b.Commands, another.Commands...) | |||
return b | |||
} |
@@ -15,8 +15,11 @@ type ContainerData struct { | |||
ContainerPath string `json:"containerPath"` | |||
RealPath string `json:"realPath"` | |||
ReadOnly bool `json:"readOnly"` | |||
Size int64 `json:"size"` | |||
IsDir bool `json:"isDir"` | |||
GetBackEndpoint string `json:"getBackEndpoint"` | |||
S3DownloadUrl string `json:"s3DownloadUrl"` | |||
Size int64 `json:"size"` | |||
StorageType StorageType | |||
} | |||
type ContainerDataType string | |||
@@ -26,15 +29,20 @@ const ( | |||
ContainerDataset ContainerDataType = "dataset" | |||
ContainerPreTrainModel ContainerDataType = "pre_train_model" | |||
ContainerOutPutPath ContainerDataType = "output" | |||
ContainerLogPath ContainerDataType = "log" | |||
ContainerFileNoteBookCode ContainerDataType = "file_note_book_code" | |||
) | |||
type ContainerBuildOpts struct { | |||
Disable bool | |||
ContainerPath string | |||
ReadOnly bool | |||
AcceptStorageType []StorageType | |||
NotArchive bool | |||
Disable bool | |||
//容器内路径 | |||
ContainerPath string | |||
//在aiforge存储上基于云脑存储目录的相对路径 | |||
StorageRelativePath string | |||
ReadOnly bool | |||
AcceptStorageType []StorageType | |||
Uncompressed bool | |||
MKDIR bool | |||
} | |||
func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool { | |||
@@ -45,3 +53,9 @@ func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool { | |||
} | |||
return false | |||
} | |||
func (opts ContainerBuildOpts) GetLocalPath() string { | |||
if opts.StorageRelativePath != "" { | |||
return opts.StorageRelativePath | |||
} | |||
return opts.ContainerPath | |||
} |
@@ -18,6 +18,7 @@ type CreationRequiredInfo struct { | |||
PointAccount *PointAccountInfo `json:"point_account"` | |||
PaySwitch bool `json:"pay_switch"` | |||
Config AITaskCreationConfig `json:"config"` | |||
AllowedWorkerNum []int `json:"allowed_worker_num"` | |||
} | |||
type AITaskCreationConfig struct { | |||
@@ -0,0 +1,8 @@ | |||
package entity | |||
type FileType string | |||
const ( | |||
FileTypeTXT FileType = "txt" | |||
FileTypeZIP FileType = "zip" | |||
) |
@@ -0,0 +1,22 @@ | |||
package entity | |||
type FileDownloadInfo struct { | |||
Readers []FileReader | |||
ResultType FileType | |||
ResultFileName string | |||
DownloadUrl string | |||
} | |||
func (f *FileDownloadInfo) IsEmpty() bool { | |||
return (f.Readers == nil || len(f.Readers) == 0) && f.DownloadUrl == "" | |||
} | |||
func (f *FileDownloadInfo) Close() { | |||
if f.Readers != nil && len(f.Readers) > 0 { | |||
for _, r := range f.Readers { | |||
if r.Reader != nil { | |||
r.Reader.Close() | |||
} | |||
} | |||
} | |||
} |
@@ -0,0 +1,11 @@ | |||
package entity | |||
type ResourceUsage struct { | |||
Interval int `json:"interval"` | |||
MetricsInfo []MetricsInfo `json:"metrics_info"` | |||
} | |||
type MetricsInfo struct { | |||
Name string `json:"name"` | |||
Value []float32 `json:"value"` | |||
} |
@@ -4,7 +4,9 @@ import ( | |||
"crypto/tls" | |||
"encoding/json" | |||
"fmt" | |||
"math" | |||
"net/http" | |||
"strconv" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
@@ -319,15 +321,20 @@ sendjob: | |||
return &result, nil | |||
} | |||
func GetTrainJobLog(jobID string) (string, error) { | |||
func GetTrainJobLog(jobID string, nodeId ...int) (string, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var logContent string | |||
url := HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log" | |||
if len(nodeId) > 0 { | |||
url = HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log/node/" + strconv.Itoa(nodeId[0]) | |||
} | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&logContent). | |||
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log") | |||
Get(url) | |||
if err != nil { | |||
return logContent, fmt.Errorf("resty GetTrainJobLog: %v", err) | |||
@@ -348,13 +355,24 @@ func GetTrainJobLog(jobID string) (string, error) { | |||
return logContent, nil | |||
} | |||
func GetGrampusMetrics(jobID string) (models.NewModelArtsMetricStatisticResult, error) { | |||
func GetGrampusMetrics(jobID string, startTime int64, endTime int64, nodeId ...int) (models.NewModelArtsMetricStatisticResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NewModelArtsMetricStatisticResult | |||
url := HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics" | |||
if len(nodeId) > 0 { | |||
url = HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics/node/" + strconv.Itoa(nodeId[0]) | |||
} | |||
if startTime > 0 { | |||
var step int64 = 60 | |||
size := int64(math.Ceil(float64(endTime-startTime)/float64(step))) + 1 | |||
url = url + "?startTime=" + strconv.FormatInt(startTime, 10) + "&step=" + strconv.FormatInt(step, 10) + "&size=" + strconv.FormatInt(size, 10) | |||
} | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics") | |||
Get(url) | |||
if err != nil { | |||
return result, fmt.Errorf("resty GetTrainJobLog: %v", err) | |||
@@ -512,13 +530,14 @@ func GetTrainJobEvents(jobID string) (*models.GetGrampusJobEventsResponse, error | |||
retry := 0 | |||
sendjob: | |||
_, err := client.R(). | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + urlTrainJob + "/" + jobID + "/events") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobEvents: %v", err) | |||
} | |||
log.Info("res=%+v", res) | |||
if result.ErrorCode == errorIllegalToken && retry < 1 { | |||
retry++ | |||
@@ -73,6 +73,7 @@ const ( | |||
ActionCreateGrampusMLUTrainTask //44 | |||
ActionCreateGrampusGPUOnlineInferTask //45 | |||
ActionCreateGrampusDCUDebugTask //46 | |||
ActionCreateSuperComputeTask //47 | |||
) | |||
// Action represents user operation type and other information to | |||
@@ -425,7 +426,8 @@ func (a *Action) IsCloudbrainAction() bool { | |||
ActionCreateGrampusGCUTrainTask, | |||
ActionCreateGrampusGCUDebugTask, | |||
ActionCreateGrampusDCUDebugTask, | |||
ActionCreateGrampusMLUDebugTask: | |||
ActionCreateGrampusMLUDebugTask, | |||
ActionCreateSuperComputeTask: | |||
return true | |||
} | |||
return false | |||
@@ -291,10 +291,12 @@ func QueryModelById(id string) (*AiModelManage, error) { | |||
defer sess.Close() | |||
re := new(AiModelManage) | |||
isExist, err := sess.Table(new(AiModelManage)).ID(id).Get(re) | |||
if err == nil && isExist { | |||
return re, nil | |||
if err != nil { | |||
return nil, err | |||
} else if !isExist { | |||
return nil, ErrPretrainModelNotExist{} | |||
} | |||
return nil, err | |||
return re, nil | |||
} | |||
func DeleteModelConvertById(id string) error { | |||
@@ -48,6 +48,7 @@ const ( | |||
NPUResource = "NPU" | |||
GPUResource = "CPU/GPU" | |||
GCUResource = "GCU" | |||
CPUResource = "CPU" | |||
AllResource = "all" | |||
//notebook storage category | |||
@@ -78,6 +79,7 @@ const ( | |||
JobTypeTrain JobType = "TRAIN" | |||
JobTypeInference JobType = "INFERENCE" | |||
JobTypeOnlineInference JobType = "ONLINEINFERENCE" | |||
JobTypeSuperCompute JobType = "HPC" | |||
//notebook | |||
ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中 | |||
@@ -158,14 +160,18 @@ const ( | |||
GCU = "GCU" | |||
MLU = "MLU" | |||
DCU = "DCU" | |||
CPU = "CPU" | |||
ProcessorTypeNPU = "npu.huawei.com/NPU" | |||
ProcessorTypeGPU = "nvidia.com/gpu" | |||
ProcessorTypeGCU = "enflame-tech.com/gcu" | |||
ProcessorTypeMLU = "cambricon.com/mlu" | |||
ProcessorTypeDCU = "ac.sugon.com/dcu" | |||
ProcessorTypeCPU = "hpc/cpu" | |||
) | |||
const CloudbrainTwoDefaultVersion = "/V0001" | |||
type ComputeSource struct { | |||
Name string | |||
CloudbrainFormat string | |||
@@ -188,12 +194,29 @@ func GetComputeSourceInstance(name string) *ComputeSource { | |||
return nil | |||
} | |||
func GetComputeSourceCloudbrainFormat(name string) string { | |||
c := GetComputeSourceInstance(name) | |||
if c == nil { | |||
return "" | |||
} | |||
return c.CloudbrainFormat | |||
} | |||
func GetComputeSourceStandardFormat(name string) string { | |||
c := GetComputeSourceInstance(name) | |||
if c == nil { | |||
return "" | |||
} | |||
return c.Name | |||
} | |||
var ComputeSourceMap = map[string]*ComputeSource{ | |||
GPU: {Name: GPU, CloudbrainFormat: GPUResource, FullName: ProcessorTypeGPU}, | |||
NPU: {Name: NPU, FullName: ProcessorTypeNPU}, | |||
GCU: {Name: GCU, FullName: ProcessorTypeGCU}, | |||
MLU: {Name: MLU, FullName: ProcessorTypeMLU}, | |||
DCU: {Name: DCU, FullName: ProcessorTypeDCU}, | |||
CPU: {Name: CPU, FullName: ProcessorTypeCPU}, | |||
} | |||
const ( | |||
@@ -244,8 +267,9 @@ type Cloudbrain struct { | |||
FailedReason string `xorm:"text"` | |||
TrainUrl string //输出模型的obs路径 | |||
RemoteCodeUrl string //分中心下载代码地址 | |||
BranchName string `xorm:"varchar(2550)"` //分支名称 | |||
Parameters string //传给modelarts的param参数 | |||
Parameters string `xorm:"varchar(2000)"` //传给modelarts的param参数 | |||
BootFile string `xorm:"varchar(2550)"` //启动文件 | |||
DataUrl string `xorm:"varchar(3500)"` //数据集的obs路径 | |||
LogUrl string //日志输出的obs路径 | |||
@@ -277,7 +301,9 @@ type Cloudbrain struct { | |||
FineTune bool `xorm:"DEFAULT false"` | |||
FineTuneModelType int | |||
FineTuneCategory int | |||
Spec *Specification `xorm:"-"` | |||
Spec *Specification `xorm:"-"` | |||
Config *CloudbrainConfig `xorm:"-"` | |||
AppName string //超算任务的应用类型 | |||
} | |||
type CloudbrainShow struct { | |||
@@ -325,6 +351,21 @@ func (task *Cloudbrain) ToShow() *CloudbrainShow { | |||
return c | |||
} | |||
func (task *Cloudbrain) GetStandardComputeSource() string { | |||
return GetComputeSourceStandardFormat(task.ComputeResource) | |||
} | |||
func (task *Cloudbrain) GetCloudbrainConfig() *CloudbrainConfig { | |||
if task.Config != nil { | |||
return task.Config | |||
} | |||
c, err := GetCloudbrainConfig(task.ID) | |||
if err != nil { | |||
return nil | |||
} | |||
task.Config = c | |||
return c | |||
} | |||
func (task *Cloudbrain) ComputeAndSetDuration() { | |||
var d int64 | |||
if task.StartTime == 0 { | |||
@@ -362,7 +403,7 @@ func (task *Cloudbrain) GetAiCenter() string { | |||
} | |||
//是否为在线notebook文件任务 | |||
// 是否为在线notebook文件任务 | |||
func (task *Cloudbrain) IsFileNoteBookTask() bool { | |||
return task.JobType == string(JobTypeDebug) && task.BootFile != "" | |||
} | |||
@@ -446,8 +487,8 @@ func (task *Cloudbrain) NeedActiveStop() bool { | |||
return task.IsCreating() || (task.IsPreparing() && int64(task.CreatedUnix) < time.Now().Add(-1*setting.PREPARING_MAX_WAIT_DURATION).Unix()) | |||
} | |||
//是否允许创建多版本 | |||
//目前只有启智NPU可以 | |||
// 是否允许创建多版本 | |||
// 目前只有启智NPU可以 | |||
func (task *Cloudbrain) IsAllowedToCreateMultipleVersions() bool { | |||
if task.Type == TypeCloudBrainTwo && task.ComputeResource == NPUResource && task.JobType != string(JobTypeDebug) { | |||
return true | |||
@@ -462,11 +503,7 @@ func (task *Cloudbrain) IsNewAITask() bool { | |||
continue | |||
} | |||
for _, s := range v { | |||
c := GetComputeSourceInstance(s) | |||
if c == nil { | |||
continue | |||
} | |||
if c.GetCloudbrainFormat() == task.ComputeResource { | |||
if s == task.GetStandardComputeSource() { | |||
return true | |||
} | |||
} | |||
@@ -1526,6 +1563,7 @@ type ModelUrls struct { | |||
} | |||
type DatasetDownload struct { | |||
UUID string `json:"uuid"` | |||
DatasetName string `json:"dataset_name"` | |||
DatasetDownloadLink string `json:"dataset_download_link"` | |||
RepositoryLink string `json:"repository_link"` | |||
@@ -1533,6 +1571,7 @@ type DatasetDownload struct { | |||
} | |||
type ModelDownload struct { | |||
ModelName string `json:"model_name"` | |||
Name string `json:"name"` | |||
DownloadLink string `json:"download_link"` | |||
RepositoryLink string `json:"repository_link"` | |||
@@ -1977,6 +2016,7 @@ type GrampusDataset struct { | |||
ContainerPath string `json:"containerPath"` | |||
ReadOnly bool `json:"readOnly"` | |||
GetBackEndpoint string `json:"getBackEndpoint"` | |||
Size int64 `json:"size"` | |||
} | |||
type CreateGrampusJobRequest struct { | |||
@@ -2328,6 +2368,13 @@ func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) { | |||
return err | |||
} | |||
} | |||
if cloudbrain.Config != nil { | |||
cloudbrain.Config.CloudbrainID = cloudbrain.ID | |||
if _, err = session.Insert(cloudbrain.Config); err != nil { | |||
session.Rollback() | |||
return err | |||
} | |||
} | |||
session.Commit() | |||
go updateReferenceCount(cloudbrain) | |||
OperateRepoAITaskNum(cloudbrain.RepoID, 1) | |||
@@ -3142,6 +3189,22 @@ type DatasetInfo struct { | |||
FullName string | |||
Type int | |||
Size int64 | |||
DownloadUrl string | |||
} | |||
// DatasetInfo4AITask describes how one dataset attachment can be accessed by
// an AI task, in both its compressed (archive) and uncompressed (extracted)
// forms.
type DatasetInfo4AITask struct {
	Compressed   DatasetBaseInfo // the archive file itself
	Uncompressed DatasetBaseInfo // the extracted content
	Type         int             // attachment storage type (e.g. TypeCloudBrainOne)
	Size         int64           // size of the dataset attachment
}

// DatasetBaseInfo holds the storage coordinates of one form of a dataset.
type DatasetBaseInfo struct {
	RealPath        string // backend storage path including bucket, when applicable
	ObjectKey       string // object key inside the bucket
	HttpDownloadUrl string
	S3DownloadUrl   string // "s3://bucket/key" style URL, when applicable
	Name            string // file name (archive name or extracted name)
}
func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { | |||
@@ -3178,25 +3241,31 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn | |||
} | |||
} | |||
var dataLocalPath string | |||
var downloadUrl string | |||
if len(grampusType) > 0 { | |||
if grampusType[0] == GPU { | |||
dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID | |||
} else if grampusType[0] == NPU { | |||
dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" | |||
} else if grampusType[0] == GCU || grampusType[0] == MLU || grampusType[0] == DCU { | |||
} else { | |||
if attach.Type == TypeCloudBrainOne { | |||
dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID | |||
} else { | |||
dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" | |||
} | |||
} | |||
} else { | |||
dataLocalPath = setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
AttachmentRelativePath(attach.UUID) + | |||
attach.UUID | |||
if attach.Type == TypeCloudBrainOne { | |||
dataLocalPath = setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
AttachmentRelativePath(attach.UUID) + | |||
attach.UUID | |||
} else { | |||
downloadUrl = "s3://" + setting.Bucket + "/" + setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + attach.UUID + "/" | |||
dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" | |||
} | |||
} | |||
datasetInfos[attach.UUID] = DatasetInfo{ | |||
@@ -3205,6 +3274,7 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn | |||
FullName: attach.Name, | |||
Type: attach.Type, | |||
Size: attach.Size, | |||
DownloadUrl: downloadUrl, | |||
} | |||
if i == 0 { | |||
datasetNames = attach.Name | |||
@@ -3216,6 +3286,81 @@ func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetIn | |||
return datasetInfos, datasetNames, nil | |||
} | |||
func GetDatasetInfo4AITask(uuidStr string) (map[string]DatasetInfo4AITask, error) { | |||
uuids := strings.Split(uuidStr, ";") | |||
attachments, err := GetAttachmentsByUUIDs(uuids) | |||
if err != nil { | |||
log.Error("GetAttachmentsByUUIDs failed: %v", err) | |||
return nil, err | |||
} | |||
attachMap := make(map[string]*Attachment, 0) | |||
attachNameMap := make(map[string]string, 0) | |||
for _, attach := range attachments { | |||
fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz") | |||
if _, exits := attachNameMap[fileName]; exits { | |||
return nil, errors.New("the dataset name is same") | |||
} | |||
attachNameMap[fileName] = "" | |||
attachMap[attach.UUID] = attach | |||
} | |||
datasetInfos := make(map[string]DatasetInfo4AITask) | |||
for _, tmpUuid := range uuids { | |||
attach := attachMap[tmpUuid] | |||
if attach == nil { | |||
log.Error("GetAttachmentsByUUIDs failed: %v", err) | |||
return nil, err | |||
} | |||
var compressedRealPath, compressedObjectKey, compressedS3DownloadUrl string | |||
var uncompressedRealPath, uncompressedObjectKey, uncompressedS3DownloadUrl string | |||
fileName := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(attach.Name, ".zip"), ".tar.gz"), ".tgz") | |||
if attach.Type == TypeCloudBrainOne { | |||
uncompressedRealPath = setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
AttachmentRelativePath(attach.UUID) + | |||
attach.UUID | |||
uncompressedObjectKey = setting.Attachment.Minio.BasePath + | |||
AttachmentRelativePath(attach.UUID) + | |||
attach.UUID | |||
compressedRealPath = setting.Attachment.Minio.RealPath + | |||
setting.Attachment.Minio.Bucket + "/" + | |||
setting.Attachment.Minio.BasePath + | |||
path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + | |||
attach.UUID | |||
compressedObjectKey = setting.Attachment.Minio.BasePath + | |||
path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + | |||
attach.UUID | |||
} else { | |||
compressedObjectKey = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + attach.Name | |||
compressedS3DownloadUrl = "s3://" + setting.Bucket + "/" + compressedObjectKey | |||
uncompressedObjectKey = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + attach.UUID + "/" | |||
uncompressedS3DownloadUrl = "s3://" + setting.Bucket + "/" + uncompressedObjectKey | |||
} | |||
datasetInfos[attach.UUID] = DatasetInfo4AITask{ | |||
Compressed: DatasetBaseInfo{ | |||
RealPath: compressedRealPath, | |||
ObjectKey: compressedObjectKey, | |||
S3DownloadUrl: compressedS3DownloadUrl, | |||
Name: attach.Name, | |||
}, | |||
Uncompressed: DatasetBaseInfo{ | |||
RealPath: uncompressedRealPath, | |||
ObjectKey: uncompressedObjectKey, | |||
S3DownloadUrl: uncompressedS3DownloadUrl, | |||
Name: fileName, | |||
}, | |||
Type: attach.Type, | |||
Size: attach.Size, | |||
} | |||
} | |||
return datasetInfos, nil | |||
} | |||
var ( | |||
SpecsMapInitFlag = false | |||
CloudbrainDebugResourceSpecsMap map[int]*ResourceSpec | |||
@@ -0,0 +1,35 @@ | |||
package models | |||
import ( | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
// CloudbrainConfig stores per-task storage locations and text snapshots for a
// cloudbrain task. Its primary key is the ID of the owning cloudbrain record
// (see CreateCloudbrain, which assigns CloudbrainID before inserting).
type CloudbrainConfig struct {
	CloudbrainID int64 `xorm:"pk"`
	// Storage location of the task's output artifacts.
	OutputObjectPrefix string
	OutputStorageType  string
	OutputBucket       string
	OutputEndpoint     string
	// Storage location of the task's logs.
	LogObjectPrefix string
	LogStorageType  string
	LogBucket       string
	LogEndpoint     string
	// Large text snapshots captured for the task; stored as TEXT columns.
	ConfigurationSnapshot string             `xorm:"text"`
	ContainerDataSnapshot string             `xorm:"text"`
	CreatedTime           timeutil.TimeStamp `xorm:"created"`
	UpdatedTime           timeutil.TimeStamp `xorm:"updated"`
}
func GetCloudbrainConfig(cloudbrainId int64) (*CloudbrainConfig, error) { | |||
r := &CloudbrainConfig{} | |||
if has, err := x.Where("cloudbrain_id = ?", cloudbrainId).Get(r); err != nil { | |||
return nil, err | |||
} else if !has { | |||
return nil, ErrRecordNotExist{} | |||
} | |||
return r, nil | |||
} | |||
// InsertCloudbrainConfig persists a new CloudbrainConfig row and returns the
// number of affected records reported by the xorm engine.
func InsertCloudbrainConfig(config *CloudbrainConfig) (int64, error) {
	return x.Insert(config)
}
@@ -2070,3 +2070,15 @@ type ErrModelartsDeployNotExist struct { | |||
func (err ErrModelartsDeployNotExist) Error() string { | |||
return fmt.Sprintf("Deployment %s does not exist", err.ID) | |||
} | |||
// ErrPretrainModelNotExist represents the error returned when the pretrained
// model selected for a task does not exist (or has been deleted).
type ErrPretrainModelNotExist struct {
}

// IsErrPretrainModelNotExist checks if an error is an ErrPretrainModelNotExist.
func IsErrPretrainModelNotExist(err error) bool {
	_, ok := err.(ErrPretrainModelNotExist)
	return ok
}

func (err ErrPretrainModelNotExist) Error() string {
	// Fix: the original used fmt.Sprintf with no format arguments
	// (flagged by go vet / staticcheck); a plain string is equivalent.
	return "pretrain model is not exists"
}
@@ -176,6 +176,7 @@ func init() { | |||
new(IPLocation), | |||
new(ModelartsDeploy), | |||
new(ModelartsDeployQueue), | |||
new(CloudbrainConfig), | |||
) | |||
tablesStatistic = append(tablesStatistic, | |||
@@ -13,19 +13,20 @@ const ( | |||
type TaskType string | |||
const ( | |||
TaskCreatePublicRepo TaskType = "CreatePublicRepo" | |||
TaskCreateIssue TaskType = "CreateIssue" | |||
TaskCreatePullRequest TaskType = "CreatePullRequest" | |||
TaskCommentIssue TaskType = "CommentIssue" | |||
TaskUploadAttachment TaskType = "UploadAttachment" | |||
TaskCreateNewModelTask TaskType = "CreateNewModelTask" | |||
TaskBindWechat TaskType = "BindWechat" | |||
TaskCreateCloudbrainTask TaskType = "CreateCloudbrainTask" | |||
TaskDatasetRecommended TaskType = "DatasetRecommended" | |||
TaskCreateImage TaskType = "CreateImage" | |||
TaskImageRecommend TaskType = "ImageRecommend" | |||
TaskChangeUserAvatar TaskType = "ChangeUserAvatar" | |||
TaskPushCommits TaskType = "PushCommits" | |||
TaskCreatePublicRepo TaskType = "CreatePublicRepo" | |||
TaskCreateIssue TaskType = "CreateIssue" | |||
TaskCreatePullRequest TaskType = "CreatePullRequest" | |||
TaskCommentIssue TaskType = "CommentIssue" | |||
TaskUploadAttachment TaskType = "UploadAttachment" | |||
TaskCreateNewModelTask TaskType = "CreateNewModelTask" | |||
TaskBindWechat TaskType = "BindWechat" | |||
TaskCreateCloudbrainTask TaskType = "CreateCloudbrainTask" | |||
TaskCreateSuperComputeTask TaskType = "CreateSuperComputeTask" | |||
TaskDatasetRecommended TaskType = "DatasetRecommended" | |||
TaskCreateImage TaskType = "CreateImage" | |||
TaskImageRecommend TaskType = "ImageRecommend" | |||
TaskChangeUserAvatar TaskType = "ChangeUserAvatar" | |||
TaskPushCommits TaskType = "PushCommits" | |||
) | |||
func GetTaskTypeFromAction(a ActionType) TaskType { | |||
@@ -43,6 +44,7 @@ func GetTaskTypeFromAction(a ActionType) TaskType { | |||
ActionCreateGrampusGCUTrainTask, | |||
ActionCreateGrampusMLUDebugTask, | |||
ActionCreateGrampusDCUDebugTask, | |||
ActionCreateSuperComputeTask, | |||
ActionCreateGrampusGPUOnlineInferTask, | |||
ActionCreateGrampusGPUTrainTask: | |||
return TaskCreateCloudbrainTask | |||
@@ -77,8 +79,8 @@ func GetTaskTypeFromAction(a ActionType) TaskType { | |||
return "" | |||
} | |||
//PointTaskConfig Only add and delete are allowed, edit is not allowed | |||
//so if you want to edit config for some task code,please delete first and add new one | |||
// PointTaskConfig Only add and delete are allowed, edit is not allowed | |||
// so if you want to edit config for some task code,please delete first and add new one | |||
type TaskConfig struct { | |||
ID int64 `xorm:"pk autoincr"` | |||
TaskCode string `xorm:"NOT NULL"` | |||
@@ -101,6 +101,7 @@ func ToSpecification(s *models.Specification) *api.SpecificationShow { | |||
ShareMemGiB: s.ShareMemGiB, | |||
ComputeResource: s.ComputeResource, | |||
UnitPrice: s.UnitPrice, | |||
SourceSpecId: s.SourceSpecId, | |||
} | |||
} | |||
@@ -291,7 +291,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str | |||
var jobResult *models.CreateTrainJobResult | |||
var createErr error | |||
if req.EngineID < 0 { | |||
jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||
jobResult, createErr = CreateTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||
JobName: req.JobName, | |||
Description: req.Description, | |||
Config: models.UserImageConfig{ | |||
@@ -315,7 +315,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str | |||
}, | |||
}) | |||
} else { | |||
jobResult, createErr = createTrainJob(models.CreateTrainJobParams{ | |||
jobResult, createErr = CreateTrainJob(models.CreateTrainJobParams{ | |||
JobName: req.JobName, | |||
Description: req.Description, | |||
Config: models.Config{ | |||
@@ -412,7 +412,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str | |||
func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) { | |||
return createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||
return CreateTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||
JobName: req.JobName, | |||
Description: req.Description, | |||
Config: models.UserImageConfig{ | |||
@@ -491,7 +491,7 @@ sendjob: | |||
return &result, nil | |||
} | |||
func createTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) { | |||
func CreateTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
@@ -551,7 +551,7 @@ sendjob: | |||
return &result, nil | |||
} | |||
func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) { | |||
func CreateTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
@@ -641,6 +641,7 @@ var ( | |||
UsageRateBeginTime string | |||
GPUImageCommonName string | |||
MultiNode string | |||
MMLSparkMaxTime int64 | |||
}{} | |||
ClearStrategy = struct { | |||
@@ -726,6 +727,8 @@ var ( | |||
//ai_task config | |||
AI_TASK_RANGE map[string][]string | |||
PREPARING_MAX_WAIT_DURATION time.Duration | |||
OUTPUT_SHOW_MAX_KEY int | |||
OUTPUT_DOWNLOAD_MAX_KEY int | |||
//wenxin url | |||
BaiduWenXin = struct { | |||
@@ -1576,6 +1579,8 @@ func NewContext() { | |||
json.Unmarshal([]byte(tmp), &rangeMap) | |||
AI_TASK_RANGE = rangeMap | |||
PREPARING_MAX_WAIT_DURATION = sec.Key("ENABLED").MustDuration(15 * time.Minute) | |||
OUTPUT_SHOW_MAX_KEY = sec.Key("OUTPUT_SHOW_MAX_KEY").MustInt(100) | |||
OUTPUT_DOWNLOAD_MAX_KEY = sec.Key("OUTPUT_DOWNLOAD_MAX_KEY").MustInt(1000) | |||
sec = Cfg.Section("benchmark") | |||
IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) | |||
@@ -1890,6 +1895,8 @@ func GetGrampusConfig() { | |||
} | |||
Grampus.MultiNode = sec.Key("MULTI_NODE").MustString("") | |||
Grampus.MMLSparkMaxTime = sec.Key("MMLSparkMaxTime").MustInt64(8 * 3600) | |||
} | |||
func SetRadarMapConfig() { | |||
@@ -407,3 +407,18 @@ func IsObjectExist4Minio(bucket, objectName string) (bool, error) { | |||
return true, nil | |||
} | |||
func MinioCheckAndGetFileSize(srcBucket string, key string) (bool, int64) { | |||
_, core, err := getClients() | |||
if err != nil { | |||
log.Error("getClients failed:", err.Error()) | |||
return false, 0 | |||
} | |||
meta, err := core.StatObject(srcBucket, key, miniov6.StatObjectOptions{}) | |||
if err != nil { | |||
log.Info("MinioCheckAndGetFileSize error, error=%v", err) | |||
return false, 0 | |||
} | |||
return true, meta.Size | |||
} |
@@ -21,12 +21,13 @@ import ( | |||
) | |||
type FileInfo struct { | |||
FileName string `json:"FileName"` | |||
ModTime string `json:"ModTime"` | |||
IsDir bool `json:"IsDir"` | |||
Size int64 `json:"Size"` | |||
ParenDir string `json:"ParenDir"` | |||
UUID string `json:"UUID"` | |||
FileName string `json:"FileName"` | |||
ModTime string `json:"ModTime"` | |||
IsDir bool `json:"IsDir"` | |||
Size int64 `json:"Size"` | |||
ParenDir string `json:"ParenDir"` | |||
UUID string `json:"UUID"` | |||
RelativePath string `json:"RelativePath"` | |||
} | |||
type FileInfoList []FileInfo | |||
@@ -278,13 +279,27 @@ func ObsGetFilesSize(srcBucket string, Files []string) int64 { | |||
return fileTotalSize | |||
} | |||
func ObsCheckAndGetFileSize(srcBucket string, key string) (bool, int64) { | |||
out, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{ | |||
Bucket: srcBucket, | |||
Key: key, | |||
}) | |||
if err != nil { | |||
log.Info("ObsCheckAndGetFilesSize error, error=%v", err) | |||
return false, 0 | |||
} | |||
return true, out.ContentLength | |||
} | |||
func ObsCopyManyFile(srcBucket string, srcPath string, destBucket string, destPath string, Files []string) (int64, error) { | |||
var fileTotalSize int64 | |||
srcPath = strings.TrimSuffix(srcPath, "/") + "/" | |||
destPath = strings.TrimSuffix(destPath, "/") + "/" | |||
for _, file := range Files { | |||
srcKey := srcPath + file | |||
destKey := destPath + file | |||
srcKey := srcPath + strings.TrimPrefix(file, "/") | |||
destKey := destPath + strings.TrimPrefix(file, "/") | |||
log.Info("srcKey=" + srcKey + " destKey=" + destKey) | |||
out, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{ | |||
Bucket: srcBucket, | |||
@@ -495,11 +510,12 @@ func GetAllObjectByBucketAndPrefix(bucket string, prefix string) ([]FileInfo, er | |||
isDir = false | |||
} | |||
fileInfo := FileInfo{ | |||
ModTime: val.LastModified.Format("2006-01-02 15:04:05"), | |||
FileName: val.Key[prefixLen:], | |||
Size: val.Size, | |||
IsDir: isDir, | |||
ParenDir: "", | |||
ModTime: val.LastModified.Format("2006-01-02 15:04:05"), | |||
FileName: strings.TrimPrefix(val.Key[prefixLen:], "/"), | |||
Size: val.Size, | |||
IsDir: isDir, | |||
ParenDir: "", | |||
RelativePath: val.Key, | |||
} | |||
fileInfoList = append(fileInfoList, fileInfo) | |||
} | |||
@@ -7,6 +7,7 @@ package storage | |||
import ( | |||
"fmt" | |||
"io" | |||
"strings" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/obs" | |||
@@ -46,9 +47,9 @@ func Copy(dstStorage ObjectStorage, dstPath string, srcStorage ObjectStorage, sr | |||
var ( | |||
// Attachments represents attachments storage | |||
Attachments ObjectStorage | |||
ObsCli *obs.ObsClient | |||
ScheduleMinioCore *minio.Core | |||
Attachments ObjectStorage | |||
ObsCli *obs.ObsClient | |||
MinioCore *minio.Core | |||
) | |||
// Init init the stoarge | |||
@@ -70,7 +71,7 @@ func Init() error { | |||
m.UseSSL, | |||
) | |||
log.Info("minio storage inited.") | |||
ScheduleMinioCore, err = minio.NewCore(m.Endpoint, m.AccessKeyID, m.SecretAccessKey, m.UseSSL) | |||
MinioCore, err = minio.NewCore(m.Endpoint, m.AccessKeyID, m.SecretAccessKey, m.UseSSL) | |||
if err != nil { | |||
log.Error("init ScheduleMinioCore err.%v", err) | |||
} | |||
@@ -87,3 +88,20 @@ func Init() error { | |||
return nil | |||
} | |||
func SelectFileByPrefixAndSuffix(list []FileInfo, prefix, suffix string) []FileInfo { | |||
r := make([]FileInfo, 0) | |||
for _, l := range list { | |||
if l.IsDir { | |||
continue | |||
} | |||
if !strings.HasPrefix(l.FileName, prefix) { | |||
continue | |||
} | |||
if !strings.HasSuffix(l.FileName, suffix) { | |||
continue | |||
} | |||
r = append(r, l) | |||
} | |||
return r | |||
} |
@@ -143,6 +143,7 @@ type SpecificationShow struct { | |||
ShareMemGiB float32 `json:"share_mem_gi_b"` | |||
ComputeResource string `json:"compute_resource"` | |||
UnitPrice int `json:"unit_price"` | |||
SourceSpecId string `json:"source_spec_id"` | |||
} | |||
type PointAccountShow struct { | |||
ID int64 `json:"id"` | |||
@@ -260,6 +260,7 @@ func NewFuncMap() []template.FuncMap { | |||
return dict, nil | |||
}, | |||
"Printf": fmt.Sprintf, | |||
"ToLower": strings.ToLower, | |||
"Escape": Escape, | |||
"Sec2Time": models.SecToTime, | |||
"ParseDeadline": func(deadline string) []string { | |||
@@ -422,6 +423,7 @@ func NewTextFuncMap() []texttmpl.FuncMap { | |||
return dict, nil | |||
}, | |||
"Printf": fmt.Sprintf, | |||
"ToLower": strings.ToLower, | |||
"Escape": Escape, | |||
"Sec2Time": models.SecToTime, | |||
"ParseDeadline": func(deadline string) []string { | |||
@@ -48,7 +48,7 @@ func tryScheduleDir(endpoint, bucket, objectKey, dstPeer string) { | |||
} | |||
func MoveBucketInOpenIMinio(objectKeyPrefix, targetObjectPrefix, oldBucket, newBucket string) error { | |||
var core = storage.ScheduleMinioCore | |||
var core = storage.MinioCore | |||
objectInfo := core.Client.ListObjects(oldBucket, objectKeyPrefix, true, nil) | |||
log.Info("MoveBucketInOpenIMinio start.objectKeyPrefix=%s", objectKeyPrefix) | |||
count := 0 | |||
@@ -1096,6 +1096,7 @@ online_debug = Start | |||
debug_again=Restart | |||
stop=Stop | |||
delete=Delete | |||
start_use=Start use | |||
more=More | |||
gpu_type_all=All | |||
model_download=Model Download | |||
@@ -1128,6 +1129,8 @@ export_result_to_dataset = Export the results to a dataset | |||
loader_result_file = Loading results file... | |||
cloudbrain=Cloudbrain | |||
superCompute=HPC | |||
superComputeTask=HPC Task | |||
cloudbrain.task = Cloudbrain Task | |||
cloudbrain.search = Search Task Name | |||
cloudbrain.new=New cloudbrain | |||
@@ -3177,11 +3180,12 @@ task_npudebugjob=`created NPU type debugging task <a href="%s/modelarts/notebook | |||
task_c2net_gpudebugjob=`created CPU/GPU type debugging task <a href="%s/grampus/notebook/%s">%s</a>` | |||
task_c2net_npudebugjob=`created NPU type debugging task <a href="%s/grampus/notebook/%s">%s</a>` | |||
task_c2ent_gcudebugjob=`created GCU type debugging task <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2ent_gcutrainjob=`created GCU type train task <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_c2ent_gcutrainjob=`created GCU type train task <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2ent_mludebugjob=`created MLU type debugging task <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2ent_mlutrainjob=`created MLU type train task <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_c2ent_mlutrainjob=`created MLU type train task <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2net_dcudebugjob=`created DCU type debugging task <a href="%s/grampus/notebook/%s">%s</a>` | |||
task_c2ent_onlineinferjob=`created GPU type online inference task <a href="%s/grampus/onlineinfer/%s">%s</a>` | |||
task_c2net_cpusupercomputejob=`created CPU type HPC task <a href="%s/supercompute/job/%s">%s</a>` | |||
task_nputrainjob=`created NPU training task <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_inferencejob=`created reasoning task <a href="%s/modelarts/inference-job/%s">%s</a>` | |||
task_benchmark=`created profiling task <a href="%s/cloudbrain/benchmark/%s">%s</a>` | |||
@@ -3352,6 +3356,7 @@ TRAIN = TRAIN | |||
INFERENCE = INFERENCE | |||
BENCHMARK = BENCHMARK | |||
ONLINEINFERENCE = ONLINEINFERENCE | |||
SUPERCOMPUTE = HPC | |||
brain_area = Brain Area | |||
Delete_failed=Fail to delete the job, please try again later. | |||
@@ -3431,16 +3436,20 @@ job_name_already_used = The job name did already exist | |||
insufficient_point_balance = Insufficient point balance | |||
create_failed = Create AI task failed | |||
restart_failed = Restart AI task failed, please try again later. | |||
boot_file_must_python = The boot file must be a python file | |||
stop_failed = Fail to stop the job, please try again later. | |||
can_not_restart = The task was not scheduled successfully before, so it cannot be restart. | |||
dataset_size_over_limit = The size of dataset exceeds limitation (%dGB) | |||
boot_file_must_python = The boot file must be a python file | |||
boot_file_not_exist= The boot file is not exists. | |||
branch_not_exists= The branch does not exist. Please refresh and select again. | |||
boot_file_not_exist = The boot file does not exist. | |||
branch_not_exists = The branch does not exist. Please refresh and select again. | |||
dataset_number_over_limit = The dataset count exceed the limit | |||
result_cleared=The files of the task have been cleared, can not restart or retrain any more, please create a new task instead | |||
model_not_exist=The model in the task does not exist or has been deleted | |||
[common_error] | |||
system_error = System error.Please try again later | |||
insufficient_permission = Insufficient permissions | |||
insufficient_permission = You do not have permission to perform this operation | |||
param_error = The parameter you submitted is incorrect | |||
wechat_not_bind = Please scan the code and bind to wechat first | |||
@@ -1095,6 +1095,7 @@ online_debug = 在线推理 | |||
debug_again=再次调试 | |||
stop=停止 | |||
delete=删除 | |||
start_use=开始使用 | |||
more=更多 | |||
gpu_type_all=全部 | |||
model_download=结果下载 | |||
@@ -1127,6 +1128,8 @@ export_result_to_dataset = 导出结果至数据集 | |||
loader_result_file = 正在加载结果文件中... | |||
cloudbrain=云脑 | |||
superCompute=超算 | |||
superComputeTask=超算任务 | |||
cloudbrain.task = 云脑任务 | |||
cloudbrain.search = 搜索任务名称 | |||
cloudbrain.new=新建任务 | |||
@@ -3200,6 +3203,7 @@ task_c2ent_mludebugjob=`创建了MLU类型调试任务 <a href="%s/grampus/noteb | |||
task_c2ent_mlutrainjob=`创建了MLU类型训练任务 <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2net_dcudebugjob=`创建了DCU类型调试任务 <a href="%s/grampus/notebook/%s">%s</a>` | |||
task_c2ent_onlineinferjob=`创建了GPU类型在线推理任务 <a href="%s/grampus/onlineinfer/%s">%s</a>` | |||
task_c2net_cpusupercomputejob=`创建了CPU类型超算任务 <a href="%s/supercompute/job/%s">%s</a>` | |||
task_nputrainjob=`创建了NPU类型训练任务 <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_inferencejob=`创建了推理任务 <a href="%s/modelarts/inference-job/%s">%s</a>` | |||
task_benchmark=`创建了评测任务 <a href="%s/cloudbrain/benchmark/%s">%s</a>` | |||
@@ -3373,6 +3377,7 @@ TRAIN = 训练任务 | |||
INFERENCE = 推理任务 | |||
BENCHMARK = 评测任务 | |||
ONLINEINFERENCE = 在线推理 | |||
HPC = 超算任务 | |||
brain_area = 脑区 | |||
Delete_failed=任务删除失败,请稍后再试。 | |||
@@ -3453,16 +3458,21 @@ job_name_already_used = 任务名已被使用,请换一个名称 | |||
insufficient_point_balance = 积分余额不足 | |||
create_failed = 创建AI任务失败 | |||
restart_failed = 再次调试失败,请稍后再试 | |||
boot_file_must_python = 启动文件必须是python文件 | |||
stop_failed = 任务停止失败,请稍后再试 | |||
can_not_restart = 这个任务之前没有调度成功,不能再次调试。 | |||
dataset_size_over_limit = 数据集大小超过限制(%dGB) | |||
boot_file_must_python = 启动文件必须是python文件 | |||
boot_file_not_exist =启动文件不存在 | |||
branch_not_exists= 代码分支不存在,请刷新后重试 | |||
branch_not_exists = 代码分支不存在,请刷新后重试 | |||
dataset_number_over_limit = 选择的数据集文件数量超出限制 | |||
result_cleared=源任务的文件已被清理,无法再次调试或复用训练结果,请新建任务。 | |||
model_not_exist=选择的预训练模型不存在或者已被删除 | |||
[common_error] | |||
system_error = 当前服务不可用,请稍后再试 | |||
insufficient_permission = 权限不足 | |||
insufficient_permission = 您没有权限执行此操作 | |||
param_error = 提交的参数有误 | |||
wechat_not_bind = 请先扫码绑定微信 | |||
@@ -1,5 +1,5 @@ | |||
{ | |||
"name": "aiforge1", | |||
"name": "aiforge", | |||
"lockfileVersion": 2, | |||
"requires": true, | |||
"packages": { | |||
@@ -31,7 +31,7 @@ var swiperRepo = new Swiper(".homepro-list", { | |||
delay: 2500, | |||
disableOnInteraction: false, | |||
}, | |||
breakpoints: { | |||
breakpoints: { | |||
768: { | |||
slidesPerView: 2, | |||
}, | |||
@@ -170,7 +170,7 @@ document.onreadystatechange = function () { | |||
if(document.readyState != "complete"){ | |||
return; | |||
} | |||
console.log("Start to open WebSocket." + document.readyState); | |||
console.log("Start to open WebSocket." + document.readyState); | |||
queryRecommendData(); | |||
var output = document.getElementById("newmessage"); | |||
@@ -179,7 +179,7 @@ document.onreadystatechange = function () { | |||
url = "wss://" + document.location.host + "/action/notification" | |||
} | |||
var socket = new WebSocket(url); | |||
socket.onopen = function () { | |||
messageQueue = []; | |||
console.log("message has connected."); | |||
@@ -242,8 +242,8 @@ document.onreadystatechange = function () { | |||
actionName = actionName.replace("{oldRepoName}",record.Content); | |||
html += recordPrefix + actionName; | |||
html += " <a href=\"" + getRepoLink(record) + "\" rel=\"nofollow\">" + getRepotext(record) + "</a>" | |||
} | |||
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" | |||
} | |||
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" | |||
|| record.OpType == "31" || record.OpType == "32" || record.OpType == "33" || record.OpType == "42" || record.OpType == "44"){ | |||
html += recordPrefix + actionName; | |||
const taskLink = getTaskLink(record); | |||
@@ -251,9 +251,10 @@ document.onreadystatechange = function () { | |||
html += " <a href=\"" + taskLink + "\" rel=\"nofollow\">" + record.RefName + "</a>" | |||
} else { | |||
html += " <span style=\"color: rgba(0,0,0,0.3)\">" + record.RefName + "</span>" | |||
} | |||
} | |||
} | |||
else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41" || record.OpType == "43"|| record.OpType == "44"|| record.OpType == "45"|| record.OpType == "46"){ | |||
else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41" | |||
|| record.OpType == "43"|| record.OpType == "44"|| record.OpType == "45"|| record.OpType == "46"|| record.OpType == "47"){ | |||
html += recordPrefix + actionName; | |||
const taskLink = getTaskLink(record); | |||
if (taskLink) { | |||
@@ -302,7 +303,11 @@ function getTaskLink(record){ | |||
re = ''; | |||
} | |||
}else if(record.OpType == 27){ | |||
re = re + "/modelarts/train-job/" + record.Content; | |||
if (record.Cloudbrain) { | |||
re = re + "/modelarts/train-job/" + record.Cloudbrain.ID; | |||
} else { | |||
re = ''; | |||
} | |||
}else if(record.OpType == 28){ | |||
re = re + "/modelarts/inference-job/" + record.Content; | |||
}else if(record.OpType == 29){ | |||
@@ -310,15 +315,25 @@ function getTaskLink(record){ | |||
}else if(record.OpType == 30){ | |||
re = re + "/modelmanage/model_readme_tmpl?name=" + record.RefName; | |||
}else if(record.OpType == 31){ | |||
re = re + "/cloudbrain/train-job/" + record.Content; | |||
if (record.Cloudbrain) { | |||
re = re + "/cloudbrain/train-job/" + record.Cloudbrain.ID; | |||
} else { | |||
re = ''; | |||
} | |||
}else if(record.OpType == 32 || record.OpType == 33 || record.OpType == 42 || record.OpType == 44){ | |||
re = re + "/grampus/train-job/" + record.Content; | |||
if (record.Cloudbrain) { | |||
re = re + "/grampus/train-job/" + record.Cloudbrain.ID; | |||
} else { | |||
re = ''; | |||
} | |||
}else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41 || record.OpType == 43|| record.OpType == 46){ | |||
if (record.Cloudbrain) { | |||
re = re + "/grampus/notebook/" + record.Cloudbrain.ID; | |||
} else { | |||
re = ''; | |||
} | |||
} else if(record.OpType == 47){ | |||
re = re + "/supercompute/job/" + record.Cloudbrain.ID; | |||
} else if(record.OpType == 45){ | |||
re = re + "/grampus/onlineinfer/" + record.Content; | |||
} | |||
@@ -486,6 +501,7 @@ var actionNameZH={ | |||
"44":"创建了MLU类型训练任务", | |||
"45":"创建了GPU类型在线推理任务", | |||
"46":"创建了DCU类型调试任务", | |||
"47":"创建了CPU类型超算任务", | |||
}; | |||
var actionNameEN={ | |||
@@ -525,6 +541,7 @@ var actionNameEN={ | |||
"44":" created MLU type training task ", | |||
"45":" created GPU type online inference task ", | |||
"46":" created DCU type debugging task ", | |||
"47":" created CPU type super compute task ", | |||
}; | |||
var repoAndOrgZH={ | |||
@@ -591,7 +608,7 @@ function queryRecommendData(){ | |||
function displayActivity(json){ | |||
var activityDiv = document.getElementById("recommendactivity"); | |||
if (!activityDiv) return; | |||
if (!activityDiv) return; | |||
var html = ""; | |||
if (json != null && json.length > 0){ | |||
for(var i = 0; i < json.length;i++){ | |||
@@ -649,7 +666,7 @@ function displayRepo(json){ | |||
if (json != null && json.length > 0){ | |||
var repoMap = {}; | |||
for (var i = 0, iLen = json.length; i < iLen; i++) { | |||
var repo = json[i]; | |||
var repo = json[i]; | |||
var label = isZh ? repo.Label : repo.Label_en; | |||
if (repoMap[label]) { | |||
repoMap[label].push(repo); | |||
@@ -670,7 +687,7 @@ function displayRepo(json){ | |||
${repo["Avatar"] ? `<img style="border-radius:100%;" class="left floated mini ui image" src="${repo["Avatar"]}">` : `<img style="border-radius:100%;" class="left floated mini ui image" avatar="${repo["OwnerName"]}">`} | |||
<a class="header nowrap" style="color:rgb(50, 145, 248);font-size:14px;" href="/${repo["OwnerName"]}/${repo["Name"]}" title="${repo["Alias"]}">${repo["Alias"]}</a> | |||
<div class="description nowrap-2" style="rgba(136,136,136,1);;font-size:12px;" title="${repo["Description"]}">${repo["Description"]}</div> | |||
<a href="/${repo["OwnerName"]}/${repo["Name"]}" style="height:100%;width:100%;position:absolute;left:0;top:0"></a>`; | |||
<a href="/${repo["OwnerName"]}/${repo["Name"]}" style="height:100%;width:100%;position:absolute;left:0;top:0"></a>`; | |||
html += ` | |||
</div> | |||
</div>`; | |||
@@ -718,7 +735,7 @@ function displayOrg(json){ | |||
swiperOrg.updateSlides(); | |||
} | |||
function displayDataset(data) { | |||
function displayDataset(data) { | |||
var homeDatasetEl = document.getElementById("home_dataset"); | |||
if (!homeDatasetEl) return; | |||
var html = ''; | |||
@@ -757,7 +774,7 @@ function displayUserExp(data) { | |||
for (var i = 0, iLen = data.length; i < iLen; i++) { | |||
var dataI = data[i]; | |||
html += `<div class="swiper-slide"> | |||
<div class="ui fluid user-card"> | |||
<div class="ui fluid user-card"> | |||
<div><div class="content img-c"> | |||
<a href="/${dataI.name}"> | |||
<div class="img" style="width:60px;height:60px;background-image:url('${dataI.avatar}')"></div> | |||
@@ -765,7 +782,7 @@ function displayUserExp(data) { | |||
</div></div> | |||
<div><div class="content label" title="${dataI.fullname || dataI.name}">${dataI.fullname || dataI.name}</div></div> | |||
<div><div class="content descr" title="${dataI.desc}">${dataI.desc}</div></div> | |||
</div> | |||
</div> | |||
</div>` | |||
} | |||
homeUserExpEl.innerHTML = html; | |||
@@ -907,8 +924,8 @@ function initHomeTopBanner() { | |||
if (banner.data) { | |||
hmPageC.append($(banner.data)); | |||
hmPageSlidePaginationC.append('<div class="_hm-slide-pagination-item"></div>'); | |||
} | |||
} | |||
} | |||
} | |||
startSlide(); | |||
} | |||
@@ -1,8 +1,6 @@ | |||
package ai_task | |||
import ( | |||
"net/http" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
@@ -10,8 +8,12 @@ import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/util" | |||
"code.gitea.io/gitea/routers/common" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/schedule" | |||
"code.gitea.io/gitea/services/ai_task_service/task" | |||
"net/http" | |||
"strings" | |||
) | |||
func CreateAITask(ctx *context.Context, form entity.CreateReq) { | |||
@@ -68,21 +70,49 @@ func RestartAITask(ctx *context.Context) { | |||
func GetAITaskLog(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
baseLine := ctx.QueryInt64("base_line") | |||
lines := ctx.QueryInt64("lines") | |||
order := ctx.Query("order") | |||
nodeId := ctx.QueryInt("node_id") | |||
logFileName := ctx.Query("file_name") | |||
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("GetAITaskLog GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrain) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
t.GetLog(id) | |||
ctx.JSON(http.StatusOK, response.OuterSuccess()) | |||
res, err := t.GetLog(entity.QueryLogOpts{ | |||
CloudbrainId: id, | |||
BaseLine: baseLine, | |||
Lines: lines, | |||
Order: entity.Direction(order), | |||
NodeId: nodeId, | |||
LogFileName: logFileName, | |||
}) | |||
if err != nil { | |||
log.Error("GetAITaskLog error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
if res.Content != "" && cloudbrain.IsUserHasRight(ctx.User) { | |||
res.CanLogDownload = true | |||
} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(res)) | |||
} | |||
func GetAITaskInfo(ctx *context.Context) { | |||
func DownloadAITaskLog(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
nodeId := ctx.QueryInt("node_id") | |||
logFileName := ctx.Query("file_name") | |||
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("GetAITaskInfo GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
log.Error("DownloadAITaskLog GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
@@ -92,36 +122,152 @@ func GetAITaskInfo(ctx *context.Context) { | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
resultTask, err := t.Query(id) | |||
res, err := t.GetLogDownloadInfo(entity.GetLogDownloadInfoReq{ | |||
CloudbrainId: id, | |||
NodeId: nodeId, | |||
LogFileName: logFileName, | |||
}) | |||
if err != nil { | |||
log.Error("Query error.id=%d err=%v", id, err) | |||
log.Error("DownloadAITaskLog error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
//国际化 | |||
resultTask.Tr(ctx.Language()) | |||
//根据权限去掉数据集和模型信息 | |||
var operatorId int64 | |||
if ctx.User != nil { | |||
operatorId = ctx.User.ID | |||
if res == nil || res.IsEmpty() { | |||
log.Error("DownloadAITaskLog error.%v", err) | |||
ctx.JSON(http.StatusNotFound, "") | |||
return | |||
} | |||
tmpErr := common.WriteDownloadContent2Resp(ctx, res) | |||
if tmpErr != nil { | |||
log.Error("DownloadAITaskLog error.%v", tmpErr) | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(tmpErr)) | |||
return | |||
} | |||
} | |||
func DownloadOutputFile(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
fileName := ctx.Query("file_name") | |||
parentDir := ctx.Query("parent_dir") | |||
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("DownloadOutputFile GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrain) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res, err := t.GetSingleOutputDownloadInfo(entity.GetOutputDownloadInfoReq{ | |||
CloudbrainId: id, | |||
FileName: fileName, | |||
ParentDir: parentDir, | |||
}) | |||
if err != nil { | |||
log.Error("DownloadOutputFile error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
if res == nil || res.IsEmpty() { | |||
log.Error("DownloadOutputFile error.%v", err) | |||
ctx.JSON(http.StatusNotFound, "") | |||
return | |||
} | |||
tmpErr := common.WriteDownloadContent2Resp(ctx, res) | |||
if tmpErr != nil { | |||
log.Error("DownloadAITaskLog error.%v", tmpErr) | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(tmpErr)) | |||
return | |||
} | |||
} | |||
func DownloadAllOutputFile(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("DownloadAllOutputFile GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrain) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res, err := t.GetAllOutputDownloadInfo(entity.GetOutputDownloadInfoReq{ | |||
CloudbrainId: id, | |||
}) | |||
if err != nil { | |||
log.Error("GetAllOutputDownloadInfo error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
if res == nil || res.IsEmpty() { | |||
log.Error("DownloadAllOutputFile error.%v", err) | |||
ctx.JSON(http.StatusNotFound, "") | |||
return | |||
} | |||
tmpErr := common.WriteDownloadContent2Resp(ctx, res) | |||
if tmpErr != nil { | |||
log.Error("DownloadAITaskLog error.%v", tmpErr) | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(tmpErr)) | |||
return | |||
} | |||
} | |||
func GetAITaskInfo(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
job, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("GetAITaskInfo GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(job) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
if operatorId == 0 || cloudbrain.UserID != operatorId { | |||
resultTask.RemoveDatasets() | |||
resultTask.RemovePretrainModelList() | |||
resultTask, err := t.Query(id) | |||
if err != nil { | |||
log.Error("Query error.id=%d err=%v", id, err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
//加载关联版本 | |||
earlyVersionList, bizErr := task.QueryTaskEarlyVersionList(id, operatorId) | |||
earlyVersionList, bizErr := task.QueryTaskEarlyVersionList(id) | |||
if bizErr != nil { | |||
log.Error("QueryTaskEarlyVersionList err.id=%d err=%v", id, err) | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(bizErr)) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(&entity.QueryAITaskRes{ | |||
res := &entity.QueryAITaskRes{ | |||
Task: resultTask, | |||
CanDownload: cloudbrain.CanDownloadJob(ctx, job), | |||
EarlyVersionList: earlyVersionList, | |||
CanCreateVersion: cloudbrain.CanUserModify(ctx.User), | |||
})) | |||
CanCreateVersion: job.CanUserModify(ctx.User), | |||
} | |||
//根据权限去掉数据集和模型信息 | |||
res.TryToRemoveDatasetAndModelInfo(ctx.User) | |||
//国际化 | |||
res.Tr(ctx.Language()) | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(res)) | |||
} | |||
func GetAITaskBriefInfo(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
@@ -141,7 +287,69 @@ func GetAITaskBriefInfo(ctx *context.Context) { | |||
} | |||
func GetAITaskOutput(ctx *context.Context) { | |||
ctx.JSON(http.StatusOK, response.OuterSuccess()) | |||
id := ctx.QueryInt64("id") | |||
parentDir := ctx.Query("parent_dir") | |||
cloudbrainTask, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("GetAITaskOutput GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrainTask) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res, err := t.GetOutput(id, parentDir) | |||
if err != nil { | |||
log.Error("GetOutput error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res.CanReschedule = cloudbrain.CanDeleteJob(ctx, cloudbrainTask) | |||
res.CanDownload = cloudbrain.CanDownloadJob(ctx, cloudbrainTask) | |||
m := map[string]interface{}{"output": res} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m)) | |||
} | |||
func GetAllAITaskOutput(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
suffixStr := ctx.Query("suffix") | |||
var suffix []string | |||
if suffixStr != "" { | |||
suffixList := strings.Split(suffixStr, "|") | |||
for i := 0; i < len(suffixList); i++ { | |||
if suffixList[i] != "" { | |||
suffix = append(suffix, suffixList[i]) | |||
} | |||
} | |||
} | |||
cloudbrainTask, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("GetAITaskOutput GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrainTask) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res, err := t.GetAllOutput(entity.GetAllOutputReq{ | |||
CloudbrainId: cloudbrainTask.ID, | |||
Suffix: suffix, | |||
}) | |||
if err != nil { | |||
log.Error("GetAllOutput error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
m := map[string]interface{}{"output": res} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m)) | |||
} | |||
func GetNotebookUrl(ctx *context.Context) { | |||
@@ -164,6 +372,25 @@ func GetNotebookUrl(ctx *context.Context) { | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m)) | |||
} | |||
func GetNodeInfo(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res, err := t.GetNodeInfo(id) | |||
if err != nil { | |||
log.Error("GetNodeInfo error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
m := map[string]interface{}{"nodes": res} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m)) | |||
} | |||
func GetCreationRequiredInfo(ctx *context.Context) { | |||
jobType := ctx.Query("job_type") | |||
var isOnlineType bool | |||
@@ -222,6 +449,7 @@ func GetAITaskList(ctx *context.Context) { | |||
return | |||
} | |||
result.CanCreateTask = cloudbrain.CanCreateOrDebugJob(ctx) | |||
result.IsRepoEmpty = ctx.Repo.Repository.IsEmpty | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(result)) | |||
} | |||
@@ -242,6 +470,48 @@ func GetAITaskOperationProfile(ctx *context.Context) { | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(r)) | |||
} | |||
func GetAITaskResourceUsage(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
nodeId := ctx.QueryInt("node_id") | |||
logFileName := ctx.Query("file_name") | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
r, err := t.GetResourceUsage(entity.GetResourceUsageOpts{ | |||
CloudbrainId: id, | |||
NodeId: nodeId, | |||
LogFileName: logFileName, | |||
}) | |||
if err != nil { | |||
log.Error("GetOperationProfile error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(r)) | |||
} | |||
func RetryModelSchedule(ctx *context.APIContext) { | |||
id := ctx.QueryInt64("id") | |||
if id <= 0 { | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx)) | |||
return | |||
} | |||
job, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx)) | |||
return | |||
} | |||
err = schedule.RetryModelMigrate(job) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(err)) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, response.OuterSuccess()) | |||
} | |||
func handCreateReq(req *entity.CreateReq) { | |||
req.JobName = util.ConvertDisplayJobNameToJobName(req.DisplayJobName) | |||
if req.WorkServerNumber == 0 { | |||
@@ -651,16 +651,24 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/stop", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.StopAITask) | |||
m.Post("/del", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.DelAITask) | |||
m.Post("/restart", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrAITaskCreator(), ai_task.RestartAITask) | |||
m.Get("/log", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskLog) | |||
m.Get("/output", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOutput) | |||
m.Get("/debug_url", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNotebookUrl) | |||
m.Get("/creation/required", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetCreationRequiredInfo) | |||
m.Post("/output/reschedule", reqRepoWriter(models.UnitTypeCloudBrain), ai_task.RetryModelSchedule) | |||
}, reqToken(), context.RepoRef()) | |||
m.Group("/ai_task", func() { | |||
m.Get("", reqRepoReader(models.UnitTypeCloudBrain), ai_task.GetAITaskInfo) | |||
m.Get("/brief", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskBriefInfo) | |||
m.Get("/list", reqRepoReader(models.UnitTypeCloudBrain), ai_task.GetAITaskList) | |||
m.Get("/operation_profile", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOperationProfile) | |||
m.Get("/log", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskLog) | |||
m.Get("/log/download", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.DownloadAITaskLog) | |||
m.Get("/node_info", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNodeInfo) | |||
m.Get("/output", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOutput) | |||
m.Get("/output/download", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.DownloadOutputFile) | |||
m.Get("/output/download/all", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.DownloadAllOutputFile) | |||
m.Get("/output/all", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAllAITaskOutput) | |||
m.Get("/resource_usage", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskResourceUsage) | |||
}) | |||
}, repoAssignment()) | |||
// Miscellaneous | |||
@@ -46,6 +46,7 @@ import ( | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/storage" | |||
routerRepo "code.gitea.io/gitea/routers/repo" | |||
ai_task "code.gitea.io/gitea/services/ai_task_service/task" | |||
) | |||
func CloudBrainShow(ctx *context.APIContext) { | |||
@@ -92,6 +93,15 @@ func CloudBrainShow(ctx *context.APIContext) { | |||
} | |||
func GeneralCloudBrainJobStop(ctx *context.APIContext) { | |||
task := ctx.Cloudbrain | |||
if task.IsNewAITask() { | |||
_, bizErr := ai_task.StopCloudbrain(task) | |||
if bizErr != nil { | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.Stopped_failed"))) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, models.BaseOKMessageApi) | |||
} | |||
if task.IsTerminal() { | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("cloudbrain.Already_stopped")) | |||
return | |||
@@ -7,6 +7,7 @@ package repo | |||
import ( | |||
"encoding/json" | |||
"errors" | |||
"net/http" | |||
"path" | |||
"sort" | |||
@@ -35,6 +36,7 @@ import ( | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
routerRepo "code.gitea.io/gitea/routers/repo" | |||
ai_task "code.gitea.io/gitea/services/ai_task_service/task" | |||
cloudbrainService "code.gitea.io/gitea/services/cloudbrain" | |||
) | |||
@@ -120,12 +122,35 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||
jobID := ctx.Params(":jobid") | |||
versionName := ctx.Query("version_name") | |||
job, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
var job *models.Cloudbrain | |||
id := ctx.QueryInt64("id") | |||
if id > 0 { | |||
job, err = models.GetCloudbrainByCloudbrainID(id) | |||
} else { | |||
job, err = models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
} | |||
if err != nil { | |||
ctx.NotFound(err) | |||
return | |||
} | |||
if job.IsNewAITask() { | |||
var bizErr *response.BizError | |||
job, bizErr = ai_task.UpdateCloudbrain(job) | |||
if bizErr != nil { | |||
log.Error("UpdateCloudbrain err.job.DisplayJobName = %s err=%v", job.DisplayJobName, err) | |||
ctx.NotFound(err) | |||
return | |||
} | |||
aiCenterName = cloudbrainService.GetAiCenterShow(job.AiCenter, ctx.Context) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"JobID": jobID, | |||
"JobStatus": job.Status, | |||
"JobDuration": job.TrainJobDuration, | |||
"AiCenter": aiCenterName, | |||
"StartTime": job.StartTime, | |||
}) | |||
return | |||
} | |||
if job.Type == models.TypeCloudBrainOne { | |||
aiCenterName = routerRepo.GetAiCenterNameByCode(models.AICenterOfCloudBrainOne, ctx.Language()) | |||
job, err = cloudbrainTask.SyncCloudBrainOneStatus(job) | |||
@@ -205,7 +230,12 @@ func GetModelScheduleStatus(ctx *context.APIContext) { | |||
func RetryModelSchedule(ctx *context.APIContext) { | |||
jobID := ctx.Params(":jobid") | |||
err := schedule.RetryModelMigrate(jobID) | |||
job, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("RetryModelMigrate GetCloudbrainByJobID err.jobId=%s err=%v", jobID, err) | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(errors.New("jobId not correct"))) | |||
} | |||
err = schedule.RetryModelMigrate(job) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(err)) | |||
return | |||
@@ -362,17 +392,41 @@ func trainJobGetLogContent(jobID string, versionID int64, baseLine string, order | |||
func DelTrainJobVersion(ctx *context.APIContext) { | |||
var ( | |||
err error | |||
err error | |||
task *models.Cloudbrain | |||
) | |||
var jobID = ctx.Params(":jobid") | |||
var versionName = ctx.Query("version_name") | |||
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
var id = ctx.QueryInt64("id") | |||
if id > 0 { | |||
task, err = models.GetCloudbrainByCloudbrainID(id) | |||
} else { | |||
task, err = models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
} | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
ctx.NotFound(err) | |||
return | |||
} | |||
if task.IsNewAITask() { | |||
bizErr := ai_task.DelCloudbrain(task) | |||
if bizErr != nil { | |||
log.Error("DelCloudbrain(%s) failed:%v err=%v", task.JobName, bizErr) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"Message": ctx.Tr(bizErr.TrCode), | |||
"StatusOK": 1, | |||
}) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"JobID": task.JobID, | |||
"VersionName": task.VersionName, | |||
"StatusOK": 0, | |||
}) | |||
return | |||
} | |||
if !task.IsTerminal() { | |||
log.Error("the job(%s) version has not been stopped", task.JobName) | |||
@@ -444,16 +498,33 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||
func StopTrainJobVersion(ctx *context.APIContext) { | |||
var ( | |||
err error | |||
err error | |||
task *models.Cloudbrain | |||
) | |||
var jobID = ctx.Params(":jobid") | |||
var versionName = ctx.Query("version_name") | |||
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
var id = ctx.QueryInt64("id") | |||
if id > 0 { | |||
task, err = models.GetCloudbrainByCloudbrainID(id) | |||
} else { | |||
task, err = models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
} | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
return | |||
} | |||
if task.IsNewAITask() { | |||
_, bizErr := ai_task.StopCloudbrain(task) | |||
if bizErr != nil { | |||
log.Error("StopCloudbrain(%s) failed:%v err=%v", task.JobName, bizErr) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"JobID": task.JobID, | |||
"VersionName": task.VersionName, | |||
"StatusOK": 0, | |||
}) | |||
} | |||
_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) | |||
if err != nil { | |||
log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | |||
@@ -0,0 +1,62 @@ | |||
package common | |||
import ( | |||
"archive/zip" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/context" | |||
"code.gitea.io/gitea/modules/log" | |||
"io" | |||
"net/http" | |||
"net/url" | |||
) | |||
func WriteDownloadContent2Resp(ctx *context.Context, res *entity.FileDownloadInfo) error { | |||
defer func() { | |||
res.Close() | |||
}() | |||
resp := ctx.Resp | |||
//优先重定向到下载链接 | |||
if res.DownloadUrl != "" { | |||
ctx.Resp.Header().Set("Cache-Control", "max-age=0") | |||
http.Redirect(ctx.Resp, ctx.Req.Request, res.DownloadUrl, http.StatusTemporaryRedirect) | |||
return nil | |||
} | |||
//没有下载链接则直接返回文件流 | |||
resp.Header().Set("Content-Disposition", "attachment; filename="+url.QueryEscape(res.ResultFileName)) | |||
resp.Header().Set("Content-Type", "application/octet-stream") | |||
var reader io.Reader | |||
switch res.ResultType { | |||
case entity.FileTypeTXT: | |||
for _, f := range res.Readers { | |||
reader = f.Reader | |||
io.Copy(resp, reader) | |||
} | |||
case entity.FileTypeZIP: | |||
w := zip.NewWriter(resp) | |||
defer w.Close() | |||
for _, f := range res.Readers { | |||
fDest, err := w.Create(f.Name) | |||
if err != nil { | |||
log.Error("GetAITaskLog error.%v", err) | |||
return err | |||
} | |||
p := make([]byte, 1024) | |||
var readErr error | |||
var readCount int | |||
// 读取对象内容 | |||
for { | |||
readCount, readErr = f.Reader.Read(p) | |||
if readCount > 0 { | |||
fDest.Write(p[:readCount]) | |||
} | |||
if readErr != nil { | |||
break | |||
} | |||
} | |||
} | |||
} | |||
return nil | |||
} |
@@ -47,17 +47,7 @@ const ( | |||
MODEL_ONLINE_TYPE = 0 | |||
) | |||
func saveModelByParameters(jobId string, versionName string, name string, version string, label string, description string, engine int, ctx *context.Context) (string, error) { | |||
aiTask, err := models.GetCloudbrainByJobIDAndVersionName(jobId, versionName) | |||
if err != nil { | |||
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId) | |||
if err != nil { | |||
log.Info("query task error." + err.Error()) | |||
return "", err | |||
} else { | |||
log.Info("query gpu train task.") | |||
} | |||
} | |||
func saveModelByParameters(aiTask *models.Cloudbrain, name string, version string, label string, description string, engine int, ctx *context.Context) (string, error) { | |||
uuid := uuid.NewV4() | |||
id := uuid.String() | |||
modelPath := id | |||
@@ -406,6 +396,25 @@ func SaveModel(ctx *context.Context) { | |||
re := map[string]string{ | |||
"code": "-1", | |||
} | |||
var aiTask *models.Cloudbrain | |||
var err error | |||
//云脑重构:适配用id的方式请求 | |||
cloudbrainId := ctx.QueryInt64("cloudbrain_id") | |||
if cloudbrainId > 0 { | |||
aiTask, err = models.GetCloudbrainByCloudbrainID(cloudbrainId) | |||
} else { | |||
aiTask, err = models.GetCloudbrainByJobIDAndVersionName(JobId, VersionName) | |||
if err != nil { | |||
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, JobId) | |||
} | |||
} | |||
if err != nil { | |||
log.Error("save model error." + err.Error()) | |||
re["msg"] = err.Error() | |||
return | |||
} | |||
isPrivate := ctx.QueryBool("isPrivate") | |||
if ctx.Repo.Repository.IsPrivate { | |||
if !isPrivate { | |||
@@ -414,11 +423,6 @@ func SaveModel(ctx *context.Context) { | |||
return | |||
} | |||
} | |||
if JobId == "" || VersionName == "" { | |||
re["msg"] = "JobId or VersionName is null." | |||
ctx.JSON(200, re) | |||
return | |||
} | |||
if modelSelectedFile == "" { | |||
re["msg"] = "Not selected model file." | |||
ctx.JSON(200, re) | |||
@@ -429,7 +433,7 @@ func SaveModel(ctx *context.Context) { | |||
ctx.JSON(200, re) | |||
return | |||
} | |||
id, err := saveModelByParameters(JobId, VersionName, name, version, label, description, engine, ctx) | |||
id, err := saveModelByParameters(aiTask, name, version, label, description, engine, ctx) | |||
if err != nil { | |||
log.Info("save model error." + err.Error()) | |||
re["msg"] = err.Error() | |||
@@ -872,7 +872,9 @@ func CloudBrainShow(ctx *context.Context) { | |||
} | |||
// CloudBrainTrainJobShow renders the cloudbrain train-job detail page.
// NOTE(review): the previous server-side data preparation (cloudBrainShow)
// is commented out — presumably the page now loads its data via API calls;
// confirm before removing the commented line for good.
func CloudBrainTrainJobShow(ctx *context.Context) {
	// cloudBrainShow(ctx, tplCloudBrainTrainJobShow, models.JobTypeTrain)
	ctx.Data["PageIsCloudBrain"] = true
	ctx.HTML(200, tplCloudBrainTrainJobShow)
}
func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.JobType) { | |||
@@ -1333,7 +1335,20 @@ func CloudBrainStop(ctx *context.Context) { | |||
var status = "" | |||
task := ctx.Cloudbrain | |||
for { | |||
if task.IsNewAITask() { | |||
t, bizErr := ai_task.StopCloudbrain(task) | |||
if bizErr != nil { | |||
resultCode = "-1" | |||
errorMsg = bizErr.TrCode | |||
resultCode = task.Status | |||
break | |||
} | |||
status = t.Status | |||
break | |||
} | |||
if task.Status == string(models.JobStopped) || task.Status == string(models.JobFailed) || task.Status == string(models.JobSucceeded) { | |||
log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) | |||
resultCode = "-1" | |||
@@ -2024,6 +2039,7 @@ func SyncCloudbrainStatus() { | |||
if task.Duration >= setting.MaxDuration && task.JobType == string(models.JobTypeDebug) { | |||
ai_task.StopCloudbrain(task) | |||
} | |||
continue | |||
} | |||
if task.Type == models.TypeCloudBrainOne { | |||
@@ -2050,7 +2066,7 @@ func SyncCloudbrainStatus() { | |||
log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType) | |||
} | |||
} else if task.Type == models.TypeC2Net { | |||
if task.JobType == string(models.JobTypeDebug) { | |||
if task.JobType == string(models.JobTypeDebug) || task.JobType == string(models.JobTypeSuperCompute) { | |||
cloudbrainTask.SyncGrampusNotebookStatus(task) | |||
} else { | |||
result, err := grampus.GetJob(task.JobID) | |||
@@ -2095,7 +2111,7 @@ func SyncCloudbrainStatus() { | |||
log.Error("task.Type(%s) is error:%d", task.JobName, task.Type) | |||
} | |||
if task.Status != string(models.JobWaiting) { | |||
if task.Duration >= setting.MaxDuration && task.JobType == string(models.JobTypeDebug) { | |||
if (task.Duration >= setting.MaxDuration && task.JobType == string(models.JobTypeDebug)) || (task.Duration >= setting.Grampus.MMLSparkMaxTime && task.JobType == string(models.JobTypeSuperCompute)) { | |||
log.Info("begin to stop job(%s), because of the duration", task.DisplayJobName) | |||
err = cloudbrainTask.StopDebugJob(task) | |||
if err != nil { | |||
@@ -2950,6 +2966,7 @@ func BenchmarkDel(ctx *context.Context) { | |||
func CloudBrainTrainJobNew(ctx *context.Context) { | |||
ctx.Data["IsCreate"] = true | |||
ctx.Data["PageIsCloudBrain"] = true | |||
cloudBrainTrainJobCreate(ctx) | |||
} | |||
func CloudBrainTrainJobVersionNew(ctx *context.Context) { | |||
@@ -2958,11 +2975,11 @@ func CloudBrainTrainJobVersionNew(ctx *context.Context) { | |||
} | |||
func cloudBrainTrainJobCreate(ctx *context.Context) { | |||
err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeTrain)) | |||
if err != nil { | |||
ctx.ServerError("get new train-job info failed", err) | |||
return | |||
} | |||
// err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeTrain)) | |||
// if err != nil { | |||
// ctx.ServerError("get new train-job info failed", err) | |||
// return | |||
// } | |||
ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew) | |||
} | |||
@@ -624,6 +624,7 @@ func ExportModelToExistDataSet(ctx *context.Context) { | |||
} | |||
description := ctx.Query("description") | |||
jobId := ctx.Query("jobId") | |||
cloudbrainId := ctx.QueryInt64("cloudbrain_id") | |||
storeType := ctx.QueryInt("type") | |||
versionName := ctx.Query("versionName") | |||
dataset, err := models.GetDatasetByID(datasetId) | |||
@@ -633,17 +634,22 @@ func ExportModelToExistDataSet(ctx *context.Context) { | |||
ctx.JSON(200, re) | |||
return | |||
} | |||
aiTask, err := models.GetCloudbrainByJobIDAndVersionName(jobId, versionName) | |||
if err != nil { | |||
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId) | |||
var aiTask *models.Cloudbrain | |||
if cloudbrainId > 0 { | |||
aiTask, err = models.GetCloudbrainByCloudbrainID(cloudbrainId) | |||
} else { | |||
aiTask, err = models.GetCloudbrainByJobIDAndVersionName(jobId, versionName) | |||
if err != nil { | |||
log.Info("query task error." + err.Error()) | |||
re["msg"] = "Query cloudbrain task error." + err.Error() | |||
ctx.JSON(200, re) | |||
return | |||
aiTask, err = models.GetRepoCloudBrainByJobID(ctx.Repo.Repository.ID, jobId) | |||
} | |||
} | |||
msgKey := fmt.Sprint(datasetId) + "_" + jobId + "_" + versionName | |||
if err != nil { | |||
log.Info("query task error." + err.Error()) | |||
re["msg"] = "Query cloudbrain task error." + err.Error() | |||
ctx.JSON(200, re) | |||
return | |||
} | |||
msgKey := fmt.Sprint(datasetId) + "_" + aiTask.JobID + "_" + aiTask.VersionName | |||
msgMap := make(map[string]int, 0) | |||
msgMap["##type##"] = storeType | |||
filterFiles := strings.Split(modelSelectedFile, ";") | |||
@@ -651,7 +657,7 @@ func ExportModelToExistDataSet(ctx *context.Context) { | |||
msgMap[shortFile] = 0 | |||
} | |||
setProgress(msgKey, msgMap) | |||
go asyncToExportDataset(dataset, storeType, modelSelectedFile, aiTask, ctx.User, msgKey, msgMap, versionName, description) | |||
go asyncToExportDataset(dataset, storeType, modelSelectedFile, aiTask, ctx.User, msgKey, msgMap, aiTask.VersionName, description) | |||
ctx.JSON(200, map[string]string{ | |||
"code": "0", | |||
"progressId": msgKey, | |||
@@ -31,8 +31,8 @@ import ( | |||
"code.gitea.io/gitea/modules/git" | |||
"code.gitea.io/gitea/modules/grampus" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
// "code.gitea.io/gitea/modules/notification" | |||
// "code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/modules/util" | |||
"github.com/unknwon/com" | |||
@@ -1421,7 +1421,13 @@ func GrampusNotebookDel(ctx *context.Context) { | |||
func GrampusTrainJobDel(ctx *context.Context) { | |||
var listType = ctx.Query("listType") | |||
if err := cloudbrainTask.DeleteGrampusJob(ctx); err != nil { | |||
if isHandled, err := ai_task.HandleNewAITaskDelete(ctx.Cloudbrain.ID); isHandled { | |||
if err != nil { | |||
log.Error("DeleteJob(%s) failed:%v", ctx.Cloudbrain.JobName, err, ctx.Data["msgID"]) | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
} | |||
} else if err := cloudbrainTask.DeleteGrampusJob(ctx); err != nil { | |||
log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"]) | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
@@ -1450,89 +1456,91 @@ func GrampusNotebookShow(ctx *context.Context) { | |||
func GrampusTrainJobShow(ctx *context.Context) { | |||
ctx.Data["PageIsCloudBrain"] = true | |||
var task *models.Cloudbrain | |||
task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid")) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
task.ContainerIp = "" | |||
task.User, _ = models.GetUserByID(task.UserID) | |||
if task.DeletedAt.IsZero() { //normal record | |||
result, err := grampus.GetJob(task.JobID) | |||
ctx.HTML(http.StatusOK, tplGrampusTrainJobShow) | |||
/* | |||
var task *models.Cloudbrain | |||
task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid")) | |||
if err != nil { | |||
log.Error("GetJob failed:" + err.Error()) | |||
log.Error("GetCloudbrainByJobID failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
if result != nil { | |||
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { | |||
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] | |||
task.ContainerIp = "" | |||
task.User, _ = models.GetUserByID(task.UserID) | |||
if task.DeletedAt.IsZero() { //normal record | |||
result, err := grampus.GetJob(task.JobID) | |||
if err != nil { | |||
log.Error("GetJob failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
oldStatus := task.Status | |||
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { | |||
task.Duration = result.JobInfo.RunSec | |||
if task.Duration < 0 { | |||
task.Duration = 0 | |||
} | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) | |||
if result != nil { | |||
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { | |||
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] | |||
} | |||
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||
task.EndTime = task.StartTime.Add(task.Duration) | |||
oldStatus := task.Status | |||
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { | |||
task.Duration = result.JobInfo.RunSec | |||
if task.Duration < 0 { | |||
task.Duration = 0 | |||
} | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) | |||
} | |||
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||
task.EndTime = task.StartTime.Add(task.Duration) | |||
} | |||
task.CorrectCreateUnix() | |||
if oldStatus != task.Status { | |||
notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||
} | |||
} | |||
task.CorrectCreateUnix() | |||
if oldStatus != task.Status { | |||
notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||
err = models.UpdateJob(task) | |||
if err != nil { | |||
log.Error("UpdateJob failed:" + err.Error()) | |||
} | |||
} | |||
err = models.UpdateJob(task) | |||
if err != nil { | |||
log.Error("UpdateJob failed:" + err.Error()) | |||
} | |||
} | |||
} | |||
if len(task.Parameters) > 0 { | |||
var parameters models.Parameters | |||
err := json.Unmarshal([]byte(task.Parameters), ¶meters) | |||
if err != nil { | |||
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) | |||
ctx.ServerError("system error", err) | |||
return | |||
} | |||
if len(task.Parameters) > 0 { | |||
var parameters models.Parameters | |||
err := json.Unmarshal([]byte(task.Parameters), ¶meters) | |||
if err != nil { | |||
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) | |||
ctx.ServerError("system error", err) | |||
return | |||
} | |||
if len(parameters.Parameter) > 0 { | |||
paramTemp := "" | |||
for _, Parameter := range parameters.Parameter { | |||
param := Parameter.Label + " = " + Parameter.Value + "; " | |||
paramTemp = paramTemp + param | |||
if len(parameters.Parameter) > 0 { | |||
paramTemp := "" | |||
for _, Parameter := range parameters.Parameter { | |||
param := Parameter.Label + " = " + Parameter.Value + "; " | |||
paramTemp = paramTemp + param | |||
} | |||
task.Parameters = paramTemp[:len(paramTemp)-2] | |||
} else { | |||
task.Parameters = "" | |||
} | |||
task.Parameters = paramTemp[:len(paramTemp)-2] | |||
} else { | |||
task.Parameters = "" | |||
} | |||
} | |||
taskList := make([]*models.Cloudbrain, 0) | |||
taskList = append(taskList, task) | |||
prepareSpec4Show(ctx, task) | |||
taskList := make([]*models.Cloudbrain, 0) | |||
taskList = append(taskList, task) | |||
prepareSpec4Show(ctx, task) | |||
ctx.Data["version_list_task"] = taskList | |||
ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) | |||
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, task) | |||
ctx.Data["displayJobName"] = task.DisplayJobName | |||
ctx.Data["canReschedule"] = cloudbrain.CanDeleteJob(ctx, task) | |||
ctx.Data["version_list_task"] = taskList | |||
ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) | |||
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, task) | |||
ctx.Data["displayJobName"] = task.DisplayJobName | |||
ctx.Data["canReschedule"] = cloudbrain.CanDeleteJob(ctx, task) | |||
ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) | |||
ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) | |||
ctx.HTML(http.StatusOK, tplGrampusTrainJobShow) | |||
ctx.HTML(http.StatusOK, tplGrampusTrainJobShow) | |||
*/ | |||
} | |||
func GrampusDownloadLog(ctx *context.Context) { | |||
@@ -2,7 +2,6 @@ package repo | |||
import ( | |||
"archive/zip" | |||
ai_task "code.gitea.io/gitea/services/ai_task_service/task" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
@@ -16,6 +15,8 @@ import ( | |||
"time" | |||
"unicode/utf8" | |||
ai_task "code.gitea.io/gitea/services/ai_task_service/task" | |||
"code.gitea.io/gitea/services/cloudbrain/modelmanage" | |||
"code.gitea.io/gitea/services/lock" | |||
@@ -810,54 +811,57 @@ func NotebookDel(ctx *context.Context) { | |||
func TrainJobIndex(ctx *context.Context) { | |||
MustEnableModelArts(ctx) | |||
ctx.Data["PageIsCloudBrain"] = true | |||
ctx.HTML(200, tplModelArtsTrainJobIndex) | |||
/* | |||
repo := ctx.Repo.Repository | |||
page := ctx.QueryInt("page") | |||
if page <= 0 { | |||
page = 1 | |||
} | |||
listType := ctx.Query("listType") | |||
ctx.Data["ListType"] = listType | |||
if listType == models.AllResource { | |||
listType = "" | |||
} | |||
var jobTypes []string | |||
jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||
tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||
ListOptions: models.ListOptions{ | |||
Page: page, | |||
PageSize: setting.UI.IssuePagingNum, | |||
}, | |||
RepoID: repo.ID, | |||
JobTypeNot: false, | |||
JobTypes: jobTypes, | |||
IsLatestVersion: modelarts.IsLatestVersion, | |||
ComputeResource: listType, | |||
Type: models.TypeCloudBrainAll, | |||
}) | |||
if err != nil { | |||
ctx.ServerError("Cloudbrain", err) | |||
return | |||
} | |||
repo := ctx.Repo.Repository | |||
page := ctx.QueryInt("page") | |||
if page <= 0 { | |||
page = 1 | |||
} | |||
listType := ctx.Query("listType") | |||
ctx.Data["ListType"] = listType | |||
if listType == models.AllResource { | |||
listType = "" | |||
} | |||
var jobTypes []string | |||
jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||
tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||
ListOptions: models.ListOptions{ | |||
Page: page, | |||
PageSize: setting.UI.IssuePagingNum, | |||
}, | |||
RepoID: repo.ID, | |||
JobTypeNot: false, | |||
JobTypes: jobTypes, | |||
IsLatestVersion: modelarts.IsLatestVersion, | |||
ComputeResource: listType, | |||
Type: models.TypeCloudBrainAll, | |||
}) | |||
if err != nil { | |||
ctx.ServerError("Cloudbrain", err) | |||
return | |||
} | |||
for i, task := range tasks { | |||
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) | |||
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) | |||
} | |||
for i, task := range tasks { | |||
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) | |||
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) | |||
} | |||
pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) | |||
pager.SetDefaultParams(ctx) | |||
pager.AddParam(ctx, "listType", "ListType") | |||
ctx.Data["Page"] = pager | |||
pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) | |||
pager.SetDefaultParams(ctx) | |||
pager.AddParam(ctx, "listType", "ListType") | |||
ctx.Data["Page"] = pager | |||
ctx.Data["PageIsCloudBrain"] = true | |||
ctx.Data["Tasks"] = tasks | |||
ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) | |||
ctx.Data["RepoIsEmpty"] = repo.IsEmpty | |||
ctx.HTML(200, tplModelArtsTrainJobIndex) | |||
ctx.Data["PageIsCloudBrain"] = true | |||
ctx.Data["Tasks"] = tasks | |||
ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) | |||
ctx.Data["RepoIsEmpty"] = repo.IsEmpty | |||
ctx.HTML(200, tplModelArtsTrainJobIndex) | |||
*/ | |||
} | |||
func TrainJobNew(ctx *context.Context) { | |||
@@ -1967,93 +1971,119 @@ func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) err | |||
func TrainJobShow(ctx *context.Context) { | |||
ctx.Data["PageIsCloudBrain"] = true | |||
var jobID = ctx.Params(":jobid") | |||
repo := ctx.Repo.Repository | |||
page := ctx.QueryInt("page") | |||
if page <= 0 { | |||
page = 1 | |||
} | |||
var jobTypes []string | |||
jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ | |||
ListOptions: models.ListOptions{ | |||
Page: page, | |||
PageSize: setting.UI.IssuePagingNum, | |||
}, | |||
RepoID: repo.ID, | |||
Type: models.TypeCloudBrainTwo, | |||
JobTypes: jobTypes, | |||
JobID: jobID, | |||
}) | |||
if err != nil { | |||
log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
if len(VersionListTasks) == 0 { | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
//设置权限 | |||
canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID) | |||
if err != nil { | |||
ctx.ServerError("canNewJob failed", err) | |||
return | |||
} | |||
ctx.Data["canNewJob"] = canNewJob | |||
datasetList := make([][]*models.DatasetDownload, 0) | |||
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式 | |||
for i, task := range VersionListTasks { | |||
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
/* | |||
var jobID = ctx.Params(":jobid") | |||
repo := ctx.Repo.Repository | |||
page := ctx.QueryInt("page") | |||
if page <= 0 { | |||
page = 1 | |||
} | |||
var jobTypes []string | |||
jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ | |||
ListOptions: models.ListOptions{ | |||
Page: page, | |||
PageSize: setting.UI.IssuePagingNum, | |||
}, | |||
RepoID: repo.ID, | |||
Type: models.TypeCloudBrainTwo, | |||
JobTypes: jobTypes, | |||
JobID: jobID, | |||
}) | |||
var parameters models.Parameters | |||
if VersionListTasks[i].Parameters != "" { | |||
err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), ¶meters) | |||
if err != nil { | |||
log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) | |||
} | |||
if err != nil { | |||
log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
if len(parameters.Parameter) > 0 { | |||
paramTemp := "" | |||
for _, Parameter := range parameters.Parameter { | |||
param := Parameter.Label + " = " + Parameter.Value + "; " | |||
paramTemp = paramTemp + param | |||
} | |||
VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2] | |||
} else { | |||
VersionListTasks[i].Parameters = "" | |||
} | |||
datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false)) | |||
VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) | |||
VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) | |||
VersionListTasks[i].ContainerIp = "" | |||
//add spec | |||
s, err := resource.GetCloudbrainSpec(task.Cloudbrain.ID) | |||
if len(VersionListTasks) == 0 { | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
//设置权限 | |||
canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID) | |||
if err != nil { | |||
log.Error("TrainJobShow GetCloudbrainSpec error:" + err.Error()) | |||
continue | |||
ctx.ServerError("canNewJob failed", err) | |||
return | |||
} | |||
VersionListTasks[i].Cloudbrain.Spec = s | |||
} | |||
ctx.Data["canNewJob"] = canNewJob | |||
datasetList := make([][]*models.DatasetDownload, 0) | |||
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式 | |||
for i, task := range VersionListTasks { | |||
pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) | |||
pager.SetDefaultParams(ctx) | |||
ctx.Data["Page"] = pager | |||
ctx.Data["jobID"] = jobID | |||
ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName | |||
ctx.Data["version_list_task"] = VersionListTasks | |||
ctx.Data["version_list_count"] = VersionListCount | |||
ctx.Data["datasetList"] = datasetList | |||
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, &VersionListTasks[0].Cloudbrain) | |||
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
var parameters models.Parameters | |||
if VersionListTasks[i].Parameters != "" { | |||
err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), ¶meters) | |||
if err != nil { | |||
log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) | |||
} | |||
} | |||
if len(parameters.Parameter) > 0 { | |||
paramTemp := "" | |||
for _, Parameter := range parameters.Parameter { | |||
param := Parameter.Label + " = " + Parameter.Value + "; " | |||
paramTemp = paramTemp + param | |||
} | |||
VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2] | |||
} else { | |||
VersionListTasks[i].Parameters = "" | |||
} | |||
datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false)) | |||
VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) | |||
VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) | |||
VersionListTasks[i].ContainerIp = "" | |||
//add spec | |||
s, err := resource.GetCloudbrainSpec(task.Cloudbrain.ID) | |||
if err != nil { | |||
log.Error("TrainJobShow GetCloudbrainSpec error:" + err.Error()) | |||
continue | |||
} | |||
VersionListTasks[i].Cloudbrain.Spec = s | |||
} | |||
pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) | |||
pager.SetDefaultParams(ctx) | |||
ctx.Data["Page"] = pager | |||
ctx.Data["jobID"] = jobID | |||
ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName | |||
ctx.Data["version_list_task"] = VersionListTasks | |||
ctx.Data["version_list_count"] = VersionListCount | |||
ctx.Data["datasetList"] = datasetList | |||
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, &VersionListTasks[0].Cloudbrain) | |||
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
*/ | |||
} | |||
func TrainJobDel(ctx *context.Context) { | |||
var jobID = ctx.Params(":jobid") | |||
var listType = ctx.Query("listType") | |||
var id = ctx.QueryInt64("id") | |||
if id > 0 { | |||
task, _ := models.GetCloudbrainByCloudbrainID(id) | |||
if task != nil && task.IsNewAITask() { | |||
bizErr := ai_task.DelCloudbrain(task) | |||
if bizErr != nil { | |||
log.Error("DelCloudbrain(%s) failed:%v err=%v", task.JobName, bizErr) | |||
ctx.ServerError("DelCloudbrain failed", bizErr.ToError()) | |||
return | |||
} | |||
var isAdminPage = ctx.Query("isadminpage") | |||
var isHomePage = ctx.Query("ishomepage") | |||
if ctx.IsUserSiteAdmin() && isAdminPage == "true" { | |||
ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains") | |||
} else if isHomePage == "true" { | |||
ctx.Redirect(setting.AppSubURL + "/cloudbrains") | |||
} else { | |||
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) | |||
} | |||
return | |||
} | |||
} | |||
repo := ctx.Repo.Repository | |||
var jobTypes []string | |||
@@ -2359,7 +2389,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||
param = addModelUrlParam(param, form.PreTrainModelUrl, form.CkptName) | |||
datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) | |||
datasUrlList, dataUrl, datasetNames, _, err := getDatasUrlListByUUIDS(uuid) | |||
if err != nil { | |||
inferenceJobErrorNewDataPrepare(ctx, form) | |||
ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) | |||
@@ -2373,12 +2403,10 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||
ctx.RenderWithErr("json error:"+err.Error(), tplModelArtsInferenceJobNew, &form) | |||
return | |||
} | |||
if isMultiDataset { | |||
param = append(param, models.Parameter{ | |||
Label: modelarts.MultiDataUrl, | |||
Value: string(jsondatas), | |||
}) | |||
} | |||
param = append(param, models.Parameter{ | |||
Label: modelarts.MultiDataUrl, | |||
Value: string(jsondatas), | |||
}) | |||
existDeviceTarget := false | |||
if len(params) != 0 { | |||
@@ -1,5 +1,7 @@ | |||
package response | |||
import "errors" | |||
type BizError struct { | |||
Code int | |||
DefaultMsg string | |||
@@ -24,6 +26,14 @@ func (e *BizError) WithParams(params ...interface{}) *BizError { | |||
return newErr | |||
} | |||
func (e *BizError) ToError() error { | |||
msg := e.TrCode | |||
if msg == "" { | |||
msg = e.DefaultMsg | |||
} | |||
return errors.New(msg) | |||
} | |||
func NewBizError(err error) *BizError { | |||
return &BizError{Code: RESPONSE_CODE_ERROR_DEFAULT, DefaultMsg: err.Error(), TrCode: err.Error()} | |||
} | |||
@@ -9,7 +9,7 @@ var BADGES_STILL_HAS_USERS = &BizError{Code: 1005, DefaultMsg: "Please delete us | |||
//common response | |||
var SYSTEM_ERROR = &BizError{Code: 9009, DefaultMsg: "System error.Please try again later", TrCode: "common_error.system_error"} | |||
var INSUFFICIENT_PERMISSION = &BizError{Code: 9003, DefaultMsg: "insufficient permissions", TrCode: "common_error.insufficient_permission"} | |||
var INSUFFICIENT_PERMISSION = &BizError{Code: 9003, DefaultMsg: "You do not have permission to perform this operation", TrCode: "common_error.insufficient_permission"} | |||
var PARAM_ERROR = &BizError{Code: 9001, DefaultMsg: "param error", TrCode: "common_error.param_error"} | |||
var WECHAT_NOT_BIND = &BizError{Code: 9002, DefaultMsg: "Please scan the code and bind to wechat first", TrCode: "common_error.wechat_not_bind"} | |||
@@ -21,16 +21,18 @@ var MULTI_TASK = &BizError{Code: 2004, DefaultMsg: "You have already a running o | |||
var JOB_NAME_ALREADY_USED = &BizError{Code: 2005, DefaultMsg: "The job name did already exist", TrCode: "ai_task.job_name_already_used"} | |||
var INSUFFICIENT_POINT_BALANCE = &BizError{Code: 2006, DefaultMsg: "Insufficient point balance", TrCode: "ai_task.insufficient_point_balance"} | |||
var DATASET_NOT_EXISTS = &BizError{Code: 2007, DefaultMsg: "The part of datasets in the task does not exist or has been deleted, please create a new debug job.", TrCode: "repo.debug.manage.dataset_not_exist"} | |||
var MODEL_NOT_EXISTS = &BizError{Code: 2008, DefaultMsg: "The model in the task does not exist or has been deleted, please create a new debug job.", TrCode: "repo.debug.manage.model_not_exist"} | |||
var RESULT_CLEARD = &BizError{Code: 2009, DefaultMsg: "The files of the task have been cleared, can not restart any more, please create a new debug task instead.", TrCode: "cloudbrain.result_cleared"} | |||
var MODEL_NOT_EXISTS = &BizError{Code: 2008, DefaultMsg: "The model in the task does not exist or has been deleted", TrCode: "ai_task.model_not_exist"} | |||
var RESULT_CLEARD = &BizError{Code: 2009, DefaultMsg: "The files of the task have been cleared, can not restart or retrain any more, please create a new task instead.", TrCode: "ai_task.result_cleared"} | |||
var CREATE_FAILED = &BizError{Code: 2010, DefaultMsg: "Create AI task failed", TrCode: "ai_task.create_failed"} | |||
var RESTART_FAILED = &BizError{Code: 2011, DefaultMsg: "Restart AI task failed", TrCode: "ai_task.restart_failed"} | |||
var STOP_FAILED = &BizError{Code: 2012, DefaultMsg: "Stop AI task failed", TrCode: "ai_task.stop_failed"} | |||
var DATASET_SIZE_OVER_LIMIT = &BizError{Code: 2013, DefaultMsg: "The size of dataset exceeds limitation", TrCode: "ai_task.dataset_size_over_limit"} | |||
var BOOT_FILE_MUST_BE_PYTHON = &BizError{Code: 2013, DefaultMsg: "The boot file must be a python file", TrCode: "ai_task.boot_file_must_python"} | |||
var BOOT_FILE_NOT_EXIST = &BizError{Code: 2014, DefaultMsg: "The boot file not exist", TrCode: "ai_task.boot_file_not_exist"} | |||
var BOOT_FILE_MUST_BE_PYTHON = &BizError{Code: 2015, DefaultMsg: "The boot file must be a python file", TrCode: "ai_task.boot_file_must_python"} | |||
var NO_NODE_RIGHR = &BizError{Code: 2016, DefaultMsg: "The boot file must be a python file", TrCode: "repo.modelarts.no_node_right"} | |||
var DATASET_SELECT_ERROR = &BizError{Code: 2017, DefaultMsg: "Dataset select error: the count exceed the limit or has same name", TrCode: "cloudbrain.error.dataset_select"} | |||
var PARTIAL_DATASETS_NOT_AVAILABLE = &BizError{Code: 2018, DefaultMsg: "There are non-existent or deleted files in the selected dataset file, please select again", TrCode: "cloudbrain.error.partial_datasets_not_available"} | |||
var LOAD_CODE_FAILED = &BizError{Code: 2019, DefaultMsg: "Fail to load code, please check if the right branch is selected.", TrCode: "cloudbrain.load_code_failed"} | |||
var BRANCH_NOT_EXISTS = &BizError{Code: 2020, DefaultMsg: "The branch does not exist", TrCode: "ai_task.branch_not_exists"} | |||
var MODEL_NUM_OVER_LIMIT = &BizError{Code: 2021, DefaultMsg: "The number of models exceeds the limit of 30", TrCode: "repo.debug.manage.model_num_over_limit"} | |||
var DATASET_NUMBER_OVER_LIMIT = &BizError{Code: 2022, DefaultMsg: "The dataset count exceed the limit", TrCode: "ai_task.dataset_number_over_limit"} |
@@ -12,6 +12,8 @@ import ( | |||
"text/template" | |||
"time" | |||
"code.gitea.io/gitea/routers/super_compute" | |||
"code.gitea.io/gitea/routers/tech" | |||
"code.gitea.io/gitea/routers/badge" | |||
@@ -1303,6 +1305,12 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainInferencForm{}), context.PointAccount(), repo.CloudBrainInferenceJobCreate) | |||
}) | |||
}, context.RepoRef()) | |||
m.Group("/supercompute", func() { | |||
m.Get("", reqRepoCloudBrainReader, super_compute.GetAPPList) | |||
m.Get("/job", reqRepoCloudBrainReader, super_compute.GetJobList) | |||
m.Get("/job/create", reqRepoCloudBrainWriter, super_compute.CreateUI) | |||
m.Get("/job/:id", reqRepoCloudBrainReader, super_compute.GetDetailUI) | |||
}) | |||
m.Group("/grampus", func() { | |||
m.Group("/notebook", func() { | |||
m.Group("/:id", func() { | |||
@@ -0,0 +1,33 @@ | |||
package super_compute | |||
import ( | |||
"code.gitea.io/gitea/modules/base" | |||
"code.gitea.io/gitea/modules/context" | |||
) | |||
const ( | |||
tplAPPList base.TplName = "repo/supercompute/index" | |||
tplJobList base.TplName = "repo/supercompute/list" | |||
tplCreateUI base.TplName = "repo/supercompute/create" | |||
tplDetail base.TplName = "repo/supercompute/detail" | |||
) | |||
func GetAPPList(ctx *context.Context) { | |||
ctx.Data["PageIsSuperCompute"] = true | |||
ctx.HTML(200, tplAPPList) | |||
} | |||
func GetJobList(ctx *context.Context) { | |||
ctx.Data["PageIsSuperCompute"] = true | |||
ctx.HTML(200, tplJobList) | |||
} | |||
func CreateUI(ctx *context.Context) { | |||
ctx.Data["PageIsSuperCompute"] = true | |||
ctx.HTML(200, tplCreateUI) | |||
} | |||
func GetDetailUI(ctx *context.Context) { | |||
ctx.Data["PageIsSuperCompute"] = true | |||
ctx.HTML(200, tplDetail) | |||
} |
@@ -1,19 +1,26 @@ | |||
package cluster | |||
import ( | |||
"errors" | |||
"fmt" | |||
"strings" | |||
"time" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/manager/client/grampus" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
model_grampus "code.gitea.io/gitea/modules/grampus" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/schedule" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
"errors" | |||
"fmt" | |||
"io/ioutil" | |||
"path" | |||
"strings" | |||
"time" | |||
) | |||
type C2NetClusterAdapter struct { | |||
@@ -51,7 +58,7 @@ func (c C2NetClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBookTaskRequ | |||
return convertGrampus2NoteBookRes(jobResult), nil | |||
} | |||
func (c C2NetClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
func (c C2NetClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
processType := req.ComputeSource.FullName | |||
images, err := grampus.GetImages(processType, string(req.JobType)) | |||
if err != nil { | |||
@@ -68,6 +75,10 @@ func (c C2NetClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.Cluster | |||
return r, false, nil | |||
} | |||
func (c C2NetClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
return c.GetNotebookImages(req) | |||
} | |||
func ConvertGrampusImageToStandard(image models.GrampusImage) entity.ClusterImage { | |||
return entity.ClusterImage{ | |||
ImageId: image.ID, | |||
@@ -88,8 +99,6 @@ func convertNoteBookReq2Grampus(req entity.CreateNoteBookTaskRequest) models.Cre | |||
if models.DCU == req.Tasks[0].Spec.ComputeResource { | |||
command = "cp -r /code /tmp;cp -r /dataset /tmp;cp -r /pretrainmodel /tmp;" | |||
} | |||
//command := fmt.Sprintf(commandGpuDebug, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, codePath) | |||
// command := "bash && cd /code && unzip master.zip && cd test-export-data && uvicorn train:app --host 0.0.0.0 --port $OCTOPUS_NOTEBOOK_PORT" | |||
if models.NPU == req.Tasks[0].Spec.ComputeResource { | |||
command = "" | |||
} | |||
@@ -164,12 +173,12 @@ func getCopyCmd(datasetName, repoName, bootfilepath string) string { | |||
func convertNoteBookTask2Grampus(t entity.NoteBookTask, command string) models.GrampusNotebookTask { | |||
code := models.GrampusDataset{} | |||
codeArray := convertContainerArray2Grampus(t.Code) | |||
codeArray := convertContainerArray2GrampusArray(t.Code) | |||
if codeArray != nil && len(codeArray) > 0 { | |||
code = codeArray[0] | |||
} | |||
output := models.GrampusDataset{} | |||
outputArray := convertContainerArray2Grampus(t.OutPut) | |||
outputArray := convertContainerArray2GrampusArray(t.OutPut) | |||
if outputArray != nil && len(outputArray) > 0 { | |||
output = outputArray[0] | |||
} | |||
@@ -178,9 +187,9 @@ func convertNoteBookTask2Grampus(t entity.NoteBookTask, command string) models.G | |||
ResourceSpecId: t.Spec.SourceSpecId, | |||
ImageId: t.ImageId, | |||
ImageUrl: t.ImageUrl, | |||
Datasets: convertContainerArray2Grampus(t.Datasets), | |||
OutPut: output, | |||
Datasets: convertContainerArray2GrampusArray(t.Datasets), | |||
Code: code, | |||
OutPut: output, | |||
AutoStopDuration: t.AutoStopDuration, | |||
Capacity: t.Capacity, | |||
Command: command, | |||
@@ -188,7 +197,7 @@ func convertNoteBookTask2Grampus(t entity.NoteBookTask, command string) models.G | |||
} | |||
} | |||
func convertContainerArray2Grampus(containerDatas []entity.ContainerData) []models.GrampusDataset { | |||
func convertContainerArray2GrampusArray(containerDatas []entity.ContainerData) []models.GrampusDataset { | |||
res := make([]models.GrampusDataset, len(containerDatas)) | |||
for i := 0; i < len(containerDatas); i++ { | |||
d := containerDatas[i] | |||
@@ -197,6 +206,14 @@ func convertContainerArray2Grampus(containerDatas []entity.ContainerData) []mode | |||
return res | |||
} | |||
func convertContainerArray2Grampus(containerDatas []entity.ContainerData) models.GrampusDataset { | |||
res := models.GrampusDataset{} | |||
if containerDatas != nil && len(containerDatas) > 0 { | |||
res = convertContainer2Grampus(containerDatas[0]) | |||
} | |||
return res | |||
} | |||
func convertContainer2Grampus(d entity.ContainerData) models.GrampusDataset { | |||
return models.GrampusDataset{ | |||
Name: d.Name, | |||
@@ -206,6 +223,7 @@ func convertContainer2Grampus(d entity.ContainerData) models.GrampusDataset { | |||
ContainerPath: d.ContainerPath, | |||
ReadOnly: d.ReadOnly, | |||
GetBackEndpoint: d.GetBackEndpoint, | |||
Size: d.Size, | |||
} | |||
} | |||
@@ -248,24 +266,24 @@ func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartRespo | |||
} | |||
} | |||
func (c C2NetClusterAdapter) DeleteNoteBook(string) error { | |||
func (c C2NetClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error { | |||
return nil | |||
} | |||
func (c C2NetClusterAdapter) StopNoteBook(jobId string) error { | |||
_, err := grampus.StopJob(jobId, string(models.JobTypeDebug)) | |||
func (c C2NetClusterAdapter) StopNoteBook(opts entity.JobIdAndVersionId) error { | |||
_, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug)) | |||
if err != nil { | |||
log.Error("StopNoteBook(%s) failed:%v", jobId, err) | |||
log.Error("StopNoteBook(%s) failed:%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c C2NetClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) { | |||
if jobId == "" { | |||
func (c C2NetClusterAdapter) QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) { | |||
if opts.JobID == "" { | |||
return nil, errors.New("jobID is empty") | |||
} | |||
result, err := grampus.GetNotebookJob(jobId) | |||
result, err := grampus.GetNotebookJob(opts.JobID) | |||
if err != nil { | |||
return nil, err | |||
} | |||
@@ -297,7 +315,7 @@ func (c C2NetClusterAdapter) GetNoteBookLog(jobId string) (*entity.ClusterLog, e | |||
} | |||
func (c C2NetClusterAdapter) GetNoteBookUrl(jobId string) (string, error) { | |||
res, err := c.QueryNoteBook(jobId) | |||
res, err := c.QueryNoteBook(entity.JobIdAndVersionId{JobID: jobId}) | |||
if err != nil { | |||
return "", err | |||
} | |||
@@ -367,67 +385,148 @@ func convertTrainReq2Grampus(req entity.CreateTrainTaskRequest) models.CreateGra | |||
} | |||
func generateGrampusTrainCommand(req entity.CreateTrainTaskRequest) string { | |||
var command string | |||
t := req.Tasks[0] | |||
containerConfig := req.TaskConfig | |||
computeResource := t.Spec.ComputeResource | |||
var CommandPrepareScriptNpu = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;" | |||
//todo 现状:NPU和GPU的目录不一致,原因? | |||
var workDir = "/tmp/" | |||
if computeResource == models.NPU { | |||
workDir = "/cache/" | |||
} | |||
command += "pwd;cd " + workDir + ";" + CommandPrepareScriptNpu | |||
if computeResource == models.GPU || computeResource == models.GCU { | |||
command += "cd " + workDir + "code;echo \"start unzip code\";unzip -q master.zip;" | |||
command += "cd " + workDir + "dataset;echo \"start to unzip dataset\";" | |||
var unZipDatasetCommand string | |||
for _, d := range t.Datasets { | |||
if strings.HasSuffix(d.Name, ".tar.gz") { | |||
unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + d.Name + "';" | |||
} else { | |||
unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + d.Name + "' -d './" + strings.TrimSuffix(d.Name, ".zip") + "';" | |||
} | |||
unZipDatasetCommand += "rm -f '" + d.Name + "';" | |||
var codePath = containerConfig.GetContainerPath(entity.ContainerCode) | |||
var modelPath = containerConfig.GetContainerPath(entity.ContainerPreTrainModel) | |||
var datasetPath = containerConfig.GetContainerPath(entity.ContainerDataset) | |||
var outputPath = containerConfig.GetContainerPath(entity.ContainerOutPutPath) | |||
var modelFilePath = "" | |||
if t.PreTrainModel != nil && len(t.PreTrainModel) > 0 { | |||
modelFilePath = t.PreTrainModel[0].ContainerPath | |||
} | |||
builder := &entity.CommandBuilder{} | |||
builder. | |||
//mkdir dirs | |||
Add(buildMkdirCommand(codePath, modelPath, datasetPath, outputPath)). | |||
//unzip code | |||
Add(buildUnzipCodeCommand(codePath, t.Code[0].ContainerPath, computeResource)). | |||
//unzip dataset | |||
Add(buildUnzipDatasetCommand(t.Datasets, datasetPath, computeResource)). | |||
//export | |||
Add(buildExportCommand(req.Name, computeResource)). | |||
//exec code | |||
Add(buildExecCodeCommand(path.Join(codePath, strings.ToLower(t.RepoName)), modelFilePath, t.BootFile, computeResource, req.Name, t.Params)) | |||
return builder.ToString() | |||
} | |||
func buildMkdirCommand(dirs ...string) *entity.CommandBuilder { | |||
builder := &entity.CommandBuilder{} | |||
for _, dir := range dirs { | |||
builder.Next(entity.NewCommand("mkdir", "-p", dir)) | |||
} | |||
return builder | |||
} | |||
// buildUnzipCodeCommand emits the shell steps that unpack the uploaded code
// archive inside the container: announce, cd into the code directory, unzip
// quietly, then list the result. NPU jobs get an empty builder (no unpack
// step is generated for them).
func buildUnzipCodeCommand(codeConfigPath, codeFilePath, computeSource string) *entity.CommandBuilder {
	builder := &entity.CommandBuilder{}
	if computeSource == models.NPU {
		return builder
	}
	builder.
		Next(entity.NewCommand("echo", "'start to unzip code'")).
		Next(entity.NewCommand("cd", codeConfigPath)).
		Next(entity.NewCommand("unzip", "-q", codeFilePath)).
		Next(entity.NewCommand("echo", "'unzip code finished'")).
		Next(entity.NewCommand("ls", "-l")).
		// NOTE(review): the hard-coded "mnist_pytorchexample_gpu" listing looks
		// like a debugging leftover — confirm and remove if so.
		Next(entity.NewCommand("ls", "-l", "mnist_pytorchexample_gpu"))
	return builder
}
func buildUnzipDatasetCommand(datasets []entity.ContainerData, datasetPath, computeSource string) *entity.CommandBuilder { | |||
builder := &entity.CommandBuilder{} | |||
if computeSource == models.NPU { | |||
return builder | |||
} | |||
if len(datasets) == 0 { | |||
return nil | |||
} | |||
builder.Next(entity.NewCommand("cd", datasetPath)). | |||
Next(entity.NewCommand("echo", "'start to unzip datasets'")) | |||
//单数据集 | |||
if len(datasets) == 1 { | |||
if strings.HasSuffix(datasets[0].Name, ".tar.gz") { | |||
builder.Next(entity.NewCommand("tar", "--strip-components=1", "-zxvf", "'"+datasets[0].Name+"'")) | |||
} else { | |||
builder.Next(entity.NewCommand("unzip", "-q", "'"+datasets[0].Name+"'")) | |||
} | |||
builder.Next(entity.NewCommand("ls", "-l")) | |||
builder.Next(entity.NewCommand("echo", "'unzip datasets finished'")) | |||
return builder | |||
} | |||
//多数据集 | |||
for i := 0; i < len(datasets); i++ { | |||
name := datasets[i].Name | |||
if strings.HasSuffix(name, ".tar.gz") { | |||
builder.Next(entity.NewCommand("tar", "-zxvf", name)) | |||
} else { | |||
builder.Next(entity.NewCommand("unzip", "-q", "'"+name+"'", "-d", "'./"+strings.TrimSuffix(name, ".zip")+"'")) | |||
} | |||
command += "echo \"unzip finished;start to exec code;\";" | |||
} | |||
builder.Next(entity.NewCommand("ls", "-l")) | |||
builder.Next(entity.NewCommand("echo", "'unzip datasets finished'")) | |||
return builder | |||
} | |||
//exec code | |||
var parameters = t.Params.Parameter | |||
var paramCode string | |||
func buildExportCommand(jobName, computeResource string) *entity.CommandBuilder { | |||
builder := &entity.CommandBuilder{} | |||
if len(parameters) > 0 { | |||
for _, parameter := range parameters { | |||
//todo value需要单引号,再统一一下 | |||
paramCode += " --" + parameter.Label + "=" + parameter.Value | |||
} | |||
if computeResource == models.NPU { | |||
outputRemotePath := setting.CodePathPrefix + jobName + modelarts.OutputPath | |||
builder.Next(entity.NewCommand("export", "bucket="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath)) | |||
} else { | |||
outputRemotePath := setting.CBCodePathPrefix + jobName + cloudbrain.ModelMountPath + "/" | |||
builder.Next(entity.NewCommand("export", "env="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath)) | |||
} | |||
return builder | |||
} | |||
func buildExecCodeCommand(codeDirPath, modelFilePath, bootFile, computeResource, jobName string, params models.Parameters) *entity.CommandBuilder { | |||
builder := &entity.CommandBuilder{} | |||
builder.Next(entity.NewCommand("echo", "'start to exec code'")) | |||
var commandCode string | |||
modelRemoteObsUrl := getNpuModelRemoteObsUrl(t.Name) | |||
if t.Spec.ComputeResource == models.NPU { | |||
paramCode += " --model_url=" + modelRemoteObsUrl | |||
commandCode = "source /home/ma-user/.bashrc;python /home/ma-user/davinci/train/davincirun.py python /home/ma-user/openi.py " + paramCode + ";" | |||
} else if t.Spec.ComputeResource == models.GPU || t.Spec.ComputeResource == models.GCU { | |||
if len(t.Models) > 0 { | |||
paramCode += " --ckpt_url" + "=" + workDir + "pretrainmodel/" + t.Models[0].Name | |||
var paramCode string | |||
for _, param := range params.Parameter { | |||
paramCode += " --'" + param.Label + "'='" + param.Value + "'" | |||
} | |||
if computeResource == models.NPU { | |||
modelRemoteObsUrl := getNpuModelRemoteObsUrl(jobName) | |||
builder.Next(entity.NewCommand("source", "/home/ma-user/.bashrc")). | |||
Next(entity.NewCommand("python", "/home/ma-user/davinci/train/davincirun.py", "python", "/home/ma-user/openi.py", paramCode, "--model_url="+modelRemoteObsUrl)) | |||
} else if computeResource == models.GCU { | |||
builder.Next(entity.NewCommand("cd", codeDirPath)) | |||
if modelFilePath != "" { | |||
builder.Next(entity.NewCommand("python3", bootFile, paramCode, "--ckpt_url='"+modelFilePath+"'")) | |||
} else { | |||
builder.Next(entity.NewCommand("python3", bootFile, paramCode)) | |||
} | |||
} else { | |||
builder.Next(entity.NewCommand("cd", codeDirPath)) | |||
if modelFilePath != "" { | |||
builder.Next(entity.NewCommand("python", bootFile, paramCode, "--ckpt_url='"+modelFilePath+"'")) | |||
} else { | |||
builder.Next(entity.NewCommand("python", bootFile, paramCode)) | |||
} | |||
commandCode = "cd " + workDir + "code/" + strings.ToLower(t.Code.Name) + ";python " + t.BootFile + paramCode + ";" | |||
} | |||
command += commandCode | |||
builder.Next(entity.NewCommand("result=$?")) | |||
builder.Next(entity.NewCommand("bash", "-c", "\"[[ $result -eq 0 ]] && exit 0 || exit -1\"")) | |||
return builder | |||
} | |||
//get exec result | |||
commandGetRes := "result=$?;" | |||
command += commandGetRes | |||
func buildParamCommand(outputRemotePath, computeResource string) *entity.CommandBuilder { | |||
builder := &entity.CommandBuilder{} | |||
builder.Next(entity.NewCommand("echo", "'start to exec code'")) | |||
//check exec result | |||
commandCheckRes := "bash -c \"[[ $result -eq 0 ]] && exit 0 || exit -1\"" | |||
command += commandCheckRes | |||
if computeResource == models.NPU { | |||
builder.Next(entity.NewCommand("export", "bucket="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath)) | |||
} else { | |||
builder.Next(entity.NewCommand("export", "env="+setting.Grampus.Env, "&&", "export", "remote_path="+outputRemotePath)) | |||
} | |||
return command | |||
return builder | |||
} | |||
// BucketRemote names the remote bucket used by the Grampus integration.
// NOTE(review): exact purpose inferred from neighboring NPU object-key helpers — confirm.
var BucketRemote = "grampus"
@@ -443,18 +542,19 @@ func getNpuModelObjectKey(jobName string) string { | |||
func convertTrainTask2Grampus(t entity.TrainTask, command string) models.GrampusTasks { | |||
return models.GrampusTasks{ | |||
Name: t.Name, | |||
ResourceSpecId: t.ResourceSpecId, | |||
ImageId: t.ImageId, | |||
ImageUrl: t.ImageUrl, | |||
Datasets: convertContainerArray2Grampus(t.Datasets), | |||
Code: convertContainer2Grampus(t.Code), | |||
Command: command, | |||
CenterID: t.CenterID, | |||
ReplicaNum: 1, | |||
Models: convertContainerArray2Grampus(t.Models), | |||
BootFile: t.BootFile, | |||
OutPut: convertContainer2Grampus(t.OutPut), | |||
Name: t.Name, | |||
ResourceSpecId: t.ResourceSpecId, | |||
ImageId: t.ImageId, | |||
ImageUrl: t.ImageUrl, | |||
Datasets: convertContainerArray2GrampusArray(t.Datasets), | |||
Code: convertContainerArray2Grampus(t.Code), | |||
Command: command, | |||
CenterID: t.CenterID, | |||
ReplicaNum: 1, | |||
Models: convertContainerArray2GrampusArray(t.PreTrainModel), | |||
BootFile: t.BootFile, | |||
OutPut: convertContainerArray2Grampus(t.OutPut), | |||
WorkServerNumber: t.WorkServerNumber, | |||
} | |||
} | |||
@@ -474,19 +574,270 @@ func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *entity.Creat | |||
} | |||
} | |||
func (c C2NetClusterAdapter) DeleteTrainJob(string) error { | |||
// DeleteTrainJob is currently a no-op for the C2Net cluster and always
// reports success.
func (c C2NetClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error {
	return nil
}
func (c C2NetClusterAdapter) StopTrainJob(string) error { | |||
func (c C2NetClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error { | |||
_, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug)) | |||
if err != nil { | |||
log.Error("StopNoteBook(%s) failed:%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c C2NetClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) { | |||
return nil, nil | |||
func (c C2NetClusterAdapter) QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) { | |||
if opts.JobID == "" { | |||
return nil, errors.New("jobID is empty") | |||
} | |||
result, err := grampus.GetJob(opts.JobID) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if result == nil { | |||
return nil, nil | |||
} | |||
return convertGrampusTrainJobResponse(result.JobInfo), nil | |||
} | |||
func (c C2NetClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
func (c C2NetClusterAdapter) QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) { | |||
res, err := grampus.GetJobListByJobName(jobName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
result := make([]*entity.QueryTaskResponse, 0) | |||
if res != nil { | |||
for i := 0; i < len(res.JobInfos); i++ { | |||
if res.JobInfos[i].Name == jobName { | |||
result = append(result, entity.ConvertGrampusTrainResponse(res.JobInfos[i])) | |||
} | |||
} | |||
} | |||
return result, nil | |||
} | |||
func (c C2NetClusterAdapter) GetTrainLog(jobId string) (*entity.ClusterLog, error) { | |||
return nil, nil | |||
func convertGrampusTrainJobResponse(job models.GrampusJobInfo) *entity.QueryTaskResponse { | |||
if len(job.Tasks) == 0 { | |||
return nil | |||
} | |||
task := job.Tasks[0] | |||
centerId := "" | |||
if len(task.CenterID) > 0 { | |||
centerId = task.CenterID[0] | |||
} | |||
centerName := "" | |||
if len(task.CenterName) > 0 { | |||
centerName = task.CenterName[0] | |||
} | |||
return &entity.QueryTaskResponse{ | |||
StartedAt: timeutil.TimeStamp(job.StartedAt), | |||
CompletedAt: timeutil.TimeStamp(job.CompletedAt), | |||
Status: job.Status, | |||
CenterId: centerId, | |||
CenterName: centerName, | |||
JobId: job.JobID, | |||
} | |||
} | |||
func (c C2NetClusterAdapter) GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) { | |||
exitDiagnostics := getGrampusExitDiagnostics(opts.JobId) | |||
var content string | |||
var err error | |||
if opts.WorkServerNum > 1 { | |||
if opts.WorkServerNum < 1 || opts.NodeId > opts.WorkServerNum-1 { | |||
return nil, errors.New("query parameter is wrong") | |||
} | |||
content, err = grampus.GetTrainJobLog(opts.JobId, opts.NodeId) | |||
} else { | |||
content, err = grampus.GetTrainJobLog(opts.JobId) | |||
} | |||
if err != nil { | |||
log.Error("GetLog err.opts=%+v,err=%v", opts, err) | |||
content = "" | |||
} | |||
return &entity.ClusterLog{ | |||
Content: content + "\n" + exitDiagnostics, | |||
}, nil | |||
} | |||
func getGrampusExitDiagnostics(jobId string) string { | |||
result, err := grampus.GetJob(jobId) | |||
if err != nil { | |||
log.Error("GetJob(%s) failed:%v", jobId, err) | |||
return "" | |||
} | |||
if result != nil { | |||
return result.ExitDiagnostics | |||
} | |||
return "" | |||
} | |||
func (c C2NetClusterAdapter) GetLogDownloadInfo(opts entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) { | |||
res, err := c.GetLog(entity.ClusterLogOpts{ | |||
JobId: opts.JobId, | |||
NodeId: opts.NodeId, | |||
WorkServerNum: opts.WorkServerNum, | |||
}) | |||
if err != nil { | |||
log.Error("error occurs when attempting to get log content.opts=%+v err=%v", opts, err) | |||
return nil, err | |||
} | |||
fileName := opts.JobName + "-log.txt" | |||
if opts.WorkServerNum > 1 { | |||
fileName = opts.JobName + "-" + fmt.Sprint(opts.NodeId) + "-log.txt" | |||
} | |||
return &entity.FileDownloadInfo{ | |||
Readers: []entity.FileReader{{Reader: ioutil.NopCloser(strings.NewReader(res.Content))}}, | |||
ResultType: entity.FileTypeTXT, | |||
ResultFileName: fileName, | |||
}, nil | |||
} | |||
func (c C2NetClusterAdapter) GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
url, err := helper.GetSignedDownloadUrl(opts.Path) | |||
if err != nil { | |||
log.Error("GetSignedDownloadUrl err.opts=%+v,err =%v", opts, err) | |||
return nil, err | |||
} | |||
return &entity.FileDownloadInfo{ | |||
DownloadUrl: url, | |||
}, nil | |||
} | |||
// GetAllOutputDownloadInfo delegates to the package-level shared
// implementation used by all cluster adapters.
func (c C2NetClusterAdapter) GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	return GetAllOutputDownloadInfo(opts)
}
func (c C2NetClusterAdapter) GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) { | |||
workServerNum := opts.WorkServerNum | |||
if opts.WorkServerNum < 1 { | |||
workServerNum = 1 | |||
} | |||
res := make([]entity.AITaskNodeInfo, workServerNum) | |||
for i := 0; i < workServerNum; i++ { | |||
res[i] = entity.AITaskNodeInfo{ | |||
ID: i, | |||
} | |||
} | |||
return res, nil | |||
} | |||
func (c C2NetClusterAdapter) GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) { | |||
var err error | |||
startTime := opts.StartTime | |||
endTime := opts.EndTime | |||
nodeId := opts.NodeId | |||
jobId := opts.JobId | |||
if opts.ComputeSource == models.NPU { | |||
startTime = 0 | |||
endTime = 0 | |||
} else { | |||
if startTime == 0 { | |||
startTime = time.Now().Unix() - 30*60 | |||
} | |||
if endTime == 0 { | |||
endTime = time.Now().Unix() | |||
} | |||
} | |||
var result models.NewModelArtsMetricStatisticResult | |||
if opts.WorkServerNumber <= 1 { | |||
result, err = grampus.GetGrampusMetrics(jobId, startTime, endTime) | |||
} else { | |||
if nodeId > opts.WorkServerNumber-1 { | |||
return nil, response.PARAM_ERROR.ToError() | |||
} | |||
result, err = grampus.GetGrampusMetrics(opts.JobId, opts.StartTime, opts.EndTime, nodeId) | |||
} | |||
if err != nil { | |||
log.Error("GetGrampusMetrics error. opts=%+v err= %v", opts, err) | |||
return nil, err | |||
} | |||
return transferGrampusMetrics2Standard(result), nil | |||
} | |||
func transferGrampusMetrics2Standard(result models.NewModelArtsMetricStatisticResult) *entity.ResourceUsage { | |||
m := make([]entity.MetricsInfo, 0) | |||
for i := 0; i < len(result.MetricsInfo); i++ { | |||
m = append(m, entity.MetricsInfo{ | |||
Name: result.MetricsInfo[i].Metric, | |||
Value: result.MetricsInfo[i].Value, | |||
}) | |||
} | |||
return &entity.ResourceUsage{ | |||
Interval: 0, | |||
MetricsInfo: m, | |||
} | |||
} | |||
func (c C2NetClusterAdapter) GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) { | |||
if jobId == "" { | |||
log.Error("jobid is empty") | |||
return nil, errors.New("jobid is empty") | |||
} | |||
jobResult, err := grampus.GetTrainJobEvents(jobId) | |||
if err != nil { | |||
log.Error("GetTrainJobEvents failed:%v", err) | |||
return nil, err | |||
} | |||
r := parseC2NetEventsToOperationProfile(jobResult.JobEvents) | |||
getJobResult, err := grampus.GetJob(jobId) | |||
if err == nil && getJobResult != nil && getJobResult.ExitDiagnostics != "" { | |||
r.Events = append(r.Events, entity.ProfileEvent{ | |||
Message: getJobResult.ExitDiagnostics, | |||
Reason: "Exit", | |||
}) | |||
} | |||
return r, nil | |||
} | |||
func (c C2NetClusterAdapter) GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) { | |||
status, err := schedule.GetModelScheduleStatus(opts.JobId) | |||
if err != nil { | |||
log.Error("GetModelScheduleStatus(%s) failed:%v", opts.JobId, err) | |||
return nil, err | |||
} | |||
if status != models.ModelMigrateSuccess { | |||
return &entity.ClusterAITaskOutput{ | |||
Status: status, | |||
Path: opts.ParentDir, | |||
FileList: []storage.FileInfo{}, | |||
}, nil | |||
} | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
fileList, err := helper.GetOneLevelObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir)) | |||
if err != nil { | |||
log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err) | |||
return nil, err | |||
} | |||
return &entity.ClusterAITaskOutput{ | |||
Status: status, | |||
Path: opts.ParentDir, | |||
FileList: fileList, | |||
}, nil | |||
} | |||
func (c C2NetClusterAdapter) GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) { | |||
status, err := schedule.GetModelScheduleStatus(opts.JobId) | |||
if err != nil { | |||
log.Error("GetModelScheduleStatus(%s) failed:%v", opts.JobId, err) | |||
return nil, err | |||
} | |||
if status != models.ModelMigrateSuccess { | |||
return &entity.AllAITaskOutput{FileList: []storage.FileInfo{}}, nil | |||
} | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
fileList, err := helper.GetAllObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir)) | |||
if err != nil { | |||
log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err) | |||
return nil, err | |||
} | |||
return &entity.AllAITaskOutput{FileList: fileList}, nil | |||
} |
@@ -2,14 +2,18 @@ package cluster | |||
import "C" | |||
import ( | |||
"encoding/json" | |||
"errors" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"io/ioutil" | |||
"path" | |||
"strings" | |||
) | |||
type CloudbrainOneClusterAdapter struct { | |||
@@ -33,10 +37,14 @@ func (c CloudbrainOneClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBook | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
func (c CloudbrainOneClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
return nil, true, nil | |||
} | |||
// GetTrainImages returns the image list for train jobs by delegating to the
// notebook image lookup (which currently returns an empty list).
func (c CloudbrainOneClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
	return c.GetNotebookImages(req)
}
var SubTaskName = "task1" | |||
func convertNoteBookReq2CloudbrainOne(req entity.CreateNoteBookTaskRequest) models.CreateJobParams { | |||
@@ -100,25 +108,25 @@ func (c CloudbrainOneClusterAdapter) RestartNoteBook(string) (*entity.RestartNot | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) DeleteNoteBook(string) error { | |||
func (c CloudbrainOneClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error { | |||
return nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) StopNoteBook(jobId string) error { | |||
err := cloudbrain.StopJob(jobId) | |||
func (c CloudbrainOneClusterAdapter) StopNoteBook(opts entity.JobIdAndVersionId) error { | |||
err := cloudbrain.StopJob(opts.JobID) | |||
if err != nil { | |||
log.Error("StopNoteBook(%s) failed:%v", jobId, err) | |||
log.Error("StopNoteBook(%s) failed:%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) { | |||
if jobId == "" { | |||
func (c CloudbrainOneClusterAdapter) QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) { | |||
if opts.JobID == "" { | |||
log.Error("jobid is empty") | |||
return nil, errors.New("jobid is empty") | |||
} | |||
jobResult, err := cloudbrain.GetJob(jobId) | |||
jobResult, err := cloudbrain.GetJob(opts.JobID) | |||
if err != nil { | |||
log.Error("QueryNoteBook failed:%v", err) | |||
return nil, err | |||
@@ -178,7 +186,7 @@ func (c CloudbrainOneClusterAdapter) GetNoteBookOperationProfile(jobId string) ( | |||
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
ExitDiagnostics := taskRes.TaskStatuses[0].ExitDiagnostics | |||
return parseDiagnosticsToOperationProfile(result.JobStatus.AppExitDiagnostics,ExitDiagnostics), nil | |||
return parseDiagnosticsToOperationProfile(result.JobStatus.AppExitDiagnostics, ExitDiagnostics), nil | |||
} | |||
func parseDiagnosticsToOperationProfile(appExitDiagnostics string, exitDiagnostics string) *entity.OperationProfile { | |||
@@ -216,30 +224,318 @@ func parseDiagnosticsToOperationProfile(appExitDiagnostics string, exitDiagnosti | |||
Action: e.Action, | |||
}) | |||
} | |||
if exitDiagnostics != ""{ | |||
if exitDiagnostics != "" { | |||
events = append(events, entity.ProfileEvent{ | |||
Message: exitDiagnostics, | |||
Reason: "Error", | |||
Reason: "Error", | |||
}) | |||
} | |||
return &entity.OperationProfile{Events: events} | |||
} | |||
func (c CloudbrainOneClusterAdapter) CreateTrainJob(entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
func (c CloudbrainOneClusterAdapter) CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) { | |||
jobResult, err := cloudbrain.CreateJob(req.Name, convertTrainJobReq2CloudbrainOne(req)) | |||
if err != nil { | |||
log.Error("CreateNoteBook failed: %v", err.Error()) | |||
return nil, err | |||
} | |||
return convertCloudbrainOne2TrainJobRes(jobResult), nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) DeleteTrainJob(string) error { | |||
func convertTrainJobReq2CloudbrainOne(req entity.CreateTrainTaskRequest) models.CreateJobParams { | |||
var command = getTrainJobCommand(req) | |||
t := req.Tasks[0] | |||
return models.CreateJobParams{ | |||
JobName: t.Name, | |||
RetryCount: 1, | |||
GpuType: t.Spec.QueueCode, | |||
Image: t.ImageUrl, | |||
TaskRoles: []models.TaskRole{ | |||
{ | |||
Name: SubTaskName, | |||
TaskNumber: 1, | |||
MinSucceededTaskCount: 1, | |||
MinFailedTaskCount: 1, | |||
CPUNumber: t.Spec.CpuCores, | |||
GPUNumber: t.Spec.AccCardsNum, | |||
MemoryMB: int(t.Spec.MemGiB * 1024), | |||
ShmMB: int(t.Spec.ShareMemGiB * 1024), | |||
Command: command, | |||
NeedIBDevice: false, | |||
IsMainRole: false, | |||
UseNNI: false, | |||
}, | |||
}, | |||
Volumes: convertContainerDataArray2Volume(t.Code, t.Datasets, t.PreTrainModel, t.OutPut), | |||
} | |||
} | |||
func getTrainJobCommand(req entity.CreateTrainTaskRequest) string { | |||
form := req.Tasks[0] | |||
var command string | |||
bootFile := strings.TrimSpace(form.BootFile) | |||
params := form.Params | |||
var param string | |||
if params.Parameter != nil && len(params.Parameter) != 0 { | |||
for _, parameter := range params.Parameter { | |||
param += " --'" + parameter.Label + "'='" + parameter.Value + "'" | |||
} | |||
} | |||
//启智GPU训练暂未支持多模型,此处先视为只会有一个模型文件 | |||
if form.PreTrainModel != nil && len(form.PreTrainModel) > 0 { | |||
param += " --ckpt_url" + "=" + "'/pretrainmodel/" + form.PreTrainModel[0].Name + "'" | |||
} | |||
logPath := cloudbrain.ModelMountPath | |||
if form.LogPath != nil && len(form.LogPath) > 0 { | |||
logPath = form.LogPath[0].ContainerPath | |||
} | |||
command += "python -u /code/" + bootFile + param + " > " + logPath + "/" + req.DisplayJobName + "-" + cloudbrain.LogFile | |||
return command | |||
} | |||
func convertCloudbrainOne2TrainJobRes(res *models.CreateJobResult) *entity.CreateTrainTaskResponse { | |||
playload := res.Payload | |||
return &entity.CreateTrainTaskResponse{ | |||
JobID: playload["jobId"].(string), | |||
Status: string(models.JobWaiting), | |||
} | |||
} | |||
// DeleteTrainJob is currently a no-op for Cloudbrain-one and always reports
// success.
func (c CloudbrainOneClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error {
	return nil
}
func (c CloudbrainOneClusterAdapter) StopTrainJob(string) error { | |||
func (c CloudbrainOneClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error { | |||
err := cloudbrain.StopJob(opts.JobID) | |||
if err != nil { | |||
log.Error("StopNoteBook(%s) failed:%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) { | |||
return nil, nil | |||
// QueryTrainJob delegates to QueryNoteBook, which looks up the Cloudbrain-one
// job by id; train and notebook jobs share the same query path here.
func (c CloudbrainOneClusterAdapter) QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) {
	return c.QueryNoteBook(opts)
}
func (c CloudbrainOneClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
func (c CloudbrainOneClusterAdapter) QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) { | |||
jobResult, err := cloudbrain.GetJobListByName(jobName) | |||
if err != nil { | |||
log.Error("GetJobListByName failed:%v", err) | |||
return nil, err | |||
} | |||
result, err := models.ConvertToJobListResultPayload(jobResult.Payload) | |||
if err != nil { | |||
log.Error("ConvertToJobListResultPayload failed:%v", err) | |||
return nil, err | |||
} | |||
r := make([]*entity.QueryTaskResponse, 0) | |||
for i := 0; i < len(result.Jobs); i++ { | |||
if result.Jobs[i].Name == jobName { | |||
r = append(r, entity.ConvertCloudbrainOneQueryNotebookByNameResponse(result.Jobs[i])) | |||
} | |||
} | |||
return r, nil | |||
} | |||
// GetLog returns a windowed view of the Cloudbrain-one job log stored in
// object storage, together with the resolved start/end line numbers. When no
// log file exists yet, only the exit diagnostics are returned. It yields
// (nil, nil) when the request carries no line count or storage prefix.
func (c CloudbrainOneClusterAdapter) GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) {
	if opts.Lines <= 0 || opts.ObjectKeyPrefix == "" {
		return nil, nil
	}
	// Fetch the task's exit diagnostics (shown even without a log file).
	existStr := getCloudbrainOneExitDiagnostics(opts.JobId)
	helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)
	// Locate candidate log files under the storage prefix.
	files := getLogFilesInStorage(helper, opts.ObjectKeyPrefix, "log.txt")
	if len(files) == 0 {
		// No matching log file was found; return diagnostics only, with the
		// boundary window computed by handleOverLines.
		startLine, endLine, lines := handleOverLines(opts)
		return &entity.ClusterLog{
			Content:   existStr,
			StartLine: fmt.Sprint(startLine),
			EndLine:   fmt.Sprint(endLine),
			Lines:     lines,
		}, nil
	}
	// Default to the first file found.
	file := files[0]
	// Compute the start and end lines of the requested window.
	startLine, endLine := findStartAndEnd(opts, file.RelativePath, helper)
	// Fetch the log content for that window.
	result, realEndLine, contentLines := getLogInStorage(startLine, endLine, helper, file.RelativePath)
	// Handle reaching the top or bottom of the log (empty window).
	if contentLines == 0 {
		startLine, realEndLine, contentLines = handleOverLines(opts)
	}
	return &entity.ClusterLog{
		Content:   result,
		StartLine: fmt.Sprint(startLine),
		EndLine:   fmt.Sprint(realEndLine),
		Lines:     contentLines,
	}, nil
}
// GetLogDownloadInfo returns a download descriptor for the job's full log
// file from object storage. When no log file exists, the exit diagnostics
// (if any) are served as "exit.log.txt"; otherwise (nil, nil) is returned.
func (c CloudbrainOneClusterAdapter) GetLogDownloadInfo(opts entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	// Fetch the task's exit diagnostics.
	existStr := getCloudbrainOneExitDiagnostics(opts.JobId)
	helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)
	// Locate candidate log files under the storage prefix.
	files := getLogFilesInStorage(helper, opts.ObjectKeyPrefix, "log.txt")
	if len(files) == 0 {
		// No matching log file was found; fall back to the diagnostics text.
		if existStr != "" {
			return &entity.FileDownloadInfo{
				ResultType:     entity.FileTypeTXT,
				ResultFileName: "exit.log.txt",
				Readers: []entity.FileReader{{
					Reader: ioutil.NopCloser(strings.NewReader(existStr)),
				}},
			}, nil
		}
		return nil, nil
	}
	// Default to the first file found.
	file := files[0]
	// Open a streaming reader for the log file.
	reader, err := helper.OpenFile(file.RelativePath)
	if err != nil {
		log.Error("GetLogDownloadInfo OpenFile err.opts=%+v,err =%v", opts, err)
		return nil, err
	}
	return &entity.FileDownloadInfo{
		ResultType:     entity.FileTypeTXT,
		ResultFileName: file.FileName,
		Readers: []entity.FileReader{{
			Reader: reader,
		}},
	}, nil
}
func (c CloudbrainOneClusterAdapter) GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
url, err := helper.GetSignedDownloadUrl(opts.Path) | |||
if err != nil { | |||
log.Error("GetSignedDownloadUrl err.opts=%+v,err =%v", opts, err) | |||
return nil, err | |||
} | |||
return &entity.FileDownloadInfo{ | |||
DownloadUrl: url, | |||
}, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetTrainLog(string) (*entity.ClusterLog, error) { | |||
func (c CloudbrainOneClusterAdapter) GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) { | |||
return GetAllOutputDownloadInfo(opts) | |||
} | |||
// GetNodeInfo is not supported on the Cloudbrain One cluster; it always
// returns (nil, nil) so callers treat the node list as empty.
func (c CloudbrainOneClusterAdapter) GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) {
	return nil, nil
}
func (c CloudbrainOneClusterAdapter) GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) { | |||
return &entity.ResourceUsage{ | |||
Interval: 0, | |||
MetricsInfo: []entity.MetricsInfo{}, | |||
}, nil | |||
} | |||
func getLogInStorage(startLine, endLine int64, helper storage_helper.StorageHelper, path string) (content string, realEndLine int64, total int64) { | |||
log.Info("getLogInStorage path=%s", path) | |||
reader, err := helper.OpenFile(path) | |||
if err != nil { | |||
log.Info("elper.OpenFile error,path=%s err=%v", path, err) | |||
return "", 0, 0 | |||
} | |||
defer reader.Close() | |||
return GetLocalLog(reader, startLine, endLine) | |||
} | |||
// handleOverLines computes a fallback [startLine, endLine] window for the
// cases where paging ran past the top or the bottom of the log. The third
// return value (line count) is always 0 because no content is read here.
func handleOverLines(opts entity.ClusterLogOpts) (int64, int64, int64) {
	var startLine, endLine int64
	if opts.Direction == entity.DOWN {
		// Paging down past the end: clamp the window to end at BaseLine.
		endLine = opts.BaseLine
		startLine = endLine + 1 - opts.Lines
		if startLine < 1 {
			startLine = 1
		}
	} else {
		// Paging up past the top: pin the window to the first Lines lines.
		startLine = 1
		endLine = startLine + opts.Lines - 1
	}
	return startLine, endLine, 0
}
func getCloudbrainOneExitDiagnostics(jobId string) string { | |||
jobResult, _ := cloudbrain.GetJob(jobId) | |||
if jobResult != nil { | |||
jobRes, _ := models.ConvertToJobResultPayload(jobResult.Payload) | |||
taskRoles := jobRes.TaskRoles | |||
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
return taskRes.TaskStatuses[0].ExitDiagnostics | |||
} | |||
return "" | |||
} | |||
// findStartAndEnd computes the log window [startLine, endLine] relative to
// opts.BaseLine, moving up or down according to opts.Direction.
func findStartAndEnd(opts entity.ClusterLogOpts, filePath string, helper storage_helper.StorageHelper) (startLine int64, endLine int64) {
	baseLine := opts.BaseLine
	if opts.Direction == entity.UP {
		if baseLine == 0 {
			// BaseLine 0 means "start from the last line of the file".
			endLine = getAllLineFromFile(helper, filePath)
		} else {
			endLine = baseLine - 1
		}
		startLine = endLine - opts.Lines + 1
		if startLine <= 0 {
			startLine = 1
		}
	} else {
		// Paging down: read the Lines lines that follow BaseLine.
		startLine = baseLine + 1
		endLine = startLine + opts.Lines - 1
	}
	return startLine, endLine
}
func (c CloudbrainOneClusterAdapter) GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) { | |||
return c.GetNoteBookOperationProfile(jobId) | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
fileList, err := helper.GetOneLevelObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir)) | |||
if err != nil { | |||
log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err) | |||
return nil, err | |||
} | |||
return &entity.ClusterAITaskOutput{ | |||
Status: models.ModelMigrateSuccess, | |||
Path: opts.ParentDir, | |||
FileList: fileList, | |||
}, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
fileList, err := helper.GetAllObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir)) | |||
if err != nil { | |||
log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err) | |||
return nil, err | |||
} | |||
return &entity.AllAITaskOutput{FileList: fileList}, nil | |||
} |
@@ -2,16 +2,22 @@ package cluster | |||
import "C" | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/manager/client/cloudbrain_two" | |||
"code.gitea.io/gitea/manager/client/cloudbrain_two_cd" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"io" | |||
"path" | |||
"strconv" | |||
"strings" | |||
) | |||
type CloudbrainTwoClusterAdapter struct { | |||
@@ -73,21 +79,59 @@ func (c CloudbrainTwoClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBook | |||
return nil, nil | |||
} | |||
var cloudbrainTwoImages []entity.ClusterImage | |||
var cloudbrainTwoNotebookImages []entity.ClusterImage | |||
func (c CloudbrainTwoClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
if cloudbrainTwoImages == nil || len(cloudbrainTwoImages) == 0 { | |||
func (c CloudbrainTwoClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
if cloudbrainTwoNotebookImages == nil || len(cloudbrainTwoNotebookImages) == 0 { | |||
images := setting.StImageInfos.ImageInfo | |||
cloudbrainTwoImages = make([]entity.ClusterImage, len(images)) | |||
cloudbrainTwoNotebookImages = make([]entity.ClusterImage, len(images)) | |||
for i := 0; i < len(images); i++ { | |||
cloudbrainTwoImages[i] = entity.ClusterImage{ | |||
cloudbrainTwoNotebookImages[i] = entity.ClusterImage{ | |||
ImageId: images[i].Id, | |||
ImageName: images[i].Value, | |||
} | |||
} | |||
} | |||
return cloudbrainTwoImages, false, nil | |||
return cloudbrainTwoNotebookImages, false, nil | |||
} | |||
var cloudbrainTwoTrainImages []entity.ClusterImage | |||
// GetTrainImages returns the list of built-in train images, lazily parsed
// from setting.EngineVersions and cached in a package-level variable.
// The second return value ("no limit") is always false for this cluster.
// NOTE(review): the cache write is not synchronized — concurrent first calls
// race on cloudbrainTwoTrainImages; confirm single-goroutine use or add a
// sync.Once. An unmarshal failure is logged and swallowed (nil list, nil
// error), so callers must tolerate an empty result.
func (c CloudbrainTwoClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
	if cloudbrainTwoTrainImages == nil || len(cloudbrainTwoTrainImages) == 0 {
		var versionInfos modelarts.VersionInfo
		if err := json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
			log.Error("Unmarshal setting.EngineVersions err. req=%+v err=%v", req, err)
			return cloudbrainTwoTrainImages, false, nil
		}
		cloudbrainTwoTrainImages = make([]entity.ClusterImage, len(versionInfos.Version))
		for i := 0; i < len(versionInfos.Version); i++ {
			cloudbrainTwoTrainImages[i] = entity.ClusterImage{
				ImageId:   fmt.Sprint(versionInfos.Version[i].ID),
				ImageName: versionInfos.Version[i].Value,
				ImageUrl:  versionInfos.Version[i].Url,
			}
		}
	}
	return cloudbrainTwoTrainImages, false, nil
}
func (c CloudbrainTwoClusterAdapter) GetTrainImageByImageId(imageId string) (entity.ClusterImage, error) { | |||
if imageId == "" { | |||
return entity.ClusterImage{}, errors.New("imageId is empty") | |||
} | |||
images, _, _ := c.GetTrainImages(entity.GetImageReq{}) | |||
if images == nil { | |||
return entity.ClusterImage{}, errors.New("image not setting correctly") | |||
} | |||
for _, image := range images { | |||
if image.ImageId == imageId { | |||
return image, nil | |||
} | |||
} | |||
return entity.ClusterImage{}, errors.New("image not exists") | |||
} | |||
var poolInfos *models.PoolInfos | |||
@@ -128,8 +172,8 @@ func convertCloudbrainTwo2NoteBookRestartRes(jobId string, res *models.NotebookA | |||
} | |||
} | |||
func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(jobId string) error { | |||
task, err := models.GetNewestCloudbrainByJobId(jobId) | |||
func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(opts entity.JobIdAndVersionId) error { | |||
task, err := models.GetNewestCloudbrainByJobId(opts.JobID) | |||
if err != nil { | |||
return err | |||
} | |||
@@ -140,14 +184,14 @@ func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(jobId string) error { | |||
_, err = cloudbrain_two_cd.DelNotebook(task.JobID) | |||
} | |||
if err != nil { | |||
log.Error("DeleteNoteBook err.jobID=%s err=%v", jobId, err) | |||
log.Error("DeleteNoteBook err.jobID=%s err=%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainTwoClusterAdapter) StopNoteBook(jobId string) error { | |||
task, err := models.GetNewestCloudbrainByJobId(jobId) | |||
func (c CloudbrainTwoClusterAdapter) StopNoteBook(opts entity.JobIdAndVersionId) error { | |||
task, err := models.GetNewestCloudbrainByJobId(opts.JobID) | |||
if err != nil { | |||
return err | |||
} | |||
@@ -160,14 +204,14 @@ func (c CloudbrainTwoClusterAdapter) StopNoteBook(jobId string) error { | |||
_, err = cloudbrain_two_cd.ManageNotebook(task.JobID, param) | |||
} | |||
if err != nil { | |||
log.Error("StopNoteBook err.jobID=%s err=%v", jobId, err) | |||
log.Error("StopNoteBook err.jobID=%s err=%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainTwoClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) { | |||
task, err := models.GetNewestCloudbrainByJobId(jobId) | |||
func (c CloudbrainTwoClusterAdapter) QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) { | |||
task, err := models.GetNewestCloudbrainByJobId(opts.JobID) | |||
if err != nil { | |||
return nil, err | |||
} | |||
@@ -232,7 +276,9 @@ func (c CloudbrainTwoClusterAdapter) GetNoteBookLog(jobId string) (*entity.Clust | |||
} | |||
func (c CloudbrainTwoClusterAdapter) GetNoteBookUrl(jobId string) (string, error) { | |||
res, err := c.QueryNoteBook(jobId) | |||
res, err := c.QueryNoteBook(entity.JobIdAndVersionId{ | |||
JobID: jobId, | |||
}) | |||
if err != nil { | |||
return "", err | |||
} | |||
@@ -277,21 +323,538 @@ func parseCloudbrainTwoEventsToOperationProfile(result *models.GetNotebook2Resul | |||
return &entity.OperationProfile{Events: events} | |||
} | |||
func (c CloudbrainTwoClusterAdapter) CreateTrainJob(entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
// CreateTrainJob submits a train job to ModelArts. Built-in engine images
// (positive numeric image id) go through CreateTrainJob; everything else is
// treated as a custom user image and goes through CreateTrainJobUserImage.
// Backend errors carrying the "unknown error" prefix are normalized into
// models.NetworkError so callers can retry.
func (c CloudbrainTwoClusterAdapter) CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) {
	t := req.Tasks[0]
	var jobResult *models.CreateTrainJobResult
	var createErr error
	// An imageId of 0 or negative (or non-numeric) denotes a custom image.
	imageId := int64(-1)
	i, err := strconv.ParseInt(t.ImageId, 10, 32)
	if err == nil {
		imageId = i
	}
	if imageId <= 0 {
		image, err := c.GetTrainImageByImageId(t.ImageId)
		if err != nil {
			log.Error("GetTrainImageByImageId error.req=%+v err=%v", req, err)
			return nil, err
		}
		jobResult, createErr = modelarts.CreateTrainJobUserImage(convertCloudbrainTwoTrainJobUserImageReq(req, image))
	} else {
		param, err := convertCloudbrainTwoTrainJobReq(req)
		if err != nil {
			return nil, err
		}
		jobResult, createErr = modelarts.CreateTrainJob(param)
	}
	if createErr != nil {
		log.Error("CloudbrainTwo createTrainJob failed: %v", createErr.Error())
		// Map opaque backend failures to a retryable network error.
		if strings.HasPrefix(createErr.Error(), modelarts.UnknownErrorPrefix) {
			return nil, models.NetworkError{}
		}
		return nil, createErr
	}
	return convertCloudbrainTwoRes2Standard(jobResult), nil
}
func (c CloudbrainTwoClusterAdapter) DeleteTrainJob(string) error { | |||
return nil | |||
func convertCloudbrainTwoRes2Standard(res *models.CreateTrainJobResult) *entity.CreateTrainTaskResponse { | |||
var jobId string | |||
if res.JobID > 0 { | |||
jobId = fmt.Sprint(res.JobID) | |||
} | |||
return &entity.CreateTrainTaskResponse{ | |||
CreatedAt: res.CreateTime, | |||
JobID: jobId, | |||
Name: res.JobName, | |||
Status: modelarts.TransTrainJobStatus(res.Status), | |||
VersionID: res.VersionID, | |||
VersionName: res.VersionName, | |||
} | |||
} | |||
func (c CloudbrainTwoClusterAdapter) StopTrainJob(string) error { | |||
// convertCloudbrainTwoTrainJobReq builds the ModelArts built-in-engine train
// job parameters from the cluster-agnostic request. The task's ImageId must
// be a decimal engine id; a parse failure is returned to the caller.
func convertCloudbrainTwoTrainJobReq(req entity.CreateTrainTaskRequest) (models.CreateTrainJobParams, error) {
	t := req.Tasks[0]
	imageId, err := strconv.ParseInt(t.ImageId, 10, 64)
	if err != nil {
		log.Error("Parse imageId err.imageIdStr=%s err=%v", t.ImageId, err)
		return models.CreateTrainJobParams{}, err
	}
	return models.CreateTrainJobParams{
		JobName:     req.Name,
		Description: req.Description,
		Config: models.Config{
			WorkServerNum: t.WorkServerNumber,
			// All storage locations are encoded as OBS-style bucket paths.
			AppUrl:        JointCloudbrainTwoReqUrl(t.Code),
			BootFileUrl:   path.Join(JointCloudbrainTwoReqUrl(t.Code), t.BootFile),
			DataUrl:       JointCloudbrainTwoReqUrl(t.Datasets),
			TrainUrl:      JointCloudbrainTwoReqUrl(t.OutPut),
			LogUrl:        JointCloudbrainTwoReqUrl(t.LogPath),
			PoolID:        t.PoolId,
			CreateVersion: true,
			Flavor: models.Flavor{
				Code: t.Spec.SourceSpecId,
			},
			EngineID:  imageId,
			Parameter: handleCloudbrainTwoParameter(req).Parameter,
			ShareAddr: setting.ModelArtsShareAddr,
			MountPath: setting.ModelArtsMountPath,
			NasType:   setting.ModelArtsNasType,
		},
	}, nil
}
// JointCloudbrainTwoReqUrl joins the first container-data entry's bucket and
// object key into a "/bucket/key" path. Directories get a single trailing
// slash (path.Join cleans any trailing slash, so the TrimSuffix guards
// against a doubled one before re-appending it). Returns "" for empty input.
func JointCloudbrainTwoReqUrl(data []entity.ContainerData) string {
	if len(data) > 0 {
		d := data[0]
		s := path.Join("/", d.Bucket, d.ObjectKey)
		if d.IsDir {
			s = strings.TrimSuffix(path.Join("/", d.Bucket, d.ObjectKey, "/"), "/") + "/"
		}
		return s
	}
	return ""
}
// convertCloudbrainTwoTrainJobUserImageReq builds the ModelArts custom-image
// train job parameters from the cluster-agnostic request, wiring the user
// image URL and a synthesized container start command.
func convertCloudbrainTwoTrainJobUserImageReq(req entity.CreateTrainTaskRequest, image entity.ClusterImage) models.CreateUserImageTrainJobParams {
	t := req.Tasks[0]
	// All storage locations are encoded as OBS-style bucket paths.
	appUrl := JointCloudbrainTwoReqUrl(t.Code)
	bootFileUrl := path.Join(JointCloudbrainTwoReqUrl(t.Code), t.BootFile)
	dataUrl := JointCloudbrainTwoReqUrl(t.Datasets)
	trainUrl := JointCloudbrainTwoReqUrl(t.OutPut)
	logUrl := JointCloudbrainTwoReqUrl(t.LogPath)
	params := handleCloudbrainTwoParameter(req)
	return models.CreateUserImageTrainJobParams{
		JobName:     req.Name,
		Description: req.Description,
		Config: models.UserImageConfig{
			WorkServerNum: t.WorkServerNumber,
			AppUrl:        appUrl,
			BootFileUrl:   bootFileUrl,
			DataUrl:       dataUrl,
			TrainUrl:      trainUrl,
			LogUrl:        logUrl,
			PoolID:        t.PoolId,
			CreateVersion: true,
			Flavor: models.Flavor{
				Code: t.Spec.SourceSpecId,
			},
			UserImageUrl: image.ImageUrl,
			// The start command embeds the run parameters computed above.
			UserCommand: getCloudbrainTwoUserCommand(appUrl, t.BootFile, dataUrl, trainUrl, params),
			ShareAddr:   setting.ModelArtsShareAddr,
			MountPath:   setting.ModelArtsMountPath,
			NasType:     setting.ModelArtsNasType,
		},
	}
}
func getCloudbrainTwoDataUrl(data []entity.ContainerData) string { | |||
if len(data) == 0 { | |||
return "" | |||
} | |||
return data[0].ObjectKey | |||
} | |||
// handleCloudbrainTwoParameter assembles the run-parameter list for a
// ModelArts train job: the multi-dataset URL, pre-train model URLs, the
// user's own parameters (minus the reserved train_url/data_url labels), and
// a default DeviceTarget of Ascend when the user did not set one.
func handleCloudbrainTwoParameter(req entity.CreateTrainTaskRequest) models.Parameters {
	t := req.Tasks[0]
	var param = models.Parameters{}
	datasetUrl := getCloudbrainTwoMultiDataUrl(t.Datasets)
	if datasetUrl != "" {
		param.Parameter = append(param.Parameter, models.Parameter{
			Label: modelarts.MultiDataUrl,
			Value: datasetUrl,
		})
	}
	multiModelUrl := getCloudbrainTwoModelUrl(t.PreTrainModel)
	if multiModelUrl != "" {
		// NOTE(review): CkptUrl is taken from t.Datasets[0], not from
		// t.PreTrainModel — confirm this is intended; it also panics when
		// PreTrainModel is set but Datasets is empty.
		param.Parameter = append(param.Parameter, models.Parameter{
			Label: modelarts.PretrainUrl,
			Value: multiModelUrl,
		}, models.Parameter{
			Label: modelarts.CkptUrl,
			Value: t.Datasets[0].S3DownloadUrl,
		})
	}
	existDeviceTarget := false
	for _, parameter := range t.Params.Parameter {
		if parameter.Label == modelarts.DeviceTarget {
			existDeviceTarget = true
		}
		// train_url and data_url are managed by the platform; drop them.
		if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
			param.Parameter = append(param.Parameter, models.Parameter{
				Label: parameter.Label,
				Value: parameter.Value,
			})
		}
	}
	if !existDeviceTarget {
		param.Parameter = append(param.Parameter, models.Parameter{
			Label: modelarts.DeviceTarget,
			Value: modelarts.Ascend,
		})
	}
	return param
}
func getCloudbrainTwoUserCommand(appUrl, bootFile, dataUrl, trainUrl string, params models.Parameters) string { | |||
userCommand := "" | |||
tmpCodeObsPaths := strings.Split(strings.Trim(appUrl, "/"), "/") | |||
lastCodeDir := "code" | |||
if len(tmpCodeObsPaths) > 0 { | |||
lastCodeDir = tmpCodeObsPaths[len(tmpCodeObsPaths)-1] | |||
} | |||
userCommand = "/bin/bash /home/work/run_train.sh 's3://" + appUrl + "' '" + lastCodeDir + "/" + bootFile + "' '/tmp/log/train.log' --'data_url'='s3://" + dataUrl + "' --'train_url'='s3://" + trainUrl + "'" | |||
for _, param := range params.Parameter { | |||
userCommand += " --'" + param.Label + "'='" + param.Value + "'" | |||
} | |||
return userCommand | |||
} | |||
func getCloudbrainTwoMultiDataUrl(datasets []entity.ContainerData) string { | |||
if len(datasets) == 0 { | |||
return "" | |||
} | |||
var datasUrlList []models.Datasurl | |||
for _, d := range datasets { | |||
datasUrlList = append(datasUrlList, models.Datasurl{ | |||
DatasetUrl: d.S3DownloadUrl, | |||
DatasetName: d.Name, | |||
}) | |||
} | |||
jsondata, _ := json.Marshal(datasUrlList) | |||
return string(jsondata) | |||
} | |||
func getCloudbrainTwoModelUrl(datasets []entity.ContainerData) string { | |||
if len(datasets) == 0 { | |||
return "" | |||
} | |||
var modelUrlList []models.ModelUrls | |||
for _, d := range datasets { | |||
modelUrlList = append(modelUrlList, models.ModelUrls{ | |||
ModelUrl: d.S3DownloadUrl, | |||
ModelName: d.Name, | |||
}) | |||
} | |||
jsondata, _ := json.Marshal(modelUrlList) | |||
return string(jsondata) | |||
} | |||
func (c CloudbrainTwoClusterAdapter) DeleteTrainJob(opts entity.JobIdAndVersionId) error { | |||
_, err := modelarts.DelTrainJobVersion(opts.JobID, strconv.FormatInt(opts.VersionID, 10)) | |||
return err | |||
} | |||
func (c CloudbrainTwoClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error { | |||
_, err := modelarts.StopTrainJob(opts.JobID, strconv.FormatInt(opts.VersionID, 10)) | |||
if err != nil { | |||
log.Error("StopTrainJob(%s) failed:%v", opts, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainTwoClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) { | |||
return nil, nil | |||
// QueryTrainJobByJobName fetches the first page (up to 20 entries, newest
// first) of ModelArts train jobs matching jobName and returns the ones whose
// name matches exactly; the backend filter may be a substring match.
func (c CloudbrainTwoClusterAdapter) QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) {
	res, err := modelarts.GetTrainJobList(20, 1, "create_time", "desc", jobName)
	if err != nil {
		log.Error("GetTrainJobList failed:%v", err)
		return nil, err
	}
	result := make([]*entity.QueryTaskResponse, 0)
	if res != nil {
		for i := 0; i < len(res.JobList); i++ {
			// Keep only exact name matches.
			if res.JobList[i].JobName == jobName {
				result = append(result, convertJobList2QueryRes(res.JobList[i]))
			}
		}
	}
	return result, nil
}
func (c CloudbrainTwoClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
func convertJobList2QueryRes(res models.JobList) *entity.QueryTaskResponse { | |||
return &entity.QueryTaskResponse{ | |||
JobId: strconv.FormatInt(res.JobID, 10), | |||
Status: transCloudbrainTwoTrainJobStatus(res.IntStatus), | |||
VersionId: res.VersionID, | |||
} | |||
} | |||
func (c CloudbrainTwoClusterAdapter) GetTrainLog(string) (*entity.ClusterLog, error) { | |||
return nil, nil | |||
func (c CloudbrainTwoClusterAdapter) QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) { | |||
result, err := modelarts.GetTrainJob(opts.JobID, strconv.FormatInt(opts.VersionID, 10)) | |||
if err != nil { | |||
log.Error("GetTrainJob(%s) failed:%v", opts, err) | |||
return nil, err | |||
} | |||
return convertCloudbrainTwoTrainJob2QueryRes(result), nil | |||
} | |||
// convertCloudbrainTwoTrainJob2QueryRes maps a ModelArts train-job detail
// onto the cluster-agnostic query response. StartTime and Duration arrive in
// milliseconds; CompletedAt is derived as start + duration, but only once the
// job has reached a terminal status.
func convertCloudbrainTwoTrainJob2QueryRes(res *models.GetTrainJobResult) *entity.QueryTaskResponse {
	status := transCloudbrainTwoTrainJobStatus(res.IntStatus)
	startedAt := timeutil.TimeStamp(0)
	if res.StartTime > 0 {
		// Millisecond epoch -> second-resolution timestamp.
		startedAt = timeutil.TimeStamp(res.StartTime / 1000)
	}
	duration := res.Duration / 1000
	completedAt := timeutil.TimeStamp(0)
	if startedAt > 0 && models.IsCloudbrainTerminalStatus(status) {
		completedAt = startedAt.Add(duration)
	}
	return &entity.QueryTaskResponse{
		StartedAt:   startedAt,
		CompletedAt: completedAt,
		JobId:       fmt.Sprint(res.JobID),
		Status:      status,
		VersionId:   res.VersionID,
	}
}
// transCloudbrainTwoTrainJobStatus translates a ModelArts numeric train-job
// status code into its symbolic name. Codes outside the known range are
// echoed back as their decimal string so callers can still display them.
func transCloudbrainTwoTrainJobStatus(status int) string {
	names := []string{
		"UNKNOWN", "INIT", "IMAGE_CREATING", "IMAGE_FAILED",
		"SUBMIT_TRYING", "SUBMIT_FAILED", "DELETE_FAILED", "WAITING",
		"RUNNING", "KILLING", "COMPLETED", "FAILED",
		"KILLED", "CANCELED", "LOST", "SCALING",
		"SUBMIT_MODEL_FAILED", "DEPLOY_SERVICE_FAILED", "CHECK_INIT",
		"CHECK_RUNNING", "CHECK_RUNNING_COMPLETED", "CHECK_FAILED",
	}
	if status >= 0 && status < len(names) {
		return names[status]
	}
	return strconv.Itoa(status)
}
func transferCloudbrain2LogOrder(direction entity.Direction) string { | |||
if direction == entity.UP { | |||
return "asc" | |||
} else if direction == entity.DOWN { | |||
return "desc" | |||
} | |||
return "" | |||
} | |||
// GetLog fetches one page of train-job log lines from ModelArts. Head and
// bottom requests override the base line and ordering so the API returns the
// very first or very last lines of the file.
func (c CloudbrainTwoClusterAdapter) GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) {
	baseLine := fmt.Sprint(opts.BaseLine)
	order := transferCloudbrain2LogOrder(opts.Direction)
	if opts.IsHeadRequest() {
		// Read from the start of the log.
		baseLine = ""
		order = "asc"
	} else if opts.IsBottomRequest() {
		// Read from the end of the log.
		baseLine = ""
		order = "desc"
	}
	result, err := getModelartsTrainJob(opts.JobId, opts.VersionID, baseLine, order, int(opts.Lines), opts.LogFileName)
	if err != nil {
		log.Error("getModelartsTrainJob(%s) failed:%v", opts.JobId, err)
		return nil, err
	}
	lines := int64(result.Lines)
	return &entity.ClusterLog{
		Content:   result.Content,
		StartLine: result.StartLine,
		EndLine:   result.EndLine,
		Lines:     lines,
	}, nil
}
func getModelartsTrainJob(jobID string, versionID int64, baseLine string, order string, lines int, logFileName string) (*models.GetTrainJobLogResult, error) { | |||
result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(versionID, 10), baseLine, logFileName, order, lines) | |||
if err != nil { | |||
log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) | |||
return nil, err | |||
} | |||
return result, err | |||
} | |||
// GetLogDownloadInfo prepares a download of a task's logs. With exactly one
// log file it returns a presigned URL; with several it returns open readers
// for the caller to zip. Returns (nil, nil) when no log files exist.
// The shared `err` variable is deliberately assigned with `=` (not `:=`) so
// the deferred cleanup can observe any failure and close already-opened
// readers; on success the caller owns and must close the readers.
func (c CloudbrainTwoClusterAdapter) GetLogDownloadInfo(opts entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	var err error
	helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)
	// Look up the task's log files.
	files := getLogFilesInStorage(helper, opts.ObjectKeyPrefix, ".log")
	if len(files) == 0 {
		// No matching log file was found.
		return nil, nil
	}
	// Single log file: hand out the OBS presigned download URL directly.
	if len(files) == 1 {
		var url string
		url, err = helper.GetSignedDownloadUrl(files[0].RelativePath)
		if err != nil {
			log.Error("GetObsCreateSignedUrlByBucketAndKey failed when GetLogDownloadInfo opts=%+v: err=%v", opts, err)
			return nil, err
		}
		return &entity.FileDownloadInfo{
			DownloadUrl: url,
		}, nil
	}
	readerList := make([]entity.FileReader, 0)
	// On any later failure, close every reader opened so far.
	defer func() {
		if err != nil {
			for _, r := range readerList {
				if r.Reader != nil {
					r.Reader.Close()
				}
			}
		}
	}()
	// Multiple files: open each one so the caller can package them (zip).
	for _, file := range files {
		// Open a reader over this log file.
		var reader io.ReadCloser
		reader, err = helper.OpenFile(file.RelativePath)
		if err != nil {
			log.Error("GetLogDownloadInfo OpenFile err.opts=%+v,err =%v", opts, err)
			return nil, err
		}
		readerList = append(readerList, entity.FileReader{
			Reader: reader,
			Name:   file.FileName,
		})
	}
	return &entity.FileDownloadInfo{
		Readers:        readerList,
		ResultType:     entity.FileTypeZIP,
		ResultFileName: opts.DisplayJobName + ".zip",
	}, nil
}
func (c CloudbrainTwoClusterAdapter) GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
url, err := helper.GetSignedDownloadUrl(opts.Path) | |||
if err != nil { | |||
log.Error("GetSignedDownloadUrl err.opts=%+v,err =%v", opts, err) | |||
return nil, err | |||
} | |||
return &entity.FileDownloadInfo{ | |||
DownloadUrl: url, | |||
}, nil | |||
} | |||
func (c CloudbrainTwoClusterAdapter) GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) { | |||
return GetAllOutputDownloadInfo(opts) | |||
} | |||
func (c CloudbrainTwoClusterAdapter) GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) { | |||
return c.GetNoteBookOperationProfile(jobId) | |||
} | |||
// GetResourceUsage fetches metric statistics for a train-job version from
// ModelArts and converts them to the cluster-agnostic form.
// NOTE(review): transferModelartsMetricsToStandard returns nil on a parse
// failure, so this can yield (nil, nil) — confirm callers tolerate that.
func (c CloudbrainTwoClusterAdapter) GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) {
	result, err := modelarts.GetTrainJobMetricStatistic(opts.JobId, strconv.FormatInt(opts.VersionID, 10), opts.LogFileName)
	if err != nil {
		log.Error("GetTrainJobMetricStatistic(%s) failed:%v", opts.JobId, err.Error())
		return nil, err
	}
	return transferModelartsMetricsToStandard(result), nil
}
// transferModelartsMetricsToStandard converts ModelArts metric statistics
// (string-valued samples) into the cluster-agnostic float32 form.
// NOTE(review): a single unparsable sample aborts the whole conversion and
// returns nil (the caller then propagates nil with a nil error) — consider
// whether skipping the bad sample would be preferable.
func transferModelartsMetricsToStandard(result *models.GetTrainJobMetricStatisticResult) *entity.ResourceUsage {
	m := make([]entity.MetricsInfo, 0)
	for i := 0; i < len(result.MetricsInfo); i++ {
		valArray := result.MetricsInfo[i].Value
		temp := make([]float32, len(valArray))
		for j := 0; j < len(valArray); j++ {
			val, err := strconv.ParseFloat(valArray[j], 32)
			if err != nil {
				log.Error("parse metrics value error, val=%v err=%v result=%+v", valArray[j], err, result)
				return nil
			}
			temp[j] = float32(val)
		}
		m = append(m, entity.MetricsInfo{
			Name:  result.MetricsInfo[i].Metric,
			Value: temp,
		})
	}
	return &entity.ResourceUsage{
		Interval:    result.Interval,
		MetricsInfo: m,
	}
}
// GetNodeInfo derives per-node info from the train job's log file names: one
// entry per log file. Lookup failures are deliberately swallowed (logged,
// then (nil, nil)) so the node panel degrades to empty instead of erroring.
func (c CloudbrainTwoClusterAdapter) GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) {
	resultLogFile, err := modelarts.GetTrainJobLogFileNames(opts.JobId, strconv.FormatInt(opts.VersionId, 10))
	if err != nil {
		log.Error("GetTrainJobLogFileNames(%s) failed:%v", opts.JobId, err.Error())
		return nil, nil
	}
	if resultLogFile == nil {
		return nil, nil
	}
	res := make([]entity.AITaskNodeInfo, len(resultLogFile.LogFileList))
	for i := 0; i < len(resultLogFile.LogFileList); i++ {
		res[i] = entity.AITaskNodeInfo{LogFileName: resultLogFile.LogFileList[i]}
	}
	return res, nil
}
func (c CloudbrainTwoClusterAdapter) GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
fileList, err := helper.GetOneLevelObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir)) | |||
if err != nil { | |||
log.Error("GetOneLevelObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err) | |||
return nil, err | |||
} | |||
return &entity.ClusterAITaskOutput{ | |||
Status: models.ModelMigrateSuccess, | |||
Path: opts.ParentDir, | |||
FileList: fileList, | |||
}, nil | |||
} | |||
func (c CloudbrainTwoClusterAdapter) GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) { | |||
helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType) | |||
fileList, err := helper.GetAllObjectsUnderDir(path.Join(opts.ObjectKeyPrefix, opts.ParentDir)) | |||
if err != nil { | |||
log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.ObjectKeyPrefix, err) | |||
return nil, err | |||
} | |||
return &entity.AllAITaskOutput{FileList: fileList}, nil | |||
} |
@@ -1,9 +1,8 @@ | |||
package cluster | |||
import ( | |||
"errors" | |||
"code.gitea.io/gitea/entity" | |||
"errors" | |||
) | |||
var clusterMap = map[entity.ClusterType]ClusterAdapter{} | |||
@@ -26,23 +25,30 @@ func GetCluster(t entity.ClusterType) (ClusterAdapter, error) { | |||
type ClusterAdapter interface { | |||
CreateNoteBook(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) | |||
RestartNoteBook(jobId string) (*entity.RestartNoteBookTaskResponse, error) | |||
DeleteNoteBook(jobId string) error | |||
StopNoteBook(jobId string) error | |||
QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) | |||
DeleteNoteBook(opts entity.JobIdAndVersionId) error | |||
StopNoteBook(opts entity.JobIdAndVersionId) error | |||
QueryNoteBook(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) | |||
QueryNoteBookByJobName(jobName string) ([]*entity.QueryTaskResponse, error) | |||
GetNoteBookLog(jobId string) (*entity.ClusterLog, error) | |||
GetNoteBookUrl(jobId string) (string, error) | |||
GetNoteBookOperationProfile(jobId string) (*entity.OperationProfile, error) | |||
CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) | |||
DeleteTrainJob(jobId string) error | |||
StopTrainJob(string) error | |||
RestartTrainJob(jobId string) (*entity.CreateTrainTaskResponse, error) | |||
QueryTrainJob(jobId string) (*entity.QueryTaskResponse, error) | |||
GetTrainLog(jobId string) (*entity.ClusterLog, error) | |||
DeleteTrainJob(opts entity.JobIdAndVersionId) error | |||
StopTrainJob(opts entity.JobIdAndVersionId) error | |||
QueryTrainJob(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) | |||
QueryTrainJobByJobName(jobName string) ([]*entity.QueryTaskResponse, error) | |||
GetLog(opts entity.ClusterLogOpts) (*entity.ClusterLog, error) | |||
GetLogDownloadInfo(entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) | |||
GetTrainJobOperationProfile(jobId string) (*entity.OperationProfile, error) | |||
GetOutput(opts entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) | |||
GetAllOutput(opts entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) | |||
GetSingleOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) | |||
GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) | |||
GetNodeInfo(opts entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) | |||
GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) | |||
//GetImages return available list of clusters | |||
//The second parameter will return true if image is no limit | |||
GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) | |||
GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) | |||
GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) | |||
CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) | |||
} |
@@ -0,0 +1,140 @@ | |||
package cluster | |||
import ( | |||
"bufio" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"io" | |||
"strings" | |||
) | |||
func GetLocalLog(r io.Reader, startLine, endLine int64) (content string, realEndLine int64, total int64) { | |||
if startLine > endLine { | |||
return "", 0, 0 | |||
} | |||
re := "" | |||
fileEndLine := endLine | |||
reader := bufio.NewReader(r) | |||
var countLine = int64(1) | |||
//跳过开始行之前的内容 | |||
for countLine < startLine { | |||
_, err := reader.ReadString('\n') | |||
if err != nil { | |||
log.Error("GetLocalLog ReadString err. %v", err) | |||
return "", 0, 0 | |||
} | |||
countLine++ | |||
} | |||
//读取指定的开始行到结束行的内容 | |||
for countLine >= startLine && countLine <= endLine { | |||
line, err := reader.ReadString('\n') | |||
if err != nil { | |||
if err == io.EOF { | |||
re = re + line | |||
countLine++ | |||
} | |||
log.Error("GetLocalLog ReadString err. %v", err) | |||
break | |||
} | |||
re = re + line | |||
countLine++ | |||
} | |||
fileEndLine = countLine - 1 | |||
return re, fileEndLine, fileEndLine - startLine + 1 | |||
} | |||
func getAllLineFromFile(helper storage_helper.StorageHelper, filePath string) int64 { | |||
var count int64 | |||
r, err := helper.OpenFile(filePath) | |||
defer r.Close() | |||
if err != nil { | |||
log.Info("error:" + err.Error()) | |||
return 0 | |||
} | |||
reader := bufio.NewReader(r) | |||
for { | |||
_, err := reader.ReadString('\n') | |||
if err != nil { | |||
if err == io.EOF { | |||
count++ | |||
} | |||
log.Error("GetLocalLog ReadString err. %v", err) | |||
break | |||
} | |||
count++ | |||
} | |||
return count | |||
} | |||
func getLogFilesInStorage(helper storage_helper.StorageHelper, objectKeyPrefix string, logSuffix string) []storage.FileInfo { | |||
//获取日志输出目录下文件列表 | |||
fileList, err := helper.GetOneLevelObjectsUnderDir(objectKeyPrefix) | |||
if err != nil { | |||
log.Error("GetTrainLog read dir err.objectKeyPrefix=%s,err=%v", objectKeyPrefix, err) | |||
return nil | |||
} | |||
if len(fileList) == 0 { | |||
return nil | |||
} | |||
logFiles := make([]storage.FileInfo, 0) | |||
for _, f := range fileList { | |||
if f.IsDir { | |||
continue | |||
} | |||
if strings.HasSuffix(f.FileName, logSuffix) { | |||
logFiles = append(logFiles, f) | |||
} | |||
} | |||
return logFiles | |||
} | |||
// GetAllOutputDownloadInfo opens every regular file under opts.Path in the
// storage backend selected by opts.StorageType and returns them as a single
// FileDownloadInfo to be packaged as "<JobName>.zip".
// It returns (nil, nil) when the directory holds no objects.
func GetAllOutputDownloadInfo(opts entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) {
	helper := storage_helper.SelectUploaderFromStorageType(opts.StorageType)
	var err error
	fileList, err := helper.GetAllObjectsUnderDir(opts.Path)
	if err != nil {
		log.Error("GetAllObjectsUnderDir err.objectKeyPrefix=%s,err=%v", opts.Path, err)
		return nil, err
	}
	if len(fileList) == 0 {
		// No output produced; callers treat (nil, nil) as "nothing to download".
		return nil, nil
	}
	res := &entity.FileDownloadInfo{
		Readers:        make([]entity.FileReader, 0),
		ResultType:     entity.FileTypeZIP,
		ResultFileName: opts.JobName + ".zip",
	}
	// If a later OpenFile fails, release every reader opened so far.
	// (err is the function-scoped variable assigned inside the loop.)
	defer func() {
		if err != nil {
			res.Close()
		}
	}()
	for i := 0; i < len(fileList); i++ {
		file := fileList[i]
		if file.IsDir {
			continue
		}
		var reader io.ReadCloser
		reader, err = helper.OpenFile(file.RelativePath)
		if err != nil {
			log.Error("GetAllOutputDownloadInfo OpenFile err.opts=%+v,err =%v", opts, err)
			return nil, err
		}
		res.Readers = append(res.Readers, entity.FileReader{
			Reader: reader,
			Name:   file.FileName,
		})
	}
	return res, nil
}
@@ -1,15 +1,15 @@ | |||
package container_builder | |||
import ( | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"path" | |||
"strings" | |||
) | |||
type CodeBuilder struct { | |||
@@ -42,14 +42,12 @@ func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerDat | |||
jobName := ctx.Request.JobName | |||
repo := ctx.Repository | |||
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" | |||
uploader := upload.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + cloudbrain.CodeMountPath | |||
uploader := storage_helper.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + opts.GetLocalPath() | |||
//再次调试和在线运行notebook不需要下载、上传代码 | |||
if !ctx.Request.IsRestartRequest && !ctx.Request.IsFileNoteBookRequest { | |||
log.Info("start to upload to remoteDir=" + remoteDir + " codeLocalPath=" + codeLocalPath) | |||
if err := DownloadCode(ctx, codeLocalPath, b.Opts.NotArchive); err != nil { | |||
if err := DownloadCode(ctx, codeLocalPath, b.Opts.Uncompressed); err != nil { | |||
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) | |||
return nil, response.LOAD_CODE_FAILED | |||
} | |||
@@ -60,17 +58,22 @@ func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerDat | |||
} | |||
} | |||
codeArchiveName := "" | |||
//如果代码是压缩包形式,以默认分支命名压缩包(继承原有逻辑) | |||
if !b.Opts.NotArchive { | |||
var codeArchiveName, objectKey string | |||
//如果代码是压缩包形式,挂载的是文件,以默认分支命名压缩包(继承原有逻辑) | |||
if !b.Opts.Uncompressed { | |||
codeArchiveName = cloudbrain.DefaultBranchName + ".zip" | |||
objectKey = path.Join(remoteDir, codeArchiveName) | |||
} else { | |||
objectKey = remoteDir + "/" | |||
} | |||
containerPath := "" | |||
if opts.ContainerPath != "" { | |||
containerPath = opts.ContainerPath + "/" + codeArchiveName | |||
//如果代码是压缩包,此时的挂载路径是文件 | |||
//如果代码不是压缩包,此时的挂载路径是目录 | |||
containerPath = path.Join(opts.ContainerPath, codeArchiveName) | |||
} | |||
objectKey := remoteDir + "/" + codeArchiveName | |||
codeData := entity.ContainerData{ | |||
Name: strings.ToLower(repo.Name), | |||
Bucket: uploader.GetBucket(), | |||
@@ -79,6 +82,9 @@ func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerDat | |||
ReadOnly: opts.ReadOnly, | |||
ContainerPath: containerPath, | |||
RealPath: uploader.GetRealPath(objectKey), | |||
IsDir: b.Opts.Uncompressed, | |||
S3DownloadUrl: uploader.GetS3DownloadUrl(objectKey), | |||
StorageType: storageTypes[0], | |||
} | |||
return []entity.ContainerData{codeData}, nil | |||
} |
@@ -6,7 +6,7 @@ import ( | |||
"code.gitea.io/gitea/modules/git" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"errors" | |||
"io" | |||
"io/ioutil" | |||
@@ -14,7 +14,7 @@ import ( | |||
"strings" | |||
) | |||
func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive bool) error { | |||
func DownloadCode(ctx *context.CreationContext, codeLocalPath string, uncompressed bool) error { | |||
dir, err := ioutil.ReadDir(codeLocalPath) | |||
//ReqCommitID为空时需要下载最新的代码,把旧的删掉 | |||
if len(dir) != 0 && ctx.Request.ReqCommitID == "" { | |||
@@ -26,10 +26,10 @@ func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive | |||
//目录为空时需要下载代码 | |||
if len(dir) == 0 { | |||
if notArchive { | |||
commitId, err = upload.DownloadCode(ctx.GitRepo, ctx.Repository, codeLocalPath, ctx.Request.BranchName) | |||
if uncompressed { | |||
commitId, err = storage_helper.DownloadCode(ctx.GitRepo, ctx.Repository, codeLocalPath, ctx.Request.BranchName) | |||
} else { | |||
commitId, err = upload.DownloadZipCode(ctx.GitRepo, codeLocalPath, ctx.Request.BranchName) | |||
commitId, err = storage_helper.DownloadZipCode(ctx.GitRepo, codeLocalPath, ctx.Request.BranchName) | |||
} | |||
if err != nil { | |||
log.Error("downloadZipCode failed, server timed out: %s (%v)", ctx.Repository.FullName(), err) | |||
@@ -40,8 +40,8 @@ func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive | |||
return nil | |||
} | |||
var obsUploader = &upload.OBSUploader{} | |||
var minioUploader = &upload.MinioUploader{} | |||
var obsUploader = &storage_helper.OBSHelper{} | |||
var minioUploader = &storage_helper.MinioHelper{} | |||
const CLONE_FILE_PREFIX = "file:///" | |||
@@ -33,8 +33,6 @@ func CreateContainerBuilder(containerType entity.ContainerDataType, opts *entity | |||
return nil | |||
} | |||
b := reflect.New(t.Elem()).Interface().(ContainerBuilder) | |||
//.Interface().(ContainerBuilder) | |||
//b.SetOpts(opts) | |||
b.SetOpts(opts) | |||
return b | |||
} | |||
@@ -1,14 +1,13 @@ | |||
package container_builder | |||
import ( | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"path" | |||
"strings" | |||
) | |||
type DatasetBuilder struct { | |||
@@ -32,61 +31,63 @@ func (b *DatasetBuilder) Build(ctx *context.CreationContext) ([]entity.Container | |||
if uuid == "" { | |||
return nil, nil | |||
} | |||
var datasetInfos map[string]models.DatasetInfo | |||
var datasetNames string | |||
var err error | |||
// models.GetDatasetInfo 是使用的以前的方法,所以此处按集群类型适配 | |||
if ctx.Request.Cluster == models.C2NetCluster { | |||
datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid, ctx.Request.ComputeSource.Name) | |||
} else { | |||
datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid) | |||
} | |||
datasetInfos, err := models.GetDatasetInfo4AITask(uuid) | |||
if err != nil { | |||
log.Error("GetDatasetInfo failed: %v", err) | |||
return nil, response.DATASET_SELECT_ERROR | |||
} | |||
uuidArray := strings.Split(uuid, ";") | |||
if datasetInfos == nil || len(datasetInfos) < len(uuidArray) { | |||
if len(datasetInfos) < len(strings.Split(uuid, ";")) { | |||
log.Error("GetDatasetInfo count error.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
return nil, response.PARTIAL_DATASETS_NOT_AVAILABLE | |||
} | |||
var data []entity.ContainerData | |||
for _, datasetInfo := range datasetInfos { | |||
name := datasetInfo.FullName | |||
var name, objectKey, s3DownloadUrl string | |||
//如果不是压缩包,那么文件名是去掉后缀以后的数据集名称 | |||
if b.Opts.NotArchive { | |||
name = datasetInfo.Name | |||
if b.Opts.Uncompressed { | |||
name = datasetInfo.Uncompressed.Name | |||
objectKey = datasetInfo.Uncompressed.ObjectKey | |||
s3DownloadUrl = datasetInfo.Uncompressed.S3DownloadUrl | |||
} else { | |||
name = datasetInfo.Compressed.Name | |||
objectKey = datasetInfo.Compressed.ObjectKey | |||
s3DownloadUrl = datasetInfo.Compressed.S3DownloadUrl | |||
} | |||
//由于云脑一训练任务单数据集情况比较特殊,挂载时没有数据集名字的父文件夹,因此特殊处理 | |||
//todo AITask 解决此特殊处理 | |||
if ctx.Request.Cluster == entity.OpenICloudbrainOne && | |||
ctx.Request.JobType == models.JobTypeTrain && len(datasetInfos) == 1 { | |||
name = "" | |||
} | |||
if datasetInfo.Type == models.TypeCloudBrainOne { | |||
//如果返回的localPath已经带了实际路径的前缀,需要去除掉以后才是在minio上的objectKey | |||
objectKey := datasetInfo.DataLocalPath | |||
objectKey = strings.TrimPrefix(objectKey, setting.Attachment.Minio.RealPath) | |||
objectKey = strings.TrimPrefix(objectKey, setting.Attachment.Minio.Bucket) | |||
objectKey = strings.TrimPrefix(objectKey, "/") | |||
data = append(data, entity.ContainerData{ | |||
Name: name, | |||
Bucket: minioUploader.GetBucket(), | |||
EndPoint: minioUploader.GetEndpoint(), | |||
ObjectKey: objectKey, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + name, | |||
ContainerPath: path.Join(b.Opts.ContainerPath, name), | |||
RealPath: minioUploader.GetRealPath(objectKey), | |||
IsDir: b.Opts.Uncompressed, | |||
Size: datasetInfo.Size, | |||
StorageType: entity.MINIO, | |||
}) | |||
} else { | |||
objectKey := datasetInfo.DataLocalPath + datasetInfo.FullName | |||
data = append(data, entity.ContainerData{ | |||
Name: name, | |||
Bucket: obsUploader.GetBucket(), | |||
EndPoint: obsUploader.GetEndpoint(), | |||
ObjectKey: objectKey, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + name, | |||
ContainerPath: path.Join(b.Opts.ContainerPath, name), | |||
S3DownloadUrl: s3DownloadUrl, | |||
IsDir: b.Opts.Uncompressed, | |||
Size: datasetInfo.Size, | |||
StorageType: entity.OBS, | |||
}) | |||
} | |||
} | |||
ctx.Request.DatasetNames = datasetNames | |||
return data, nil | |||
} | |||
@@ -34,6 +34,7 @@ func (b *FileNoteBookCodeBuilder) Build(ctx *context.CreationContext) ([]entity. | |||
if repo == nil { | |||
return nil, nil | |||
} | |||
//在线运行notebook不需要代码挂载或者调度,只需要把对对应分支的代码仓下载到指定目录。上传目标分支的逻辑在其他地方(继承原有逻辑) | |||
err := DownloadBranch(repo, getCodePath(ctx.Request.JobName, repo, ctx.Request.FileBranchName), ctx.Request.FileBranchName) | |||
if err != nil { | |||
log.Error("download code failed", err) | |||
@@ -0,0 +1,60 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"path" | |||
) | |||
// LogPathBuilder builds the container mount entry for an AI task's log
// directory in remote storage.
type LogPathBuilder struct {
	Opts *entity.ContainerBuildOpts
}

// init registers this builder in the container-builder registry so it can be
// instantiated by its container type.
func init() {
	o := &LogPathBuilder{}
	RegisterContainerBuilder(o)
}

// SetOpts stores the build options later consumed by Build.
func (b *LogPathBuilder) SetOpts(opts *entity.ContainerBuildOpts) {
	b.Opts = opts
}
func (b *LogPathBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { | |||
if b.Opts.Disable { | |||
return nil, nil | |||
} | |||
storageTypes := b.Opts.AcceptStorageType | |||
if storageTypes == nil || len(storageTypes) == 0 { | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
jobName := ctx.Request.JobName | |||
uploader := storage_helper.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := path.Join(uploader.GetJobDefaultObjectKeyPrefix(jobName), b.Opts.GetLocalPath()) | |||
if b.Opts.MKDIR { | |||
err := uploader.MKDIR(remoteDir) | |||
if err != nil { | |||
log.Error("MKDIR err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err) | |||
return nil, response.NewBizError(err) | |||
} | |||
} | |||
return []entity.ContainerData{{ | |||
ContainerPath: b.Opts.ContainerPath, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ObjectKey: remoteDir, | |||
RealPath: uploader.GetRealPath(remoteDir), | |||
Bucket: uploader.GetBucket(), | |||
EndPoint: uploader.GetEndpoint(), | |||
IsDir: true, | |||
StorageType: storageTypes[0], | |||
}}, nil | |||
} | |||
// GetContainerType identifies this builder as the log-path container builder.
func (b *LogPathBuilder) GetContainerType() entity.ContainerDataType {
	return entity.ContainerLogPath
}
@@ -1,15 +1,13 @@ | |||
package container_builder | |||
import ( | |||
"fmt" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"path" | |||
) | |||
type OutputPathBuilder struct { | |||
@@ -29,25 +27,36 @@ func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.Contai | |||
if b.Opts.Disable { | |||
return nil, nil | |||
} | |||
log.Info("go here len(storageTypes)=") | |||
storageTypes := b.Opts.AcceptStorageType | |||
log.Info("len=" + fmt.Sprint(len(storageTypes))) | |||
if storageTypes == nil || len(storageTypes) == 0 { | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
jobName := ctx.Request.JobName | |||
uploader := upload.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + cloudbrain.ModelMountPath | |||
if ctx.Request.JobType != models.JobTypeOnlineInference { | |||
uploader := storage_helper.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := path.Join(uploader.GetJobDefaultObjectKeyPrefix(jobName), b.Opts.GetLocalPath()) | |||
if b.Opts.MKDIR { | |||
err := uploader.MKDIR(remoteDir) | |||
if err != nil { | |||
log.Error("MKDIR err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err) | |||
return nil, response.NewBizError(err) | |||
} | |||
} | |||
//如果是继续训练,需要将上次的结果拷贝到本次训练任务的输出目录 | |||
if ctx.Request.IsContinueRequest { | |||
if ctx.SourceCloudbrain == nil { | |||
log.Error("SourceCloudbrain empty.displayJobName = %s", ctx.Request.DisplayJobName) | |||
return nil, response.PARAM_ERROR | |||
} | |||
sourcePath := getSourceOutputPath(ctx.SourceCloudbrain, uploader, b.Opts.ContainerPath) | |||
err := uploader.CopyByPath(sourcePath, remoteDir, []string{"README", ".txt"}) | |||
if err != nil { | |||
log.Error("CopyByPath err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err) | |||
return nil, response.NewBizError(err) | |||
} | |||
} | |||
return []entity.ContainerData{{ | |||
ContainerPath: b.Opts.ContainerPath, | |||
ReadOnly: b.Opts.ReadOnly, | |||
@@ -56,9 +65,19 @@ func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.Contai | |||
Bucket: uploader.GetBucket(), | |||
EndPoint: uploader.GetEndpoint(), | |||
GetBackEndpoint: uploader.GetEndpoint(), | |||
IsDir: true, | |||
StorageType: storageTypes[0], | |||
}}, nil | |||
} | |||
// getSourceOutputPath returns the object-key prefix of the source task's
// output directory, used when a continue-training job copies the previous
// results into the new task. It prefers the path recorded in the source
// task's config snapshot; records without a config fall back to the
// conventional <job prefix>/<version name>/<container path> layout.
func getSourceOutputPath(sourceCloudbrain *models.Cloudbrain, helper storage_helper.StorageHelper, containerPath string) string {
	c := sourceCloudbrain.GetCloudbrainConfig()
	if c != nil {
		return c.OutputObjectPrefix
	}
	return path.Join(helper.GetJobDefaultObjectKeyPrefix(sourceCloudbrain.JobName), sourceCloudbrain.VersionName, containerPath)
}
// GetContainerType identifies this builder as the output-path container builder.
func (b *OutputPathBuilder) GetContainerType() entity.ContainerDataType {
	return entity.ContainerOutPutPath
}
@@ -3,6 +3,7 @@ package container_builder | |||
import ( | |||
"code.gitea.io/gitea/routers/response" | |||
"fmt" | |||
"path" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
@@ -12,7 +13,7 @@ import ( | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
) | |||
@@ -69,7 +70,8 @@ func (b *PretrainModelBuilder) Build(ctx *context.CreationContext) ([]entity.Con | |||
storageType := oldStorageType | |||
ckptNames := strings.Split(form.PretrainModelCkptName, ";") | |||
for _, ckptName := range ckptNames { | |||
if !cloudbrainTask.IsModelFileExists(m, ckptName) { | |||
isExists, size := cloudbrainTask.CheckAndGetFileSize(m, ckptName) | |||
if !isExists { | |||
log.Error("model file not exist.name = %s", ckptName) | |||
return nil, response.MODEL_NOT_EXISTS | |||
} | |||
@@ -90,15 +92,18 @@ func (b *PretrainModelBuilder) Build(ctx *context.CreationContext) ([]entity.Con | |||
storageType = entity.MINIO | |||
} | |||
} | |||
uploader := upload.SelectUploaderFromStorageType(storageType) | |||
uploader := storage_helper.SelectUploaderFromStorageType(storageType) | |||
modelData := entity.ContainerData{ | |||
Name: form.PretrainModelName, | |||
Name: ckptName, | |||
Bucket: uploader.GetBucket(), | |||
EndPoint: uploader.GetEndpoint(), | |||
ObjectKey: preTrainModelPath, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + ckptName, | |||
ContainerPath: path.Join(b.Opts.ContainerPath, ckptName), | |||
RealPath: uploader.GetRealPath(preTrainModelPath), | |||
S3DownloadUrl: uploader.GetS3DownloadUrl(preTrainModelPath), | |||
IsDir: false, | |||
Size: size, | |||
} | |||
preTrainModelEntity = append(preTrainModelEntity, modelData) | |||
} | |||
@@ -4,6 +4,7 @@ import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/git" | |||
"encoding/json" | |||
) | |||
type CreationContext struct { | |||
@@ -17,7 +18,7 @@ type CreationContext struct { | |||
Response *entity.CreationResponse | |||
SourceCloudbrain *models.Cloudbrain | |||
NewCloudbrain *models.Cloudbrain | |||
AITaskConfig entity.AITaskConfig | |||
Config *entity.AITaskBaseConfig | |||
} | |||
func (ctx *CreationContext) AddContainerData(t entity.ContainerDataType, d []entity.ContainerData) { | |||
@@ -46,3 +47,31 @@ func (ctx *CreationContext) WriteResponse(t entity.ContainerDataType) entity.Con | |||
} | |||
return a[0] | |||
} | |||
func (ctx *CreationContext) BuildCloudbrainConfig() *models.CloudbrainConfig { | |||
var aiConfigStr = "" | |||
s, err := json.Marshal(ctx.Config) | |||
if err == nil { | |||
aiConfigStr = string(s) | |||
} | |||
var containerDataStr = "" | |||
t, err := json.Marshal(ctx.ContainerData) | |||
if err == nil { | |||
containerDataStr = string(t) | |||
} | |||
output := ctx.GetContainerData(entity.ContainerOutPutPath) | |||
log := ctx.GetContainerData(entity.ContainerLogPath) | |||
c := &models.CloudbrainConfig{ | |||
ConfigurationSnapshot: aiConfigStr, | |||
OutputBucket: output.Bucket, | |||
OutputObjectPrefix: output.ObjectKey, | |||
OutputStorageType: string(output.StorageType), | |||
OutputEndpoint: output.EndPoint, | |||
LogBucket: log.Bucket, | |||
LogObjectPrefix: log.ObjectKey, | |||
LogStorageType: string(log.StorageType), | |||
LogEndpoint: log.EndPoint, | |||
ContainerDataSnapshot: containerDataStr, | |||
} | |||
return c | |||
} |
@@ -55,33 +55,28 @@ func GetModelScheduleStatus(jobId string) (models.ModelMigrateStatus, error) { | |||
return record.Status, nil | |||
} | |||
func RetryModelMigrate(jobId string) error { | |||
job, err := models.GetCloudbrainByJobID(jobId) | |||
if err != nil { | |||
log.Error("RetryModelMigrate GetCloudbrainByJobID err.jobId=%s err=%v", jobId, err) | |||
return errors.New("jobId not correct") | |||
} | |||
func RetryModelMigrate(job *models.Cloudbrain) error { | |||
if !job.IsTerminal() { | |||
log.Info("RetryModelMigrate job is not terminal.jobId=%s", jobId) | |||
log.Info("RetryModelMigrate job is not terminal.id=%s", job.ID) | |||
return errors.New("task is not terminal") | |||
} | |||
//避免并发问题,先尝试获取锁,获取锁以后再查最新的记录 | |||
lock := redis_lock.NewDistributeLock(redis_key.RecordHandleLock(jobId)) | |||
lock := redis_lock.NewDistributeLock(redis_key.RecordHandleLock(job.JobID)) | |||
success, err := lock.LockWithWait(10*time.Second, 10*time.Second) | |||
if err != nil { | |||
log.Error("HandleUnfinishedMigrateRecord lock err.jobId=%d %v", jobId, err) | |||
log.Error("HandleUnfinishedMigrateRecord lock err.id=%d %v", job.ID, err) | |||
return err | |||
} | |||
if !success { | |||
log.Error("HandleUnfinishedMigrateRecord lock failed.ID=%d ", jobId) | |||
log.Error("HandleUnfinishedMigrateRecord lock failed.ID=%d ", job.ID) | |||
return nil | |||
} | |||
defer lock.UnLock() | |||
record, err := models.GetModelMigrateRecordByCloudbrainId(job.ID) | |||
if err != nil { | |||
log.Error("RetryModelMigrate GetModelMigrateRecordByCloudbrainId err.jobId=%s err=%v", jobId, err) | |||
log.Error("RetryModelMigrate GetModelMigrateRecordByCloudbrainId err.id=%s err=%v", job.ID, err) | |||
if models.IsErrRecordNotExist(err) { | |||
return nil | |||
} | |||
@@ -91,7 +86,7 @@ func RetryModelMigrate(jobId string) error { | |||
//只有两种情况可以再次调度,一是虎鲸调度失败 二是本地移桶失败 | |||
if record.CurrentStep == models.GrampusMigrateFailed { | |||
log.Info("retry PostModelMigrate. record.id = %d", record.ID) | |||
_, err := grampus.PostModelMigrate(jobId) | |||
_, err := grampus.PostModelMigrate(job.JobID) | |||
if err != nil { | |||
log.Error("PostModelMigrate err.%v", err) | |||
return err | |||
@@ -217,6 +212,11 @@ func LocalMigrateOperate(jobName, computeSource string, r *models.ModelMigrateRe | |||
} | |||
log.Info("DestObjectKey", r.DestObjectKey) | |||
if strings.Contains(r.DestObjectKey, ".") { | |||
isExists, _ := storage.IsObjectExist4Obs(r.DestBucket, r.DestObjectKey) | |||
if !isExists { | |||
//此时没有文件需要解压迁移,直接更新为成功 | |||
models.UpdateModelMigrateStatusByStep(r, models.BucketMoveSuccess) | |||
} | |||
decompress(r.DestBucket+"/"+r.DestObjectKey, setting.Bucket+"/"+strings.TrimSuffix(r.DestObjectKey, models.ModelSuffix)) | |||
} else { //如果是文件夹,遍历文件 | |||
@@ -225,7 +225,10 @@ func LocalMigrateOperate(jobName, computeSource string, r *models.ModelMigrateRe | |||
log.Error("UpdateModelMigrateStatusByStep err. r.ID=%d step=%d err=%v", r.ID, models.BucketMoveFailed, err) | |||
return err | |||
} | |||
if len(fileInfos) == 0 { | |||
//此时没有文件需要解压迁移,直接更新为成功 | |||
models.UpdateModelMigrateStatusByStep(r, models.BucketMoveSuccess) | |||
} | |||
for _, fileInfo := range fileInfos { | |||
log.Info("decompress file:", fileInfo.FileName) | |||
sourceFilPath := r.DestBucket + "/" + r.DestObjectKey + fileInfo.FileName | |||
@@ -311,7 +314,7 @@ func updateModelMigrateFromRes(r *models.ModelMigrateRecord, res *models.Grampus | |||
} | |||
func MoveBucketInOpenIMinio(objectKeyPrefix, targetObjectPrefix, oldBucket, newBucket string) error { | |||
var core = storage.ScheduleMinioCore | |||
var core = storage.MinioCore | |||
objectInfo := core.Client.ListObjects(oldBucket, objectKeyPrefix, true, nil) | |||
log.Info("MoveBucketInOpenIMinio start.objectKeyPrefix=%s", objectKeyPrefix) | |||
count := 0 | |||
@@ -0,0 +1,48 @@ | |||
package storage_helper | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/storage" | |||
"io" | |||
"strings" | |||
) | |||
// UploaderConfig holds the bucket and endpoint a storage helper operates on.
// NOTE(review): no user of this type is visible in this chunk — confirm it is
// still needed.
type UploaderConfig struct {
	Bucket   string
	Endpoint string
}
// StorageHelper abstracts one object-storage backend (OBS or MinIO) behind the
// operations the AI-task services need: uploading job code, resolving paths
// and download URLs, and listing/copying job objects.
type StorageHelper interface {
	// UploadDir uploads the local directory codePath into the job's storage area.
	UploadDir(codePath, jobName string) error
	// GetRealPath maps an object key to the backend-specific absolute path.
	GetRealPath(objectKey string) string
	// GetBucket returns the bucket this helper operates on.
	GetBucket() string
	// GetEndpoint returns the backend endpoint.
	GetEndpoint() string
	// GetJobDefaultObjectKeyPrefix returns the default object-key prefix for a job.
	GetJobDefaultObjectKeyPrefix(jobName string) string
	// MKDIR creates (or simulates) an empty directory at path.
	MKDIR(path string) error
	// GetOneLevelObjectsUnderDir lists the immediate children of dirPath;
	// maxKeyArray[0] optionally overrides the listing limit.
	GetOneLevelObjectsUnderDir(dirPath string, maxKeyArray ...int) ([]storage.FileInfo, error)
	// GetAllObjectsUnderDir recursively lists every object below prefix.
	GetAllObjectsUnderDir(prefix string, maxKeyArray ...int) ([]storage.FileInfo, error)
	// OpenFile opens the object at path for reading; the caller must close it.
	OpenFile(path string) (io.ReadCloser, error)
	// GetSignedDownloadUrl returns a presigned download URL for key.
	GetSignedDownloadUrl(key string) (string, error)
	// GetS3DownloadUrl returns a direct S3-style download URL, or "" when the
	// backend does not provide one.
	GetS3DownloadUrl(key string) string
	// CopyByPath copies objects from sourcePath to targetPath, skipping file
	// names ending in any of filterSuffix.
	CopyByPath(sourcePath, targetPath string, filterSuffix []string) error
}
func SelectUploaderFromStorageType(storageType entity.StorageType) StorageHelper { | |||
switch storageType { | |||
case entity.OBS: | |||
return &OBSHelper{} | |||
case entity.MINIO: | |||
return &MinioHelper{} | |||
} | |||
return nil | |||
} | |||
// isMatchSuffix reports whether fileName ends with any of the given suffixes.
// An empty or nil suffix list never matches.
func isMatchSuffix(fileName string, filterSuffix []string) bool {
	for i := range filterSuffix {
		if strings.HasSuffix(fileName, filterSuffix[i]) {
			return true
		}
	}
	return false
}
@@ -0,0 +1,203 @@ | |||
package storage_helper | |||
import ( | |||
"bytes" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"fmt" | |||
"github.com/minio/minio-go" | |||
"io" | |||
"net/url" | |||
"path" | |||
"sort" | |||
"strings" | |||
"time" | |||
) | |||
// MinioHelper implements StorageHelper for the MinIO backend configured under
// setting.Attachment.Minio.
type MinioHelper struct {
}

// UploadDir uploads every file under codePath to MinIO below objectKeyPrefix.
// NOTE(review): the StorageHelper interface names this parameter jobName, but
// it is forwarded here as a raw object-key prefix — confirm what callers pass.
func (m *MinioHelper) UploadDir(codePath, objectKeyPrefix string) error {
	return UploadDirToMinio(codePath, objectKeyPrefix, "")
}

// GetJobDefaultObjectKeyPrefix returns the default object-key prefix for a
// job's files: <CBCodePathPrefix>/<jobName>.
func (m *MinioHelper) GetJobDefaultObjectKeyPrefix(jobName string) string {
	return path.Join(setting.CBCodePathPrefix, jobName)
}
// GetRealPath maps an object key to its path under the MinIO data root:
// <RealPath><Bucket>/<objectKey> (leading "/" on the key is stripped).
func (m *MinioHelper) GetRealPath(objectKey string) string {
	return setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + strings.TrimPrefix(objectKey, "/")
}
// GetBucket returns the configured MinIO attachment bucket.
func (m *MinioHelper) GetBucket() string {
	return setting.Attachment.Minio.Bucket
}

// GetEndpoint returns the configured MinIO endpoint.
func (m *MinioHelper) GetEndpoint() string {
	return setting.Attachment.Minio.Endpoint
}

// README is the placeholder object name used by MKDIR to simulate an empty
// directory, since object stores have no real directories.
const README = "README"
func (m *MinioHelper) MKDIR(path string) error { | |||
//无法直接创建空文件夹,上传一个readme文件模拟 | |||
path = strings.TrimSuffix(path, "/") + "/" + README | |||
val := "You can put the files into this directory and download the files by the web page." | |||
_, err := storage.Attachments.UploadContent(m.GetBucket(), path, bytes.NewReader([]byte(val))) | |||
return err | |||
} | |||
// OpenFile opens the object at objectKey for reading via the MinIO core
// client; the caller is responsible for closing the returned reader.
func (m *MinioHelper) OpenFile(objectKey string) (io.ReadCloser, error) {
	reader, _, err := storage.MinioCore.GetObject(m.GetBucket(), objectKey, minio.GetObjectOptions{})
	if err != nil {
		return nil, err
	}
	return reader, nil
}
func (m *MinioHelper) GetOneLevelObjectsUnderDir(dirPath string, maxKeyArray ...int) ([]storage.FileInfo, error) { | |||
if !strings.HasSuffix(dirPath, "/") { | |||
dirPath += "/" | |||
} | |||
maxKey := setting.OUTPUT_SHOW_MAX_KEY | |||
if len(maxKeyArray) > 0 { | |||
maxKey = maxKeyArray[0] | |||
} | |||
r, err := storage.MinioCore.ListObjectsV2(m.GetBucket(), dirPath, "", false, "/", maxKey, "") | |||
if err != nil { | |||
return nil, err | |||
} | |||
list := r.Contents | |||
fileInfos := make([]storage.FileInfo, 0) | |||
prefixLen := len(dirPath) | |||
for _, val := range list { | |||
var fileName string | |||
if val.Key == dirPath { | |||
continue | |||
} | |||
fileName = val.Key[prefixLen:] | |||
fileInfo := storage.FileInfo{ | |||
ModTime: val.LastModified.Local().Format("2006-01-02 15:04:05"), | |||
FileName: fileName, | |||
Size: val.Size, | |||
IsDir: false, | |||
RelativePath: dirPath + fileName, | |||
} | |||
fileInfos = append(fileInfos, fileInfo) | |||
} | |||
for _, val := range r.CommonPrefixes { | |||
fileName := strings.TrimSuffix(strings.TrimPrefix(val.Prefix, dirPath), "/") | |||
fileInfo := storage.FileInfo{ | |||
FileName: fileName, | |||
IsDir: true, | |||
RelativePath: dirPath + "/" + fileName, | |||
} | |||
fileInfos = append(fileInfos, fileInfo) | |||
} | |||
return fileInfos, nil | |||
} | |||
func (m *MinioHelper) GetAllObjectsUnderDir(prefix string, maxKeyArray ...int) ([]storage.FileInfo, error) { | |||
prefix = strings.TrimSuffix(prefix, "/") + "/" | |||
prefixLen := len(prefix) | |||
delimiter := "" | |||
marker := "" | |||
index := 1 | |||
fileInfoList := storage.FileInfoList{} | |||
maxKey := setting.OUTPUT_DOWNLOAD_MAX_KEY | |||
if len(maxKeyArray) > 0 { | |||
maxKey = maxKeyArray[0] | |||
} | |||
for { | |||
output, err := storage.MinioCore.ListObjects(m.GetBucket(), prefix, marker, delimiter, maxKey) | |||
if err == nil { | |||
log.Info("Page:%d\n", index) | |||
index++ | |||
for _, val := range output.Contents { | |||
var isDir bool | |||
if prefixLen == len(val.Key) { | |||
continue | |||
} | |||
if strings.HasSuffix(val.Key, "/") { | |||
isDir = true | |||
} else { | |||
isDir = false | |||
} | |||
if isDir { | |||
continue | |||
} | |||
fileInfo := storage.FileInfo{ | |||
ModTime: val.LastModified.Format("2006-01-02 15:04:05"), | |||
FileName: strings.TrimPrefix(val.Key[prefixLen:], "/"), | |||
Size: val.Size, | |||
IsDir: isDir, | |||
ParenDir: "", | |||
RelativePath: val.Key, | |||
} | |||
fileInfoList = append(fileInfoList, fileInfo) | |||
} | |||
if output.IsTruncated { | |||
marker = output.NextMarker | |||
} else { | |||
break | |||
} | |||
} else { | |||
log.Info("list error." + err.Error()) | |||
return nil, err | |||
} | |||
} | |||
sort.Sort(fileInfoList) | |||
return fileInfoList, nil | |||
} | |||
func (m *MinioHelper) GetSignedDownloadUrl(key string) (string, error) { | |||
fileName := key[strings.LastIndex(key, "/"):] | |||
fileName = strings.TrimPrefix(fileName, "/") | |||
if fileName == "" { | |||
fileName = fmt.Sprint(time.Now().Unix()) | |||
} | |||
reqParams := make(url.Values) | |||
reqParams.Set("response-content-disposition", "attachment; filename=\""+fileName+"\"") | |||
var preURL *url.URL | |||
preURL, err := storage.MinioCore.PresignedGetObject(m.GetBucket(), key, storage.PresignedGetUrlExpireTime, reqParams) | |||
if err != nil { | |||
return "", err | |||
} | |||
return preURL.String(), nil | |||
} | |||
// GetS3DownloadUrl is part of the storage-helper interface; the MinIO backend
// returns no s3:// download address, so the result is always the empty string
// (cf. OBSHelper.GetS3DownloadUrl, which builds a real s3:// url).
func (m *MinioHelper) GetS3DownloadUrl(key string) string {
	return ""
}
func (m *MinioHelper) CopyByPath(sourcePath, targetPath string, filterSuffix []string) error { | |||
log.Info("CopyByPath sourcePath=%s,targetPath=%s", sourcePath, targetPath) | |||
allFiles, _ := m.GetAllObjectsUnderDir(sourcePath) | |||
var fileNames []string | |||
for _, file := range allFiles { | |||
if isMatchSuffix(file.FileName, filterSuffix) { | |||
continue | |||
} | |||
fileNames = append(fileNames, file.FileName) | |||
} | |||
log.Info("Previous task all files", fileNames) | |||
if len(fileNames) == 0 { | |||
return nil | |||
} | |||
for _, file := range fileNames { | |||
srcObjectName := path.Join(sourcePath, file) | |||
destObjectName := path.Join(targetPath, file) | |||
_, err := storage.MinioCore.Client.StatObject(m.GetBucket(), srcObjectName, minio.StatObjectOptions{}) | |||
if err != nil { | |||
log.Info("Get file error:" + err.Error()) | |||
} | |||
_, err = storage.MinioCore.CopyObject(m.GetBucket(), srcObjectName, m.GetBucket(), destObjectName, map[string]string{}) | |||
if err != nil { | |||
log.Error("CopyByPath MinioCopyFiles error. sourcePath=%s targetPath=%s err=%v", sourcePath, targetPath, err) | |||
return err | |||
} | |||
} | |||
return nil | |||
} |
@@ -0,0 +1,225 @@ | |||
package storage_helper | |||
import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/obs" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"io" | |||
"net/url" | |||
"path" | |||
"sort" | |||
"strings" | |||
) | |||
// OBSHelper implements the storage-helper interface on top of Huawei OBS
// (Object Storage Service), mirroring MinioHelper for the OBS backend.
type OBSHelper struct {
}
// UploadDir uploads the local directory codePath to OBS under objectKeyPrefix
// by delegating to UploadDirToObs. The semantics of the empty third argument
// are defined by UploadDirToObs (not visible here) — presumably "no filter".
func (m *OBSHelper) UploadDir(codePath, objectKeyPrefix string) error {
	return UploadDirToObs(codePath, objectKeyPrefix, "")
}
// GetJobDefaultObjectKeyPrefix returns the default OBS object-key prefix for
// a job: setting.CodePathPrefix joined with the job name.
func (m *OBSHelper) GetJobDefaultObjectKeyPrefix(jobName string) string {
	return path.Join(setting.CodePathPrefix, jobName)
}
// GetRealPath is part of the storage-helper interface; OBS objects have no
// local filesystem path, so the result is always the empty string.
func (m *OBSHelper) GetRealPath(objectKey string) string {
	return ""
}
// GetBucket returns the configured OBS bucket name (setting.Bucket).
func (m *OBSHelper) GetBucket() string {
	return setting.Bucket
}
func (m *OBSHelper) MKDIR(path string) error { | |||
path = strings.TrimSuffix(path, "/") + "/" | |||
input := &obs.PutObjectInput{} | |||
input.Bucket = setting.Bucket | |||
input.Key = path | |||
_, err := storage.ObsCli.PutObject(input) | |||
if err != nil { | |||
log.Error("PutObject(%s) failed: %s", input.Key, err.Error()) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (m *OBSHelper) GetEndpoint() string { | |||
index := strings.Index(setting.Endpoint, "//") | |||
endpoint := setting.Endpoint[index+2:] | |||
return endpoint | |||
} | |||
func (m *OBSHelper) GetOneLevelObjectsUnderDir(dirPath string, maxKeyArray ...int) ([]storage.FileInfo, error) { | |||
input := &obs.ListObjectsInput{} | |||
input.Bucket = m.GetBucket() | |||
input.Prefix = dirPath | |||
input.Delimiter = "/" | |||
maxKey := setting.OUTPUT_SHOW_MAX_KEY | |||
if len(maxKeyArray) > 0 { | |||
maxKey = maxKeyArray[0] | |||
} | |||
input.MaxKeys = maxKey | |||
if !strings.HasSuffix(input.Prefix, "/") { | |||
input.Prefix += "/" | |||
} | |||
fileInfos := make([]storage.FileInfo, 0) | |||
prefixLen := len(input.Prefix) | |||
index := 1 | |||
output, err := storage.ObsCli.ListObjects(input) | |||
if err != nil { | |||
if obsError, ok := err.(obs.ObsError); ok { | |||
log.Error("Code:%s, Message:%s", obsError.Code, obsError.Message) | |||
} | |||
return nil, err | |||
} | |||
log.Info("Page:%d\n", index) | |||
index++ | |||
for _, val := range output.Contents { | |||
var fileName string | |||
if val.Key == input.Prefix { | |||
continue | |||
} | |||
fileName = val.Key[prefixLen:] | |||
fileInfo := storage.FileInfo{ | |||
ModTime: val.LastModified.Local().Format("2006-01-02 15:04:05"), | |||
FileName: fileName, | |||
Size: val.Size, | |||
IsDir: false, | |||
RelativePath: dirPath + "/" + fileName, | |||
} | |||
fileInfos = append(fileInfos, fileInfo) | |||
} | |||
for _, val := range output.CommonPrefixes { | |||
fileName := strings.TrimSuffix(strings.TrimPrefix(val, input.Prefix), "/") | |||
fileInfo := storage.FileInfo{ | |||
FileName: fileName, | |||
IsDir: true, | |||
} | |||
fileInfos = append(fileInfos, fileInfo) | |||
} | |||
return fileInfos, nil | |||
} | |||
func (m *OBSHelper) GetAllObjectsUnderDir(prefix string, maxKeyArray ...int) ([]storage.FileInfo, error) { | |||
prefix = strings.TrimSuffix(prefix, "/") + "/" | |||
bucket := m.GetBucket() | |||
input := &obs.ListObjectsInput{} | |||
input.Bucket = bucket | |||
input.MaxKeys = 1000 | |||
input.Prefix = prefix | |||
maxKey := setting.OUTPUT_DOWNLOAD_MAX_KEY | |||
if len(maxKeyArray) > 0 { | |||
maxKey = maxKeyArray[0] | |||
} | |||
input.MaxKeys = maxKey | |||
index := 1 | |||
fileInfoList := storage.FileInfoList{} | |||
prefixLen := len(prefix) | |||
log.Info("full obs path:", input.Bucket+input.Prefix) | |||
log.Info("prefix=" + input.Prefix) | |||
for { | |||
output, err := storage.ObsCli.ListObjects(input) | |||
if err == nil { | |||
log.Info("Page:%d\n", index) | |||
index++ | |||
for _, val := range output.Contents { | |||
var isDir bool | |||
if prefixLen == len(val.Key) { | |||
continue | |||
} | |||
if strings.HasSuffix(val.Key, "/") { | |||
isDir = true | |||
} else { | |||
isDir = false | |||
} | |||
if isDir { | |||
continue | |||
} | |||
fileInfo := storage.FileInfo{ | |||
ModTime: val.LastModified.Format("2006-01-02 15:04:05"), | |||
FileName: strings.TrimPrefix(val.Key[prefixLen:], "/"), | |||
Size: val.Size, | |||
IsDir: isDir, | |||
ParenDir: "", | |||
RelativePath: val.Key, | |||
} | |||
fileInfoList = append(fileInfoList, fileInfo) | |||
} | |||
if output.IsTruncated { | |||
input.Marker = output.NextMarker | |||
} else { | |||
break | |||
} | |||
} else { | |||
if obsError, ok := err.(obs.ObsError); ok { | |||
log.Info("Code:%s\n", obsError.Code) | |||
log.Info("Message:%s\n", obsError.Message) | |||
} | |||
return nil, err | |||
} | |||
} | |||
sort.Sort(fileInfoList) | |||
return fileInfoList, nil | |||
} | |||
func (m *OBSHelper) OpenFile(path string) (io.ReadCloser, error) { | |||
input := &obs.GetObjectInput{} | |||
input.Bucket = m.GetBucket() | |||
input.Key = path | |||
output, err := storage.ObsCli.GetObject(input) | |||
if err != nil { | |||
log.Error("OpenFile err. path=%s err=%v", path, err) | |||
return nil, err | |||
} | |||
return output.Body, nil | |||
} | |||
func (m *OBSHelper) GetSignedDownloadUrl(key string) (string, error) { | |||
input := &obs.CreateSignedUrlInput{} | |||
input.Bucket = m.GetBucket() | |||
input.Key = key | |||
input.Expires = 60 * 60 | |||
input.Method = obs.HttpMethodGet | |||
comma := strings.LastIndex(key, "/") | |||
filename := key | |||
if comma != -1 { | |||
filename = key[comma+1:] | |||
} | |||
reqParams := make(map[string]string) | |||
filename = url.PathEscape(filename) | |||
reqParams["response-content-disposition"] = "attachment; filename=\"" + filename + "\"" | |||
input.QueryParams = reqParams | |||
output, err := storage.ObsCli.CreateSignedUrl(input) | |||
if err != nil { | |||
log.Error("CreateSignedUrl failed:", err.Error()) | |||
return "", err | |||
} | |||
return output.SignedUrl, nil | |||
} | |||
// GetS3DownloadUrl builds the s3:// address of key inside the configured OBS
// bucket, normalizing any leading "/" on key so the result has no double
// slash after the bucket name.
func (m *OBSHelper) GetS3DownloadUrl(key string) string {
	return "s3://" + setting.Bucket + "/" + strings.TrimPrefix(key, "/")
}
func (m *OBSHelper) CopyByPath(sourcePath, targetPath string, filterSuffix []string) error { | |||
log.Info("CopyByPath sourcePath=%s,targetPath=%s", sourcePath, targetPath) | |||
allFiles, _ := m.GetAllObjectsUnderDir(sourcePath) | |||
var fileNames []string | |||
for _, file := range allFiles { | |||
if isMatchSuffix(file.FileName, filterSuffix) { | |||
continue | |||
} | |||
fileNames = append(fileNames, file.FileName) | |||
} | |||
log.Info("Previous task all files", fileNames) | |||
_, err := storage.ObsCopyManyFile(m.GetBucket(), sourcePath, m.GetBucket(), targetPath, fileNames) | |||
if err != nil { | |||
log.Error("CopyByPath ObsCopyManyFile error. sourcePath=%s targetPath=%s err=%v", sourcePath, targetPath, err) | |||
return err | |||
} | |||
return nil | |||
} |
@@ -1,4 +1,4 @@ | |||
package upload | |||
package storage_helper | |||
import ( | |||
"bufio" |
@@ -3,14 +3,12 @@ package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"strconv" | |||
"strings" | |||
) | |||
@@ -23,60 +21,31 @@ func init() { | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.OpenICloudbrainOne, | |||
JobType: models.JobTypeDebug, | |||
Config: GetCloudbrainOneNotebookConfig, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeDebug, entity.OpenICloudbrainOne, t) | |||
} | |||
func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasetSize). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckBranchExists). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create CloudbrainOneNotebookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g CloudbrainOneNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
if opts.IsFileNoteBookRequest { | |||
return entity.AITaskConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerFileNoteBookCode: {}, | |||
entity.ContainerCode: { | |||
ContainerPath: "/code", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
NotArchive: true, | |||
}, | |||
}, | |||
} | |||
} | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
func GetCloudbrainOneNotebookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { | |||
//默认配置 | |||
config := &entity.AITaskBaseConfig{ | |||
ActionType: models.ActionCreateDebugGPUTask, | |||
IsActionUseJobId: false, | |||
DatasetsLimitSizeGB: setting.DebugAttachSize, | |||
DatasetsMaxNum: setting.MaxDatasetNum, | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: "/code", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
NotArchive: true, | |||
Uncompressed: true, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: "/dataset", | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
NotArchive: true, | |||
Uncompressed: true, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: "/pretrainmodel", | |||
@@ -84,22 +53,60 @@ func (g CloudbrainOneNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfig | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
}, | |||
entity.ContainerOutPutPath: { | |||
ContainerPath: "/model", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
ContainerPath: "/model", | |||
StorageRelativePath: cloudbrain.ModelMountPath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
MKDIR: true, | |||
}, | |||
}, | |||
} | |||
//在线运行notebook配置 | |||
if opts.IsFileNoteBookRequest { | |||
config = &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerFileNoteBookCode: {}, | |||
entity.ContainerCode: { | |||
ContainerPath: "/code", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
Uncompressed: true, | |||
}, | |||
}, | |||
} | |||
} | |||
return config | |||
} | |||
func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckBranchExists). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create CloudbrainOneNotebookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (t CloudbrainOneNotebookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.BuildRequest4Restart). | |||
Next(t.CheckOutput4Restart). | |||
Next(t.CheckSourceTaskIsCleared). | |||
Next(t.CheckModel). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.BuildContainerData). | |||
@@ -119,17 +126,6 @@ func (t CloudbrainOneNotebookTaskTemplate) Restart(ctx *context.CreationContext) | |||
} | |||
func (c CloudbrainOneNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
@@ -175,17 +171,3 @@ func (g CloudbrainOneNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationC | |||
//云脑一没有再次调试接口,通过使用同样的参数新建接口来模拟 | |||
return g.CallCreationAPI(ctx) | |||
} | |||
func (CloudbrainOneNotebookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
jobID := ctx.Response.JobID | |||
task, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return response.NewBizError(err) | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugGPUTask) | |||
return nil | |||
} |
@@ -0,0 +1,173 @@ | |||
package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"strings" | |||
) | |||
type CloudbrainOneTrainTaskTemplate struct { | |||
DefaultAITaskTemplate | |||
} | |||
// init registers the Cloudbrain I (GPU) train-task template in the global
// task registry, keyed by (JobTypeTrain, OpenICloudbrainOne).
func init() {
	t := &CloudbrainOneTrainTaskTemplate{
		DefaultAITaskTemplate: DefaultAITaskTemplate{
			ClusterType: entity.OpenICloudbrainOne,
			JobType:     models.JobTypeTrain,
			Config:      GetCloudbrainOneTrainConfig,
		},
	}
	RegisterTask(models.JobTypeTrain, entity.OpenICloudbrainOne, t)
}
// GetCloudbrainOneTrainConfig returns the base configuration for Cloudbrain I
// (GPU) train tasks: the notification action type and the container mount
// steps (code, dataset, pretrained model, output, log), all backed by MinIO.
func GetCloudbrainOneTrainConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
	config := &entity.AITaskBaseConfig{
		ActionType:       models.ActionCreateGPUTrainTask,
		IsActionUseJobId: true,
		ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
			// Repository code: writable, uncompressed, mounted at /code.
			entity.ContainerCode: {
				ContainerPath:     "/code",
				ReadOnly:          false,
				AcceptStorageType: []entity.StorageType{entity.MINIO},
				Uncompressed:      true,
			},
			// Datasets: read-only, uncompressed, mounted at /dataset.
			entity.ContainerDataset: {
				ContainerPath:     "/dataset",
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.MINIO},
				Uncompressed:      true,
			},
			entity.ContainerPreTrainModel: {
				ContainerPath:     "/pretrainmodel",
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.MINIO},
			},
			entity.ContainerOutPutPath: {
				ContainerPath:       "/model",
				StorageRelativePath: cloudbrain.ModelMountPath,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.MINIO},
			},
			// NOTE(review): the log step shares the /model mount and the same
			// storage path as the output step (only this one sets MKDIR) —
			// confirm this is intentional.
			entity.ContainerLogPath: {
				ContainerPath:       "/model",
				StorageRelativePath: cloudbrain.ModelMountPath,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.MINIO},
				MKDIR:               true,
			},
		}}
	return config
}
func (t CloudbrainOneTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.HandleReqParameters). | |||
Next(t.CheckPrivilege4Continue). | |||
Next(t.CheckSourceTaskIsCleared). | |||
Next(t.CheckWorkerNum). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckBootFile). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g CloudbrainOneTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
return response.SYSTEM_ERROR | |||
} | |||
form := ctx.Request | |||
req := entity.CreateTrainTaskRequest{ | |||
Name: form.JobName, | |||
DisplayJobName: form.DisplayJobName, | |||
Tasks: []entity.TrainTask{ | |||
{ | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), | |||
Code: ctx.GetContainerDataArray(entity.ContainerCode), | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
BootFile: form.BootFile, | |||
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), | |||
LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), | |||
Params: form.ParamArray, | |||
Spec: ctx.Spec, | |||
}, | |||
}, | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateTrainJob(req) | |||
if err != nil { | |||
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
return response.NewBizError(err) | |||
} | |||
ctx.Response = &entity.CreationResponse{ | |||
JobID: res.JobID, | |||
Status: res.Status, | |||
CreateTime: createTime, | |||
} | |||
return nil | |||
} | |||
// CallRestartAPI resubmits the train job with the same parameters to restart
// it; Cloudbrain I has no dedicated restart endpoint, so this mirrors
// CallCreationAPI. On success the creation response (job id, status, create
// time) is stored on the context.
// NOTE(review): unlike CallCreationAPI, this request does not set LogPath —
// confirm whether restarts intentionally drop the log mount.
func (g CloudbrainOneTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError {
	c := g.GetMyCluster()
	if c == nil {
		return response.SYSTEM_ERROR
	}
	form := ctx.Request
	req := entity.CreateTrainTaskRequest{
		Name:           form.JobName,
		DisplayJobName: form.DisplayJobName,
		Tasks: []entity.TrainTask{
			{
				Name:           form.JobName,
				ResourceSpecId: ctx.Spec.SourceSpecId,
				ImageId:        form.ImageID,
				ImageUrl:       strings.TrimSpace(form.ImageUrl),
				Datasets:       ctx.GetContainerDataArray(entity.ContainerDataset),
				Code:           ctx.GetContainerDataArray(entity.ContainerCode),
				CenterID:       ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType),
				PreTrainModel:  ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
				BootFile:       form.BootFile,
				OutPut:         ctx.GetContainerDataArray(entity.ContainerOutPutPath),
				Params:         form.ParamArray,
				Spec:           ctx.Spec,
			},
		},
	}
	createTime := timeutil.TimeStampNow()
	res, err := c.CreateTrainJob(req)
	if err != nil {
		log.Error("CloudbrainOneTrainTaskTemplate CallRestartAPI err.req=%+v err=%v", req, err)
		return response.NewBizError(err)
	}
	ctx.Response = &entity.CreationResponse{
		JobID:      res.JobID,
		Status:     res.Status,
		CreateTime: createTime,
	}
	return nil
}
@@ -5,15 +5,12 @@ import ( | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/convert" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
api "code.gitea.io/gitea/modules/structs" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/cloudbrain/resource" | |||
"strconv" | |||
"strings" | |||
) | |||
@@ -26,43 +23,19 @@ func init() { | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.OpenICloudbrainTwo, | |||
JobType: models.JobTypeDebug, | |||
Config: GetCloudbrainTwoNotebookConfig, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeDebug, entity.OpenICloudbrainTwo, t) | |||
} | |||
func (t CloudbrainTwoNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasetSize). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckBranchExists). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create CloudbrainOneNotebookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g CloudbrainTwoNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
if opts.IsFileNoteBookRequest { | |||
return entity.AITaskConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerFileNoteBookCode: {}, | |||
}, | |||
} | |||
} | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
func GetCloudbrainTwoNotebookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { | |||
//默认配置 | |||
config := &entity.AITaskBaseConfig{ | |||
ActionType: models.ActionCreateDebugNPUTask, | |||
IsActionUseJobId: false, | |||
DatasetsLimitSizeGB: setting.DebugAttachSize, | |||
DatasetsMaxNum: setting.MaxDatasetNum, | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
Disable: true, | |||
@@ -78,17 +51,46 @@ func (g CloudbrainTwoNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfig | |||
}, | |||
}, | |||
} | |||
//在线运行notebook配置 | |||
if opts.IsFileNoteBookRequest { | |||
config = &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerFileNoteBookCode: {}, | |||
}, | |||
} | |||
} | |||
return config | |||
} | |||
func (t CloudbrainTwoNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckBranchExists). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create CloudbrainOneNotebookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (t CloudbrainTwoNotebookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.BuildRequest4Restart). | |||
Next(t.CheckOutput4Restart). | |||
Next(t.CheckSourceTaskIsCleared). | |||
Next(t.CheckModel). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckIsCleared). | |||
Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CallRestartAPI). | |||
@@ -166,31 +168,6 @@ func (g CloudbrainTwoNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationC | |||
return nil | |||
} | |||
func (c CloudbrainTwoNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (CloudbrainTwoNotebookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
jobID := ctx.Response.JobID | |||
task, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return response.NewBizError(err) | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugNPUTask) | |||
return nil | |||
} | |||
func (g CloudbrainTwoNotebookTaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) { | |||
var aiCenterCode = models.AICenterOfCloudBrainTwo | |||
if setting.ModelartsCD.Enabled { | |||
@@ -0,0 +1,149 @@ | |||
package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"encoding/json" | |||
"strings" | |||
) | |||
type CloudbrainTwoTrainTaskTemplate struct { | |||
DefaultAITaskTemplate | |||
} | |||
// init registers the Cloudbrain II (NPU/ModelArts) train-task template in the
// global task registry, keyed by (JobTypeTrain, OpenICloudbrainTwo).
func init() {
	t := &CloudbrainTwoTrainTaskTemplate{
		DefaultAITaskTemplate: DefaultAITaskTemplate{
			ClusterType: entity.OpenICloudbrainTwo,
			JobType:     models.JobTypeTrain,
			Config:      GetCloudbrainTwoTrainConfig,
		},
	}
	RegisterTask(models.JobTypeTrain, entity.OpenICloudbrainTwo, t)
}
// GetCloudbrainTwoTrainConfig returns the base configuration for Cloudbrain II
// (NPU/ModelArts) train tasks: the notification action type and the container
// mount steps (code, dataset, pretrained model, output, log), all backed by OBS.
func GetCloudbrainTwoTrainConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig {
	var config = &entity.AITaskBaseConfig{
		ActionType:       models.ActionCreateTrainTask,
		IsActionUseJobId: true,
		ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{
			// Repository code: writable, uncompressed, mounted at /code.
			entity.ContainerCode: {
				ContainerPath:     "/code",
				ReadOnly:          false,
				AcceptStorageType: []entity.StorageType{entity.OBS},
				Uncompressed:      true,
			},
			// Datasets: read-only, uncompressed, mounted at /dataset.
			entity.ContainerDataset: {
				ContainerPath:     "/dataset",
				ReadOnly:          true,
				Uncompressed:      true,
				AcceptStorageType: []entity.StorageType{entity.OBS},
			},
			entity.ContainerPreTrainModel: {
				ContainerPath:     "/pretrainmodel",
				ReadOnly:          true,
				AcceptStorageType: []entity.StorageType{entity.OBS},
			},
			// Output and log storage paths carry the default version suffix and
			// are created up front (MKDIR).
			entity.ContainerOutPutPath: {
				ContainerPath:       "/output",
				StorageRelativePath: "/output" + models.CloudbrainTwoDefaultVersion,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.OBS},
				MKDIR:               true,
			},
			entity.ContainerLogPath: {
				ContainerPath:       "/log",
				StorageRelativePath: "/log" + models.CloudbrainTwoDefaultVersion,
				ReadOnly:            false,
				AcceptStorageType:   []entity.StorageType{entity.OBS},
				MKDIR:               true,
			},
		},
	}
	return config
}
func (t CloudbrainTwoTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.HandleReqParameters). | |||
Next(t.CheckPrivilege4Continue). | |||
Next(t.CheckSourceTaskIsCleared). | |||
Next(t.CheckWorkerNum). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckBootFile). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g CloudbrainTwoTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
return response.SYSTEM_ERROR | |||
} | |||
var resourcePools modelarts.ResourcePool | |||
if err := json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { | |||
log.Error("Unmarshal error. %v", err) | |||
return response.NewBizError(err) | |||
} else if len(resourcePools.Info) == 0 { | |||
log.Error("UresourcePools.Info is empty. %v", err) | |||
return response.SYSTEM_ERROR | |||
} | |||
form := ctx.Request | |||
req := entity.CreateTrainTaskRequest{ | |||
Name: form.JobName, | |||
DisplayJobName: form.DisplayJobName, | |||
Description: form.Description, | |||
Tasks: []entity.TrainTask{ | |||
{ | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), | |||
Code: ctx.GetContainerDataArray(entity.ContainerCode), | |||
LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
BootFile: form.BootFile, | |||
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), | |||
Params: form.ParamArray, | |||
Spec: ctx.Spec, | |||
PoolId: resourcePools.Info[0].ID, | |||
WorkServerNumber: form.WorkServerNumber, | |||
}, | |||
}, | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateTrainJob(req) | |||
if err != nil { | |||
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
return response.NewBizError(err) | |||
} | |||
ctx.Response = &entity.CreationResponse{ | |||
JobID: res.JobID, | |||
Status: res.Status, | |||
CreateTime: createTime, | |||
VersionID: res.VersionID, | |||
VersionName: res.VersionName, | |||
} | |||
return nil | |||
} |
@@ -1,19 +1,15 @@ | |||
package task | |||
import ( | |||
"encoding/json" | |||
"strconv" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
) | |||
@@ -26,53 +22,52 @@ func init() { | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.C2Net, | |||
JobType: models.JobTypeDebug, | |||
Config: GetGrampusNoteBookConfig, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeDebug, entity.C2Net, t) | |||
} | |||
func (t GrampusNoteBookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasetSize). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g GrampusNoteBookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
containerPrefix := "" | |||
func GetGrampusNoteBookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { | |||
codePath := "/code" | |||
datasetPath := "/dataset" | |||
pretrainModelPath := "/pretrainmodel" | |||
config := &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
}, | |||
} | |||
if opts.ComputeSource == models.NPU || opts.ComputeSource == models.DCU { | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
config = &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: containerPrefix + codePath, | |||
ContainerPath: codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: containerPrefix + datasetPath, | |||
ContainerPath: datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: containerPrefix + pretrainModelPath, | |||
ContainerPath: pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
@@ -81,39 +76,77 @@ func (g GrampusNoteBookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) | |||
} | |||
if opts.ComputeSource == models.GCU { | |||
containerPrefix = "/tmp" | |||
config = &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: "/tmp" + codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: "/tmp" + datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: "/tmp" + pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
}, | |||
} | |||
} | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: containerPrefix + codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: containerPrefix + datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: containerPrefix + pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
}, | |||
switch opts.ComputeSource { | |||
case models.NPU: | |||
config.ActionType = models.ActionCreateGrampusNPUDebugTask | |||
case models.GPU: | |||
config.ActionType = models.ActionCreateGrampusGPUDebugTask | |||
case models.GCU: | |||
config.ActionType = models.ActionCreateGrampusGCUDebugTask | |||
case models.MLU: | |||
config.ActionType = models.ActionCreateGrampusMLUDebugTask | |||
case models.DCU: | |||
config.ActionType = models.ActionCreateGrampusDCUDebugTask | |||
case models.CPU: | |||
config.ActionType = models.ActionCreateSuperComputeTask | |||
} | |||
config.IsActionUseJobId = false | |||
config.DatasetsLimitSizeGB = setting.DebugAttachSize | |||
config.DatasetsMaxNum = setting.MaxDatasetNum | |||
return config | |||
} | |||
func (t GrampusNoteBookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (t GrampusNoteBookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.BuildRequest4Restart). | |||
Next(t.CheckOutput4Restart). | |||
Next(t.CheckSourceTaskIsCleared). | |||
Next(t.CheckModel). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CallRestartAPI). | |||
@@ -131,17 +164,6 @@ func (t GrampusNoteBookTaskTemplate) Restart(ctx *context.CreationContext) (*ent | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID, Status: ctx.NewCloudbrain.Status}, nil | |||
} | |||
func (c GrampusNoteBookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
var autoStopDurationMs = int64(4 * 60 * 60 * 1000) | |||
func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
@@ -172,9 +194,6 @@ func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContex | |||
}, | |||
}, | |||
} | |||
reqJson, _ := json.Marshal(req) | |||
log.Info("DCU REQ:" + string(reqJson)) | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateNoteBook(req) | |||
if err != nil { | |||
@@ -216,33 +235,3 @@ func (g GrampusNoteBookTaskTemplate) CallRestartAPI(ctx *context.CreationContext | |||
} | |||
return nil | |||
} | |||
func (GrampusNoteBookTaskTemplate) CheckOldJobPath(ctx *context.CreationContext) *response.BizError { | |||
return nil | |||
} | |||
func (GrampusNoteBookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
//todo 需要进一步优化 | |||
var actionType models.ActionType | |||
switch req.ComputeSource.Name { | |||
case models.NPU: | |||
actionType = models.ActionCreateGrampusNPUDebugTask | |||
case models.GPU: | |||
actionType = models.ActionCreateGrampusGPUDebugTask | |||
case models.GCU: | |||
actionType = models.ActionCreateGrampusGCUDebugTask | |||
case models.MLU: | |||
actionType = models.ActionCreateGrampusMLUDebugTask | |||
case models.DCU: | |||
actionType = models.ActionCreateGrampusDCUDebugTask | |||
} | |||
task, err := models.GetCloudbrainByCloudbrainID(ctx.NewCloudbrain.ID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return response.NewBizError(err) | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, stringId, req.DisplayJobName, actionType) | |||
return nil | |||
} |
@@ -1,17 +1,14 @@ | |||
package task | |||
import ( | |||
"strconv" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/cloudbrain/resource" | |||
) | |||
@@ -25,65 +22,69 @@ func init() { | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.C2Net, | |||
JobType: models.JobTypeOnlineInference, | |||
Config: GetGrampusOnlineInferConfig, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeOnlineInference, entity.C2Net, t) | |||
} | |||
func (t GrampusOnlineInferTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
log.Info("GrampusOnlineInferTaskTemplate create") | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckBootFile). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasetSize). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g GrampusOnlineInferTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
containerPrefix := "" | |||
func GetGrampusOnlineInferConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { | |||
codePath := "/code" | |||
datasetPath := "/dataset" | |||
pretrainModelPath := "/pretrainmodel" | |||
outputPath := "/output" | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
config := &entity.AITaskBaseConfig{ | |||
ActionType: models.ActionCreateGrampusGPUOnlineInferTask, | |||
IsActionUseJobId: false, | |||
DatasetsLimitSizeGB: setting.DebugAttachSize, | |||
DatasetsMaxNum: setting.MaxDatasetNum, | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: containerPrefix + codePath, | |||
ContainerPath: codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: containerPrefix + datasetPath, | |||
ContainerPath: datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: containerPrefix + pretrainModelPath, | |||
ContainerPath: pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerOutPutPath: { | |||
ContainerPath: containerPrefix + outputPath, | |||
ContainerPath: outputPath, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
MKDIR: false, | |||
}, | |||
}, | |||
} | |||
return config | |||
} | |||
func (t GrampusOnlineInferTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
log.Info("GrampusOnlineInferTaskTemplate create") | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckBootFile). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
@@ -129,10 +130,7 @@ func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationCon | |||
res, err := c.CreateOnlineInfer(req) | |||
if err != nil { | |||
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
ctx.Response = &entity.CreationResponse{ | |||
Error: err, | |||
} | |||
return nil | |||
return response.NewBizError(err) | |||
} | |||
if res.JobID == "" { | |||
log.Error("GrampusNoteBookTask CreateNoteBook failed.Cloudbrain.JobID=%s", ctx.SourceCloudbrain.JobID) | |||
@@ -159,30 +157,3 @@ func (g GrampusOnlineInferTaskTemplate) LoadSpec(ctx *context.CreationContext) * | |||
ctx.Spec = spec | |||
return nil | |||
} | |||
func (c GrampusOnlineInferTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c GrampusOnlineInferTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
//todo 需要进一步优化 | |||
var actionType models.ActionType | |||
actionType = models.ActionCreateGrampusGPUOnlineInferTask | |||
task, err := models.GetCloudbrainByCloudbrainID(ctx.NewCloudbrain.ID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return response.NewBizError(err) | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, stringId, req.DisplayJobName, actionType) | |||
return nil | |||
} |
@@ -3,11 +3,11 @@ package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"strings" | |||
) | |||
@@ -21,25 +21,105 @@ func init() { | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.C2Net, | |||
JobType: models.JobTypeTrain, | |||
Config: GetGrampusTrainTaskConfig, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeTrain, entity.C2Net, t) | |||
} | |||
func GetGrampusTrainTaskConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { | |||
codePath := "/code" | |||
datasetPath := "/dataset" | |||
pretrainModelPath := "/pretrainmodel" | |||
outputPath := "/output" | |||
var config = &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: "/tmp" + codePath, | |||
StorageRelativePath: codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: "/tmp" + datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: "/tmp" + pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerOutPutPath: { | |||
ContainerPath: "/tmp" + outputPath, | |||
StorageRelativePath: cloudbrain.ModelMountPath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
MKDIR: false, | |||
}, | |||
}, | |||
} | |||
if opts.ComputeSource == models.NPU { | |||
config = &entity.AITaskBaseConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: "/cache" + codePath, | |||
StorageRelativePath: codePath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: "/cache" + datasetPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: "/cache" + pretrainModelPath, | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, | |||
}, | |||
entity.ContainerOutPutPath: { | |||
ContainerPath: "/cache" + outputPath, | |||
StorageRelativePath: setting.OutPutPath, | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.OBS}, | |||
}, | |||
}, | |||
} | |||
} | |||
switch opts.ComputeSource { | |||
case models.NPU: | |||
config.ActionType = models.ActionCreateGrampusNPUTrainTask | |||
case models.GPU: | |||
config.ActionType = models.ActionCreateGrampusGPUTrainTask | |||
case models.GCU: | |||
config.ActionType = models.ActionCreateGrampusGCUTrainTask | |||
} | |||
config.IsActionUseJobId = true | |||
return config | |||
} | |||
func (t GrampusTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.HandleReqParameters). | |||
Next(t.CheckPrivilege4Continue). | |||
Next(t.CheckSourceTaskIsCleared). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckBootFile). | |||
Next(t.CheckWorkerNum). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.BuildContainerData). | |||
Next(t.CallCreationAPI). | |||
Next(t.AfterCallCreationAPI4Sync). | |||
Next(t.NotifyCreation). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusTrainTask err.%v", err) | |||
log.Error("create GrampusTrainTaskTemplate err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
@@ -51,34 +131,38 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) | |||
return response.SYSTEM_ERROR | |||
} | |||
form := ctx.Request | |||
imageUrl := strings.TrimSpace(form.ImageUrl) | |||
if form.ImageID != "" { | |||
imageUrl = "" | |||
} | |||
req := entity.CreateTrainTaskRequest{ | |||
Name: form.JobName, | |||
Name: form.JobName, | |||
DisplayJobName: form.DisplayJobName, | |||
Tasks: []entity.TrainTask{ | |||
{ | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), | |||
Code: ctx.GetContainerData(entity.ContainerCode), | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
Models: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
BootFile: form.BootFile, | |||
OutPut: ctx.GetContainerData(entity.ContainerOutPutPath), | |||
Params: form.ParamArray, | |||
Spec: ctx.Spec, | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: imageUrl, | |||
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), | |||
Code: ctx.GetContainerDataArray(entity.ContainerCode), | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
BootFile: form.BootFile, | |||
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), | |||
Params: form.ParamArray, | |||
Spec: ctx.Spec, | |||
RepoName: ctx.Repository.Name, | |||
WorkServerNumber: ctx.Request.WorkServerNumber, | |||
}, | |||
}, | |||
TaskConfig: ctx.Config, | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateTrainJob(req) | |||
if err != nil { | |||
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
log.Error("GrampusTrainTaskTemplate CreateTrainJob err.req=%+v err=%v", req, err) | |||
return response.NewBizError(err) | |||
ctx.Response = &entity.CreationResponse{ | |||
Error: err, | |||
} | |||
return nil | |||
} | |||
ctx.Response = &entity.CreationResponse{ | |||
JobID: res.JobID, | |||
@@ -88,17 +172,6 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) | |||
return nil | |||
} | |||
func (c GrampusTrainTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
@@ -106,7 +179,8 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) * | |||
} | |||
form := ctx.Request | |||
req := entity.CreateTrainTaskRequest{ | |||
Name: form.JobName, | |||
Name: form.JobName, | |||
DisplayJobName: form.DisplayJobName, | |||
Tasks: []entity.TrainTask{ | |||
{ | |||
Name: form.JobName, | |||
@@ -114,11 +188,11 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) * | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), | |||
Code: ctx.GetContainerData(entity.ContainerCode), | |||
Code: ctx.GetContainerDataArray(entity.ContainerCode), | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
Models: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
BootFile: form.BootFile, | |||
OutPut: ctx.GetContainerData(entity.ContainerOutPutPath), | |||
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), | |||
Params: form.ParamArray, | |||
Spec: ctx.Spec, | |||
}, | |||
@@ -127,7 +201,7 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) * | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateTrainJob(req) | |||
if err != nil { | |||
log.Error("GrampusNoteBookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
log.Error("GrampusTrainTaskTemplate CallRestartAPI err.req=%+v err=%v", req, err) | |||
return response.NewBizError(err) | |||
} | |||
ctx.Response = &entity.CreationResponse{ | |||
@@ -137,17 +211,3 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) * | |||
} | |||
return nil | |||
} | |||
func (GrampusTrainTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
jobID := ctx.Response.JobID | |||
//todo 需要进一步优化 | |||
var actionType models.ActionType | |||
if req.ComputeSource.Name == models.NPU { | |||
actionType = models.ActionCreateGrampusNPUTrainTask | |||
} else if req.ComputeSource.Name == models.GPU { | |||
actionType = models.ActionCreateGrampusGPUTrainTask | |||
} | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, jobID, req.DisplayJobName, actionType) | |||
return nil | |||
} |
@@ -31,6 +31,7 @@ type CreateOperator struct { | |||
FuncArray []CreationFuncNode | |||
} | |||
//添加同步节点 | |||
func (o *CreateOperator) Next(f ...CreateFunc) *CreateOperator { | |||
if o.FuncArray == nil { | |||
o.FuncArray = make([]CreationFuncNode, 0) | |||
@@ -39,6 +40,7 @@ func (o *CreateOperator) Next(f ...CreateFunc) *CreateOperator { | |||
return o | |||
} | |||
//添加异步节点 | |||
func (o *CreateOperator) AsyncNext(f ...CreateFunc) *CreateOperator { | |||
if o.FuncArray == nil { | |||
o.FuncArray = make([]CreationFuncNode, 0) | |||
@@ -46,6 +48,9 @@ func (o *CreateOperator) AsyncNext(f ...CreateFunc) *CreateOperator { | |||
o.FuncArray = append(o.FuncArray, CreationFuncNode{Funcs: f, IsAsync: true}) | |||
return o | |||
} | |||
//添加同步节点,参数的最后一个Fun是异常处理节点,其他的Fun是正常节点 | |||
//只有当正常节点返回error时,异常处理节点才会执行 | |||
func (o *CreateOperator) NextWithErrFun(f ...CreateFunc) *CreateOperator { | |||
if o.FuncArray == nil { | |||
o.FuncArray = make([]CreationFuncNode, 0) | |||
@@ -64,6 +69,8 @@ func (o *CreateOperator) NextWithErrFun(f ...CreateFunc) *CreateOperator { | |||
return o | |||
} | |||
//添加异步节点,参数的最后一个Fun是异常处理节点,其他的Fun是正常节点 | |||
//只有当正常节点返回error时,异常处理节点才会执行 | |||
func (o *CreateOperator) AsyncNextWithErrFun(f ...CreateFunc) *CreateOperator { | |||
if o.FuncArray == nil { | |||
o.FuncArray = make([]CreationFuncNode, 0) | |||
@@ -102,6 +109,16 @@ func runFuncNode(node CreationFuncNode, ctx *context.CreationContext) *response. | |||
combinedErr := fmt.Errorf("%s\n%s", err, log.Stack(2)) | |||
log.Error("PANIC:%v", combinedErr) | |||
} | |||
if err != nil && node.ErrFunc != nil { | |||
ctx.Response = &entity.CreationResponse{ | |||
Error: errors.New(err.DefaultMsg), | |||
} | |||
newErr := node.ErrFunc(ctx) | |||
if newErr != nil { | |||
log.Error("runFuncNode ErrFunc error.%v", err) | |||
return | |||
} | |||
} | |||
}() | |||
for _, f := range node.Funcs { | |||
err = f(ctx) | |||
@@ -110,15 +127,5 @@ func runFuncNode(node CreationFuncNode, ctx *context.CreationContext) *response. | |||
break | |||
} | |||
} | |||
if err != nil && node.ErrFunc != nil { | |||
ctx.Response = &entity.CreationResponse{ | |||
Error: errors.New(err.DefaultMsg), | |||
} | |||
newErr := node.ErrFunc(ctx) | |||
if newErr != nil { | |||
log.Error("runFuncNode ErrFunc error.%v", err) | |||
return err | |||
} | |||
} | |||
return err | |||
} |
@@ -3,7 +3,7 @@ package task | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"os" | |||
"path" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
@@ -21,18 +21,18 @@ import ( | |||
type CreationHandler interface { | |||
BuildRequest4Restart(ctx *context.CreationContext) *response.BizError | |||
CheckParam(ctx *context.CreationContext) *response.BizError | |||
CheckMulti(ctx *context.CreationContext) *response.BizError | |||
CheckParamFormat(ctx *context.CreationContext) *response.BizError | |||
CheckPrivilege4Continue(ctx *context.CreationContext) *response.BizError | |||
HandleReqParameters(ctx *context.CreationContext) *response.BizError | |||
CheckMultiRequest(ctx *context.CreationContext) *response.BizError | |||
CheckDisplayJobName(ctx *context.CreationContext) *response.BizError | |||
LoadSpec(ctx *context.CreationContext) *response.BizError | |||
CheckPointBalance(ctx *context.CreationContext) *response.BizError | |||
CheckDatasetExists(ctx *context.CreationContext) *response.BizError | |||
CheckDatasetSize(ctx *context.CreationContext) *response.BizError | |||
CheckDatasets(ctx *context.CreationContext) *response.BizError | |||
CheckBranchExists(ctx *context.CreationContext) *response.BizError | |||
CheckModel(ctx *context.CreationContext) *response.BizError | |||
CheckBootFile(ctx *context.CreationContext) *response.BizError | |||
CheckIsCleared(ctx *context.CreationContext) *response.BizError | |||
CheckOutput4Restart(ctx *context.CreationContext) *response.BizError | |||
CheckSourceTaskIsCleared(ctx *context.CreationContext) *response.BizError | |||
BuildContainerData(ctx *context.CreationContext) *response.BizError | |||
InsertCloudbrainRecord4Async(ctx *context.CreationContext) *response.BizError | |||
CallCreationAPI(ctx *context.CreationContext) *response.BizError | |||
@@ -48,6 +48,10 @@ type CreationHandler interface { | |||
type DefaultCreationHandler struct { | |||
} | |||
func (g DefaultCreationHandler) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
return nil | |||
} | |||
func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext) *response.BizError { | |||
task := ctx.SourceCloudbrain | |||
if task == nil { | |||
@@ -65,10 +69,6 @@ func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext) | |||
log.Error("GetCloudbrainSpec err. %v", err) | |||
return response.SPEC_NOT_AVAILABLE | |||
} | |||
computeSourceStr := "" | |||
if c := models.GetComputeSourceInstance(task.ComputeResource); c != nil { | |||
computeSourceStr = c.Name | |||
} | |||
imageUrl := task.Image | |||
imageName := task.Image | |||
imageId := task.ImageID | |||
@@ -85,7 +85,7 @@ func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext) | |||
DisplayJobName: task.DisplayJobName, | |||
JobName: task.JobName, | |||
SpecId: oldSpec.ID, | |||
ComputeSourceStr: computeSourceStr, | |||
ComputeSourceStr: task.GetStandardComputeSource(), | |||
Cluster: entity.GetClusterTypeFromCloudbrainType(task.Type), | |||
WorkServerNumber: task.WorkServerNumber, | |||
BranchName: task.BranchName, | |||
@@ -111,29 +111,48 @@ func (DefaultCreationHandler) BuildRequest4Restart(ctx *context.CreationContext) | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckDatasetExists(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to CheckDataset.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
func (g DefaultCreationHandler) CheckDatasets(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to CheckDatasets.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
datasetUUIDStr := ctx.Request.DatasetUUIDStr | |||
if datasetUUIDStr == "" { | |||
return nil | |||
} | |||
//check datasets num | |||
uuids := strings.Split(datasetUUIDStr, ";") | |||
attachs, _ := models.GetAttachmentsByUUIDs(uuids) | |||
if ctx.Config.DatasetsMaxNum > 0 && len(uuids) > setting.MaxDatasetNum { | |||
log.Error("the dataset count(%d) exceed the limit", len(uuids)) | |||
return response.DATASET_NUMBER_OVER_LIMIT | |||
} | |||
datasetInfos, err := models.GetDatasetInfo4AITask(ctx.Request.DatasetUUIDStr) | |||
if err != nil { | |||
log.Error("GetDatasetInfo failed: %v", err) | |||
return response.SYSTEM_ERROR | |||
} | |||
if len(attachs) < len(uuids) { | |||
if len(datasetInfos) < len(uuids) { | |||
log.Info("CheckDataset hasDatasetDeleted.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
return response.DATASET_NOT_EXISTS | |||
} | |||
//check datasets size | |||
var attachSize int64 | |||
for _, infos := range datasetInfos { | |||
attachSize += infos.Size | |||
} | |||
limitSizeGB := ctx.Config.DatasetsLimitSizeGB | |||
if limitSizeGB > 0 && attachSize > int64(limitSizeGB*1000*1000*1000) { | |||
log.Error("The DatasetSize exceeds the limit (%dGB)", limitSizeGB) // GB | |||
return response.DATASET_SIZE_OVER_LIMIT.WithParams(limitSizeGB) | |||
} | |||
var datasetNames string | |||
for i := 0; i < len(uuids); i++ { | |||
for j := 0; j < len(attachs); j++ { | |||
if uuids[i] == attachs[j].UUID { | |||
datasetNames += attachs[j].Name + ";" | |||
} | |||
} | |||
attach := datasetInfos[uuids[i]] | |||
datasetNames += attach.Compressed.Name + ";" | |||
} | |||
ctx.Request.DatasetNames = strings.TrimSuffix(datasetNames, ";") | |||
log.Info("CheckDataset success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
log.Info("CheckDatasets success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
return nil | |||
} | |||
@@ -149,29 +168,6 @@ func (DefaultCreationHandler) CheckBranchExists(ctx *context.CreationContext) *r | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckDatasetSize(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to CheckDatasetSize.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
if ctx.Request.DatasetUUIDStr == "" { | |||
return nil | |||
} | |||
datasetInfos, _, err := models.GetDatasetInfo(ctx.Request.DatasetUUIDStr, ctx.Request.ComputeSource.Name) | |||
if err != nil { | |||
log.Error("GetDatasetInfo failed: %v", err) | |||
return response.SYSTEM_ERROR | |||
} | |||
var attachSize int64 | |||
for _, infos := range datasetInfos { | |||
attachSize += infos.Size | |||
} | |||
limitSize := ctx.AITaskConfig.DatasetMaxSize | |||
if limitSize > 0 && attachSize > int64(limitSize) { | |||
log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB | |||
return response.DATASET_SIZE_OVER_LIMIT.WithParams(setting.DebugAttachSize) | |||
} | |||
log.Info("CheckDatasetSize success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckModel(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to CheckModel.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
if hasModelNumOverLimit(ctx.Request.PretrainModelCkptName) { //检查模型数量是否超出限制 | |||
@@ -186,72 +182,19 @@ func (DefaultCreationHandler) CheckModel(ctx *context.CreationContext) *response | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckBootFile(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
branch := req.BranchName | |||
if req.BootFile == "" { | |||
return response.PARAM_ERROR | |||
} | |||
if !strings.HasSuffix(strings.TrimSpace(req.BootFile), ".py") { | |||
log.Error("the boot file(%s) must be a python file", strings.TrimSpace(req.BootFile)) | |||
return response.BOOT_FILE_MUST_BE_PYTHON | |||
} | |||
if branch == "" { | |||
branch = ctx.Repository.DefaultBranch | |||
} | |||
commit, err := ctx.GitRepo.GetBranchCommit(branch) | |||
if err != nil { | |||
log.Error("CheckBootFile GetBranchCommit error,repoId:=%d err=%v", ctx.Repository.ID, err) | |||
return response.BOOT_FILE_NOT_EXIST | |||
} | |||
if _, err := commit.GetTreeEntryByPath(req.BootFile); err != nil { | |||
log.Error("CheckBootFile GetTreeEntryByPath error,repoId:=%d BootFile=%s err=%v", ctx.Repository.ID, req.BootFile, err) | |||
return response.BOOT_FILE_NOT_EXIST | |||
} | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckIsCleared(ctx *context.CreationContext) *response.BizError { | |||
if ctx.SourceCloudbrain.Cleared { | |||
return response.RESULT_CLEARD | |||
} | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckOutput4Restart(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to CheckOutput4Restart.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
func (DefaultCreationHandler) CheckSourceTaskIsCleared(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to CheckSourceTaskIsCleared.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
task := ctx.SourceCloudbrain | |||
if task == nil { | |||
return nil | |||
} | |||
if task.Cleared { | |||
return response.RESULT_CLEARD | |||
} | |||
if !(task.IsNPUTask() || task.IsDCUTask()) { | |||
if _, err := os.Stat(getTaskOldJobPath(task)); err != nil { | |||
log.Error("Can not find job minio path.displayJobName=%s jobType=%s cluster=%s err=%v", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster, err) | |||
return response.RESULT_CLEARD | |||
} | |||
} | |||
log.Info("CheckOutput4Restart success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
log.Info("CheckSourceTaskIsCleared success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
return nil | |||
} | |||
func getTaskOldJobPath(task *models.Cloudbrain) string { | |||
var path string | |||
if !task.IsNPUTask() { | |||
path = setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + task.JobName | |||
} | |||
return path | |||
} | |||
func hasDatasetDeleted(datasetUUIDStr string) bool { | |||
if datasetUUIDStr == "" { | |||
return false | |||
} | |||
uuids := strings.Split(datasetUUIDStr, ";") | |||
attachs, _ := models.GetAttachmentsByUUIDs(uuids) | |||
return len(attachs) < len(uuids) | |||
} | |||
func hasModelFileDeleted(modelId, pretrainModelCkptName string) bool { | |||
if modelId == "" { | |||
return false | |||
@@ -273,9 +216,53 @@ func hasModelNumOverLimit(pretrainModelCkptName string) bool { | |||
return false | |||
} | |||
func (DefaultCreationHandler) CheckParam(ctx *context.CreationContext) *response.BizError { | |||
func (DefaultCreationHandler) CheckParamFormat(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
log.Info("Start to CheckParam.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster) | |||
c := models.GetComputeSourceInstance(req.ComputeSourceStr) | |||
if c == nil { | |||
log.Error("ComputeSourceStr invalid") | |||
return response.PARAM_ERROR | |||
} | |||
ctx.Request.ComputeSource = c | |||
ctx.Request.BootFile = strings.TrimSpace(ctx.Request.BootFile) | |||
log.Info("CheckParam success.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster) | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckPrivilege4Continue(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
//继续训练或者创建新版本时需要校验对旧云脑任务的权限 | |||
if !ctx.Request.IsContinueRequest { | |||
return nil | |||
} | |||
log.Info("Start to CheckPrivilege4Continue.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster) | |||
oldCloudbrainId := req.SourceCloudbrainId | |||
if oldCloudbrainId <= 0 { | |||
return response.PARAM_ERROR | |||
} | |||
oldCloudbrain, err := models.GetCloudbrainByCloudbrainID(oldCloudbrainId) | |||
if err != nil { | |||
log.Error("CheckPrivilege4NewVersion get old cloudbrain task error.oldCloudbrainId=%d err=%v", oldCloudbrainId, err) | |||
if models.IsErrRecordNotExist(err) { | |||
return response.PARAM_ERROR | |||
} | |||
return response.SYSTEM_ERROR | |||
} | |||
ctx.SourceCloudbrain = oldCloudbrain | |||
if oldCloudbrain.UserID != ctx.User.ID && !ctx.User.IsAdmin { | |||
return response.INSUFFICIENT_PERMISSION | |||
} | |||
log.Info("CheckPrivilege4Continue success.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster) | |||
return nil | |||
} | |||
func (DefaultCreationHandler) HandleReqParameters(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
var parameters models.Parameters | |||
if req.Params != "" { | |||
err := json.Unmarshal([]byte(req.Params), ¶meters) | |||
@@ -283,22 +270,46 @@ func (DefaultCreationHandler) CheckParam(ctx *context.CreationContext) *response | |||
log.Error("Failed to Unmarshal params: %s (%v)", req.Params, err) | |||
return response.PARAM_ERROR | |||
} | |||
// label去掉所有的空格,value去掉首位的空格 | |||
for i := 0; i < len(parameters.Parameter); i++ { | |||
parameters.Parameter[i].Label = strings.ReplaceAll(parameters.Parameter[i].Label, " ", "") | |||
parameters.Parameter[i].Value = strings.TrimSpace(parameters.Parameter[i].Value) | |||
} | |||
ctx.Request.ParamArray = parameters | |||
p, err := json.Marshal(parameters) | |||
if err == nil { | |||
ctx.Request.Params = string(p) | |||
} | |||
} | |||
c := models.GetComputeSourceInstance(req.ComputeSourceStr) | |||
if c == nil { | |||
log.Error("ComputeSourceStr invalid") | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckBootFile(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
branch := req.BranchName | |||
if req.BootFile == "" { | |||
return response.PARAM_ERROR | |||
} | |||
ctx.Request.ComputeSource = c | |||
//todo 校验模型,数据集是否存在 | |||
log.Info("CheckParam success.displayJobName=%s jobType=%s cluster=%s", req.DisplayJobName, req.JobType, req.Cluster) | |||
if !strings.HasSuffix(strings.TrimSpace(req.BootFile), ".py") { | |||
log.Error("the boot file(%s) must be a python file", strings.TrimSpace(req.BootFile)) | |||
return response.BOOT_FILE_MUST_BE_PYTHON | |||
} | |||
if branch == "" { | |||
branch = ctx.Repository.DefaultBranch | |||
} | |||
commit, err := ctx.GitRepo.GetBranchCommit(branch) | |||
if err != nil { | |||
log.Error("CheckBootFile GetBranchCommit error,repoId:=%d err=%v", ctx.Repository.ID, err) | |||
return response.BOOT_FILE_NOT_EXIST | |||
} | |||
if _, err := commit.GetTreeEntryByPath(req.BootFile); err != nil { | |||
log.Error("CheckBootFile GetTreeEntryByPath error,repoId:=%d BootFile=%s err=%v", ctx.Repository.ID, req.BootFile, err) | |||
return response.BOOT_FILE_NOT_EXIST | |||
} | |||
return nil | |||
} | |||
func (DefaultCreationHandler) CheckMulti(ctx *context.CreationContext) *response.BizError { | |||
func (DefaultCreationHandler) CheckMultiRequest(ctx *context.CreationContext) *response.BizError { | |||
jobType := string(ctx.Request.JobType) | |||
log.Info("Start to CheckMulti success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
@@ -357,18 +368,14 @@ func (DefaultCreationHandler) LoadSpec(ctx *context.CreationContext) *response.B | |||
return nil | |||
} | |||
func (DefaultCreationHandler) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
log.Error("BuildContainerData not implements") | |||
return response.SYSTEM_ERROR | |||
} | |||
func (DefaultCreationHandler) InsertCloudbrainRecord4Async(ctx *context.CreationContext) *response.BizError { | |||
log.Info("Start to InsertCloudbrainRecord4Async.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
req := ctx.Request | |||
imageUrl := req.ImageUrl | |||
if req.ImageUrl == "" && req.ImageName != "" { | |||
req.ImageUrl = req.ImageName | |||
imageUrl = req.ImageName | |||
} | |||
taskType := req.Cluster.GetCloudbrainType() | |||
if taskType == models.TypeCloudBrainTwo && setting.ModelartsCD.Enabled { | |||
@@ -391,15 +398,16 @@ func (DefaultCreationHandler) InsertCloudbrainRecord4Async(ctx *context.Creation | |||
DatasetName: req.DatasetNames, | |||
CommitID: ctx.CommitID, | |||
IsLatestVersion: "1", | |||
VersionCount: 1, | |||
ComputeResource: req.ComputeSource.GetCloudbrainFormat(), | |||
ImageID: req.ImageID, | |||
Image: req.ImageUrl, | |||
Image: imageUrl, | |||
BranchName: branchName, | |||
Parameters: req.Params, | |||
BootFile: req.BootFile, | |||
Description: req.Description, | |||
WorkServerNumber: req.WorkServerNumber, | |||
EngineName: req.ImageUrl, | |||
EngineName: imageUrl, | |||
Spec: ctx.Spec, | |||
ModelName: req.PretrainModelName, | |||
ModelVersion: req.PretrainModelVersion, | |||
@@ -411,6 +419,7 @@ func (DefaultCreationHandler) InsertCloudbrainRecord4Async(ctx *context.Creation | |||
CreatedUnix: timeutil.TimeStampNow(), | |||
UpdatedUnix: timeutil.TimeStampNow(), | |||
GpuQueue: ctx.Spec.QueueCode, | |||
AppName: req.AppName, | |||
} | |||
err := models.CreateCloudbrain(c) | |||
@@ -435,8 +444,9 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Sync(ctx *context.CreationCon | |||
return response.NewBizError(res.Error) | |||
} | |||
imageUrl := req.ImageUrl | |||
if req.ImageUrl == "" && req.ImageName != "" { | |||
req.ImageUrl = req.ImageName | |||
imageUrl = req.ImageName | |||
} | |||
taskType := req.Cluster.GetCloudbrainType() | |||
if taskType == models.TypeCloudBrainTwo && setting.ModelartsCD.Enabled { | |||
@@ -455,13 +465,13 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Sync(ctx *context.CreationCon | |||
IsLatestVersion: "1", | |||
ComputeResource: req.ComputeSource.GetCloudbrainFormat(), | |||
ImageID: req.ImageID, | |||
Image: req.ImageUrl, | |||
Image: imageUrl, | |||
BranchName: req.BranchName, | |||
Parameters: req.Params, | |||
BootFile: req.BootFile, | |||
Description: req.Description, | |||
WorkServerNumber: req.WorkServerNumber, | |||
EngineName: req.ImageUrl, | |||
EngineName: imageUrl, | |||
Spec: ctx.Spec, | |||
ModelName: req.PretrainModelName, | |||
ModelVersion: req.PretrainModelVersion, | |||
@@ -475,6 +485,13 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Sync(ctx *context.CreationCon | |||
CreatedUnix: res.CreateTime, | |||
UpdatedUnix: res.CreateTime, | |||
GpuQueue: ctx.Spec.QueueCode, | |||
Config: ctx.BuildCloudbrainConfig(), | |||
} | |||
config := ctx.BuildCloudbrainConfig() | |||
if config != nil { | |||
c.TrainUrl = path.Join("/", config.OutputBucket, config.OutputObjectPrefix) | |||
c.LogUrl = path.Join("/", config.LogBucket, config.LogObjectPrefix) | |||
} | |||
err := models.CreateCloudbrain(c) | |||
@@ -507,7 +524,14 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Async(ctx *context.CreationCo | |||
c.CreatedUnix = res.CreateTime | |||
c.UpdatedUnix = res.CreateTime | |||
c.DatasetName = ctx.Request.DatasetNames | |||
c.VersionName = res.VersionName | |||
c.VersionID = res.VersionID | |||
config := ctx.BuildCloudbrainConfig() | |||
if config != nil { | |||
c.TrainUrl = path.Join("/", config.OutputBucket, config.OutputObjectPrefix) | |||
c.LogUrl = path.Join("/", config.LogBucket, config.LogObjectPrefix) | |||
} | |||
err := models.UpdateJob(c) | |||
if err != nil { | |||
log.Error("AfterCallCreationAPI4Async UpdateJob err.displayJobName=%s jobType=%s cluster=%s err=%v", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster, err) | |||
@@ -515,6 +539,12 @@ func (DefaultCreationHandler) AfterCallCreationAPI4Async(ctx *context.CreationCo | |||
} | |||
log.Info("AfterCallCreationAPI4Async success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
//更新cloudbrain_config表 | |||
config.CloudbrainID = c.ID | |||
_, err = models.InsertCloudbrainConfig(config) | |||
if err != nil { | |||
log.Error("InsertCloudbrainConfig error,config=%+v err=%v", config, err) | |||
} | |||
return nil | |||
} | |||
@@ -530,7 +560,6 @@ func (DefaultCreationHandler) CreateCloudbrainRecord4Restart(ctx *context.Creati | |||
return response.RESTART_FAILED | |||
} | |||
req := ctx.Request | |||
c := &models.Cloudbrain{ | |||
Status: TransAITaskStatus(res.Status), | |||
UserID: ctx.SourceCloudbrain.UserID, | |||
@@ -577,10 +606,6 @@ func (DefaultCreationHandler) CreateCloudbrainRecord4Restart(ctx *context.Creati | |||
return nil | |||
} | |||
func (DefaultCreationHandler) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
return nil | |||
} | |||
func TransAITaskStatus(oldStatus string) string { | |||
switch oldStatus { | |||
case models.GrampusStatusPending: | |||
@@ -655,3 +680,7 @@ func (DefaultCreationHandler) HandleErr4Async(ctx *context.CreationContext) *res | |||
log.Info("HandleErr4Async success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) | |||
return nil | |||
} | |||
func (g DefaultCreationHandler) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
return nil | |||
} |
@@ -0,0 +1,88 @@ | |||
package task | |||
import ( | |||
"strconv" | |||
"time" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
) | |||
type SuperComputeTaskTemplate struct { | |||
GrampusNoteBookTaskTemplate | |||
} | |||
func init() { | |||
t := &SuperComputeTaskTemplate{ | |||
GrampusNoteBookTaskTemplate: GrampusNoteBookTaskTemplate{ | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.C2Net, | |||
JobType: models.JobTypeSuperCompute, | |||
Config: GetGrampusNoteBookConfig, | |||
}, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeSuperCompute, entity.C2Net, t) | |||
} | |||
func (g SuperComputeTaskTemplate) GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed") | |||
return nil, false, response.SYSTEM_ERROR | |||
} | |||
l, f, err := c.GetNotebookImages(entity.GetImageReq{ | |||
ComputeSource: computeSource, | |||
JobType: models.JobTypeDebug, | |||
}) | |||
if err != nil { | |||
log.Error("GetImages err.computeSource=%s err =%v", computeSource.Name, err) | |||
return nil, false, response.NewBizError(err) | |||
} | |||
return l, f, nil | |||
} | |||
func (t SuperComputeTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParamFormat). | |||
Next(t.CheckMultiRequest). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasets). | |||
Next(t.CheckBranchExists). | |||
Next(t.CheckModel). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (SuperComputeTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
var actionType = models.ActionCreateSuperComputeTask | |||
task, err := models.GetCloudbrainByCloudbrainID(ctx.NewCloudbrain.ID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return response.NewBizError(err) | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, stringId, req.DisplayJobName, actionType) | |||
return nil | |||
} | |||
func (g SuperComputeTaskTemplate) GetDisplayJobName(userName string) string { | |||
t := time.Now() | |||
millisecondStr := strconv.FormatInt((t.UnixNano()%1e6/1e3)%1000, 10) | |||
return "mmlspark-" + t.Format("20060102150405") + millisecondStr | |||
} |
@@ -1,19 +1,27 @@ | |||
package task | |||
import ( | |||
"errors" | |||
"time" | |||
cloudbrainService "code.gitea.io/gitea/services/cloudbrain" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/convert" | |||
"code.gitea.io/gitea/modules/grampus" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/redis/redis_key" | |||
"code.gitea.io/gitea/modules/redis/redis_lock" | |||
api "code.gitea.io/gitea/modules/structs" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/cluster" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/cloudbrain/resource" | |||
"errors" | |||
"time" | |||
"strconv" | |||
) | |||
var taskMap = map[string]AITaskTemplate{} | |||
@@ -43,20 +51,41 @@ type AITaskTemplate interface { | |||
BriefQuery(cloudbrainId int64) (*entity.AITaskBriefInfo, *response.BizError) | |||
Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) | |||
Update(cloudbrainId int64) *response.BizError | |||
GetLog(cloudbrainId int64) (*entity.ClusterLog, *response.BizError) | |||
GetOutput(cloudbrainId int64) *response.BizError | |||
GetLog(opts entity.QueryLogOpts) (*entity.ClusterLog, *response.BizError) | |||
GetLogDownloadInfo(opts entity.GetLogDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) | |||
GetSingleOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) | |||
GetAllOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) | |||
GetOutput(cloudbrainId int64, parentDir string) (*entity.AITaskOutput, *response.BizError) | |||
GetAllOutput(opts entity.GetAllOutputReq) (*entity.AllAITaskOutput, *response.BizError) | |||
GetDebugUrl(cloudbrainId int64, fileName ...string) (string, *response.BizError) | |||
GetOperationProfile(cloudbrainId int64) (*entity.OperationProfile, *response.BizError) | |||
GetResourceUsage(opts entity.GetResourceUsageOpts) (*entity.ResourceUsage, *response.BizError) | |||
GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) | |||
GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) | |||
GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig | |||
GetConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig | |||
GetNodeInfo(cloudbrainId int64) ([]entity.AITaskNodeInfo, *response.BizError) | |||
GetAllowedWorkerNum(userId int64, computeSource *models.ComputeSource) ([]int, *response.BizError) | |||
GetDisplayJobName(userName string) string | |||
} | |||
type GetConfigFunc func(entity.AITaskConfigKey) *entity.AITaskBaseConfig | |||
type DefaultAITaskTemplate struct { | |||
DefaultCreationHandler | |||
ClusterType entity.ClusterType | |||
JobType models.JobType | |||
aiTaskConfig entity.AITaskConfig | |||
ClusterType entity.ClusterType | |||
JobType models.JobType | |||
Config GetConfigFunc | |||
} | |||
func (g DefaultAITaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
if ctx.Config == nil { | |||
return nil | |||
} | |||
err := container_builder.BuildContainerDataChain(ctx.Config.ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (g DefaultAITaskTemplate) GetMyCluster() cluster.ClusterAdapter { | |||
@@ -68,53 +97,81 @@ func (g DefaultAITaskTemplate) GetMyCluster() cluster.ClusterAdapter { | |||
return c | |||
} | |||
func (g DefaultAITaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
return entity.AITaskConfig{} | |||
} | |||
func (g DefaultAITaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
jobID := ctx.Response.JobID | |||
config := ctx.Config | |||
user := ctx.User | |||
repo := ctx.Repository | |||
displayJobName := req.DisplayJobName | |||
func (d DefaultAITaskTemplate) GetDatasetPath(ctx *context.CreationContext) string { | |||
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps | |||
if config == nil { | |||
return "" | |||
} | |||
opt := config[entity.ContainerDataset] | |||
if opt == nil { | |||
return "" | |||
if config.IsActionUseJobId { | |||
notification.NotifyOtherTask(user, repo, jobID, displayJobName, config.ActionType) | |||
} else { | |||
task, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("NotifyCreation GetCloudbrainByJobID failed: %v", err.Error()) | |||
return nil | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(user, repo, stringId, displayJobName, config.ActionType) | |||
} | |||
return opt.ContainerPath | |||
return nil | |||
} | |||
func (d DefaultAITaskTemplate) GetCodePath(ctx *context.CreationContext) string { | |||
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps | |||
if config == nil { | |||
return "" | |||
func (g DefaultAITaskTemplate) GetConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { | |||
if g.Config == nil { | |||
return &entity.AITaskBaseConfig{} | |||
} | |||
opt := config[entity.ContainerCode] | |||
if opt == nil { | |||
return "" | |||
c := g.Config(opts) | |||
if c == nil { | |||
return &entity.AITaskBaseConfig{} | |||
} | |||
return opt.ContainerPath | |||
return c | |||
} | |||
func (d DefaultAITaskTemplate) GetPretrainModelPath(ctx *context.CreationContext) string { | |||
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps | |||
if config == nil { | |||
return "" | |||
func (g DefaultAITaskTemplate) GetNodeInfo(cloudbrainId int64) ([]entity.AITaskNodeInfo, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
opt := config[entity.ContainerPreTrainModel] | |||
if opt == nil { | |||
return "" | |||
res, err := GetAITaskNodeInfo(cloudbrainId, c.GetNodeInfo) | |||
if err != nil { | |||
log.Error("GetNodeInfo error,cloudbrainId=%d err=%v", cloudbrainId, err) | |||
return nil, response.NewBizError(err) | |||
} | |||
return opt.ContainerPath | |||
log.Info("GetNodeInfo success.cloudbrainId=%d", cloudbrainId) | |||
return res, nil | |||
} | |||
func (d DefaultAITaskTemplate) GetOutputPath(ctx *context.CreationContext) string { | |||
config := d.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: ctx.Request.ComputeSource.Name}).ContainerSteps | |||
if config == nil { | |||
return "" | |||
var GrampusNPUMultiNodeConfig *modelarts.MultiNodes | |||
var CloudbrainTwoNPUMultiNodeConfig *modelarts.MultiNodes | |||
func (g DefaultAITaskTemplate) GetAllowedWorkerNum(userId int64, computeSource *models.ComputeSource) ([]int, *response.BizError) { | |||
if g.JobType == models.JobTypeTrain && g.ClusterType == entity.OpenICloudbrainTwo && computeSource.Name == models.NPU { | |||
modelarts.InitMultiNode() | |||
if modelarts.MultiNodeConfig != nil { | |||
for _, info := range modelarts.MultiNodeConfig.Info { | |||
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg { | |||
return info.Node, nil | |||
} | |||
} | |||
} | |||
} | |||
opt := config[entity.ContainerOutPutPath] | |||
if opt == nil { | |||
return "" | |||
if g.JobType == models.JobTypeTrain && g.ClusterType == entity.C2Net && computeSource.Name == models.NPU { | |||
grampus.InitMultiNode() | |||
if grampus.MultiNodeConfig != nil { | |||
for _, info := range grampus.MultiNodeConfig.Info { | |||
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg { | |||
return info.Node, nil | |||
} | |||
} | |||
} | |||
} | |||
return opt.ContainerPath | |||
//未命中配置则只允许一个节点 | |||
return []int{1}, nil | |||
} | |||
func (g DefaultAITaskTemplate) Query(cloudbrainId int64) (*entity.AITaskDetailInfo, *response.BizError) { | |||
@@ -151,7 +208,13 @@ func (g DefaultAITaskTemplate) Delete(cloudbrainId int64) *response.BizError { | |||
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId) | |||
return response.SYSTEM_ERROR | |||
} | |||
err := DelTask(cloudbrainId, c.DeleteNoteBook) | |||
var err error | |||
if g.JobType == models.JobTypeDebug { | |||
err = DelTask(cloudbrainId, c.DeleteNoteBook) | |||
} else { | |||
err = DelTask(cloudbrainId, c.DeleteTrainJob) | |||
} | |||
if err != nil { | |||
log.Error("DelTask error,cloudbrainId=%d err=%v", cloudbrainId, err) | |||
return response.NewBizError(err) | |||
@@ -166,7 +229,13 @@ func (g DefaultAITaskTemplate) Stop(cloudbrainId int64) (*entity.AITaskBriefInfo | |||
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
err := StopTask(cloudbrainId, c.StopNoteBook) | |||
var err error | |||
if g.JobType == models.JobTypeDebug { | |||
err = StopTask(cloudbrainId, c.StopNoteBook) | |||
} else { | |||
err = StopTask(cloudbrainId, c.StopTrainJob) | |||
} | |||
if err != nil { | |||
log.Error("StopTask err.cloudbrainId=%d err=%v", cloudbrainId, err) | |||
return nil, response.NewBizError(err) | |||
@@ -215,7 +284,12 @@ func (g DefaultAITaskTemplate) Update(cloudbrainId int64) *response.BizError { | |||
//二是处于PREPARING的时间超过了配置的等待时间,此时意味着异步创建任务时间过长或者出现了未知异常 | |||
if cloudbrain.NeedActiveStop() { | |||
log.Info("AI task should active stop.cloudbrainId=%d", cloudbrainId) | |||
err = StopAITaskByJobNameFromRemote(cloudbrain, c.QueryNoteBookByJobName, c.StopNoteBook) | |||
if g.JobType == models.JobTypeDebug { | |||
err = StopAITaskByJobNameFromRemote(cloudbrain, c.QueryNoteBookByJobName, c.StopNoteBook) | |||
} else { | |||
err = StopAITaskByJobNameFromRemote(cloudbrain, c.QueryTrainJobByJobName, c.StopTrainJob) | |||
} | |||
if err != nil { | |||
log.Error("StopAITaskByJobNameFromRemote err.cloudbrainId=%d err=%v", cloudbrainId, err) | |||
return response.NewBizError(err) | |||
@@ -228,8 +302,12 @@ func (g DefaultAITaskTemplate) Update(cloudbrainId int64) *response.BizError { | |||
log.Info("AI task is preparing.No need to update from remote.cloudbrainId=%d", cloudbrainId) | |||
return nil | |||
} | |||
if g.JobType == models.JobTypeDebug { | |||
err = UpdateAITaskFromRemote(cloudbrain, c.QueryNoteBook) | |||
} else { | |||
err = UpdateAITaskFromRemote(cloudbrain, c.QueryTrainJob) | |||
} | |||
err = UpdateAITaskFromRemote(cloudbrain, c.QueryNoteBook) | |||
if err != nil { | |||
log.Error("UpdateAITaskFromRemote err.cloudbrainId=%d err=%v", cloudbrainId, err) | |||
return response.NewBizError(err) | |||
@@ -238,12 +316,92 @@ func (g DefaultAITaskTemplate) Update(cloudbrainId int64) *response.BizError { | |||
return nil | |||
} | |||
func (g DefaultAITaskTemplate) GetLog(cloudbrainId int64) (*entity.ClusterLog, *response.BizError) { | |||
return nil, nil | |||
func (g DefaultAITaskTemplate) GetLog(opts entity.QueryLogOpts) (*entity.ClusterLog, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", opts) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := QueryTaskLog(opts, c.GetLog) | |||
if err != nil { | |||
log.Error("GetLog err.cloudbrainId=%d err =%v", opts, err) | |||
return &entity.ClusterLog{}, nil | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetOutput(cloudbrainId int64) *response.BizError { | |||
return nil | |||
func (g DefaultAITaskTemplate) GetLogDownloadInfo(opts entity.GetLogDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", opts) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := GetLogDownloadInfo(opts, c.GetLogDownloadInfo) | |||
if err != nil { | |||
log.Error("GetLog err.cloudbrainId=%d ", opts) | |||
return nil, nil | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetSingleOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", opts) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := GetSingleOutputDownloadInfo(opts, c.GetSingleOutputDownloadInfo) | |||
if err != nil { | |||
log.Error("GetOutputDownloadInfo err.cloudbrainId=%d ", opts) | |||
return nil, nil | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetAllOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq) (*entity.FileDownloadInfo, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", opts) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := GetAllOutputDownloadInfo(opts, c.GetAllOutputDownloadInfo) | |||
if err != nil { | |||
log.Error("GetOutputDownloadInfo err.cloudbrainId=%d ", opts) | |||
return nil, nil | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetOutput(cloudbrainId int64, parentDir string) (*entity.AITaskOutput, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := GetAITaskOutput(cloudbrainId, parentDir, c.GetOutput) | |||
if err != nil { | |||
log.Error("GetOutput err.cloudbrainId=%d err =%v", cloudbrainId, err) | |||
return nil, nil | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetAllOutput(opts entity.GetAllOutputReq) (*entity.AllAITaskOutput, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,cloudbrainId=%d", opts) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := GetAllAITaskOutput(opts, c.GetAllOutput) | |||
if err != nil { | |||
log.Error("GetOutput err.cloudbrainId=%d err =%v", opts, err) | |||
return nil, nil | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetDebugUrl(cloudbrainId int64, fileName ...string) (string, *response.BizError) { | |||
@@ -270,32 +428,60 @@ func (g DefaultAITaskTemplate) GetOperationProfile(cloudbrainId int64) (*entity. | |||
log.Error("Get cluster failed,cloudbrainId=%d", cloudbrainId) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
s, err := GetOperationProfile(cloudbrainId, c.GetNoteBookOperationProfile) | |||
var s *entity.OperationProfile | |||
var err error | |||
if g.JobType == models.JobTypeDebug { | |||
s, err = GetOperationProfile(cloudbrainId, c.GetNoteBookOperationProfile) | |||
} else { | |||
s, err = GetOperationProfile(cloudbrainId, c.GetTrainJobOperationProfile) | |||
} | |||
if err != nil { | |||
log.Error("QueryNoteBookUrl err.cloudbrainId=%d err =%v", cloudbrainId, err) | |||
log.Error("GetOperationProfile err.cloudbrainId=%d err =%v", cloudbrainId, err) | |||
return nil, nil | |||
} | |||
if s == nil { | |||
s = &entity.OperationProfile{Events: []entity.ProfileEvent{}} | |||
} | |||
return s, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetResourceUsage(opts entity.GetResourceUsageOpts) (*entity.ResourceUsage, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed,opts=%+v", opts) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
res, err := GetResourceUsage(opts, c.GetResourceUsage) | |||
if err != nil { | |||
log.Error("GetOperationProfile err.opts=%+v err =%v", opts, err) | |||
return nil, nil | |||
} | |||
return res, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed") | |||
return nil, false, response.SYSTEM_ERROR | |||
} | |||
l, f, err := c.GetImages(entity.GetImageReq{ | |||
ComputeSource: computeSource, | |||
JobType: g.JobType, | |||
}) | |||
var images []entity.ClusterImage | |||
var customFlag bool | |||
var err error | |||
if g.JobType == models.JobTypeDebug { | |||
images, customFlag, err = c.GetNotebookImages(entity.GetImageReq{ | |||
ComputeSource: computeSource, | |||
JobType: g.JobType, | |||
}) | |||
} else { | |||
images, customFlag, err = c.GetTrainImages(entity.GetImageReq{ | |||
ComputeSource: computeSource, | |||
JobType: g.JobType, | |||
}) | |||
} | |||
if err != nil { | |||
log.Error("GetImages err.computeSource=%s err =%v", computeSource.Name, err) | |||
return nil, false, response.NewBizError(err) | |||
} | |||
return l, f, nil | |||
return images, customFlag, nil | |||
} | |||
func (g DefaultAITaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) { | |||
@@ -314,3 +500,29 @@ func (g DefaultAITaskTemplate) GetSpecs(userId int64, computeSource models.Compu | |||
} | |||
return r, nil | |||
} | |||
// CheckWorkerNum validates the requested multi-node worker count against the
// node counts this user is allowed to use for the requested compute source.
// A single-node request (serverNum <= 1) always passes.
func (g DefaultAITaskTemplate) CheckWorkerNum(ctx *context.CreationContext) *response.BizError {
	log.Info("Start to CheckMultiNode.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster)
	serverNum := ctx.Request.WorkServerNumber
	if serverNum <= 1 {
		return nil
	}
	// NOTE(review): the error from GetAllowedWorkerNum is ignored; on failure
	// workerNums is presumably empty and the request is rejected below —
	// confirm that is the intended behavior.
	workerNums, _ := g.GetAllowedWorkerNum(ctx.User.ID, ctx.Request.ComputeSource)
	if !isInNodes(workerNums, serverNum) {
		return response.NO_NODE_RIGHR
	}
	return nil
}
// isInNodes reports whether num appears in the allowed node-count list.
// A nil or empty list never matches.
func isInNodes(nodes []int, num int) bool {
	for i := range nodes {
		if nodes[i] == num {
			return true
		}
	}
	return false
}
// GetDisplayJobName generates a default display name for a new task for the
// given user, delegating to the shared cloudbrain service.
func (g DefaultAITaskTemplate) GetDisplayJobName(userName string) string {
	return cloudbrainService.GetDisplayJobName(userName)
}
@@ -3,16 +3,60 @@ package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/services/ai_task_service/storage_helper" | |||
"path" | |||
"strings" | |||
) | |||
func GetAITaskConfigByCloudbrain(cloudbrain *models.Cloudbrain) entity.AITaskConfig { | |||
// GetDetailConfigInfoByCloudbrain returns the task's detail config: the
// persisted cloudbrain_config record when one exists, otherwise a config
// reconstructed from the current template (historical tasks predate the
// cloudbrain_config table).
func GetDetailConfigInfoByCloudbrain(cloudbrain *models.Cloudbrain) *entity.AITaskDetailConfigInfo {
	aiConfig := cloudbrain.GetCloudbrainConfig()
	if aiConfig != nil {
		return entity.BuildAITaskDetailConfigInfo(aiConfig)
	}
	return getHistoricalConfigInfo(cloudbrain)
}
//历史任务在cloudbrain_config表中没有对应的记录,因此根据实时配置模拟 | |||
func getHistoricalConfigInfo(cloudbrain *models.Cloudbrain) *entity.AITaskDetailConfigInfo { | |||
t, _ := GetAITaskTemplateFromCloudbrain(cloudbrain) | |||
if t == nil { | |||
return entity.AITaskConfig{} | |||
return &entity.AITaskDetailConfigInfo{} | |||
} | |||
c := t.GetConfig(entity.AITaskConfigKey{ComputeSource: cloudbrain.GetStandardComputeSource()}) | |||
return &entity.AITaskDetailConfigInfo{ | |||
BaseConfig: c, | |||
OutputObjectPrefix: GetContainerStorageObjectPrefix(c, cloudbrain.JobName, cloudbrain.VersionName, entity.ContainerOutPutPath), | |||
OutputStorageType: GetContainerStorageType(c, entity.ContainerOutPutPath), | |||
LogObjectPrefix: GetContainerStorageObjectPrefix(c, cloudbrain.JobName, cloudbrain.VersionName, entity.ContainerLogPath), | |||
LogStorageType: GetContainerStorageType(c, entity.ContainerLogPath), | |||
} | |||
} | |||
func GetContainerStorageObjectPrefix(c *entity.AITaskBaseConfig, jobName string, versionName string, containerType entity.ContainerDataType) string { | |||
config := c.GetContainerConfig(containerType) | |||
if config == nil { | |||
return "" | |||
} | |||
st := config.AcceptStorageType | |||
if st == nil && len(st) == 0 { | |||
return "" | |||
} | |||
uploader := storage_helper.SelectUploaderFromStorageType(st[0]) | |||
//兼容历史任务所以加上了versionName,另外云脑二训练任务为了适配modelarts接口加上了默认版本,此时要剔除 | |||
localPath := config.GetLocalPath() | |||
localPath = strings.TrimSuffix(localPath, models.CloudbrainTwoDefaultVersion) | |||
objectKey := path.Join(uploader.GetJobDefaultObjectKeyPrefix(jobName), localPath, versionName) | |||
return objectKey | |||
} | |||
func GetContainerStorageType(c *entity.AITaskBaseConfig, containerType entity.ContainerDataType) entity.StorageType { | |||
outputConfig := c.GetContainerConfig(containerType) | |||
if outputConfig == nil { | |||
return "" | |||
} | |||
computeSource := models.GetComputeSourceInstance(cloudbrain.ComputeResource) | |||
if computeSource == nil { | |||
return entity.AITaskConfig{} | |||
st := outputConfig.AcceptStorageType | |||
if st == nil && len(st) == 0 { | |||
return "" | |||
} | |||
return t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: computeSource.Name}) | |||
return st[0] | |||
} |
@@ -7,7 +7,6 @@ import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
cloudbrainService "code.gitea.io/gitea/services/cloudbrain" | |||
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
"code.gitea.io/gitea/services/reward/point/account" | |||
) | |||
@@ -28,27 +27,33 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio | |||
} | |||
//获取代码分支 | |||
if branches, _, err := req.GitRepo.GetBranches(0, 0); err == nil { | |||
result.Branches = branches | |||
if len(req.Repo.DefaultBranch) > 0 && req.GitRepo.IsBranchExist(req.Repo.DefaultBranch) { | |||
result.DefaultBranch = req.Repo.DefaultBranch | |||
} else if len(branches) > 0 { | |||
result.DefaultBranch = branches[0] | |||
if req.GitRepo != nil { | |||
if branches, _, err := req.GitRepo.GetBranches(0, 0); err == nil { | |||
result.Branches = branches | |||
if len(req.Repo.DefaultBranch) > 0 && req.GitRepo.IsBranchExist(req.Repo.DefaultBranch) { | |||
result.DefaultBranch = req.Repo.DefaultBranch | |||
} else if len(branches) > 0 { | |||
result.DefaultBranch = branches[0] | |||
} | |||
} | |||
} | |||
//查询积分余额 | |||
if a, err := account.GetAccount(req.User.ID); err == nil { | |||
result.PointAccount = entity.ParsePointAccountInfo(a) | |||
} | |||
//积分开关 | |||
result.PaySwitch = setting.CloudBrainPaySwitch | |||
//生成任务名称 | |||
result.DisplayJobName = cloudbrainService.GetDisplayJobName(req.User.Name) | |||
t, err := GetAITaskTemplate(req.JobType, req.ClusterType) | |||
if err != nil { | |||
log.Error("param error") | |||
return nil, err | |||
} | |||
//生成任务名称 | |||
result.DisplayJobName = t.GetDisplayJobName(req.User.Name) | |||
// 查询镜像列表 | |||
if images, canUseAll, err := t.GetImages(*req.ComputeSource); err == nil { | |||
result.Images = images | |||
@@ -58,9 +63,14 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio | |||
if specs, err := t.GetSpecs(req.User.ID, *req.ComputeSource); err == nil { | |||
result.Specs = specs | |||
} | |||
c := t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: req.ComputeSource.Name}) | |||
result.Config = entity.AITaskCreationConfig{ | |||
DatasetMaxSize: c.DatasetMaxSize, | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
} | |||
//查询可用节点数 | |||
if workerNums, err := t.GetAllowedWorkerNum(req.User.ID, req.ComputeSource); err == nil { | |||
result.AllowedWorkerNum = workerNums | |||
} else { | |||
result.AllowedWorkerNum = []int{1} | |||
} | |||
return result, nil | |||
} |
@@ -0,0 +1,195 @@ | |||
package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"encoding/json" | |||
"fmt" | |||
"strings" | |||
) | |||
// GetModelDownload builds the pretrained-model download list for a task.
// The model record is resolved by ModelId when set, otherwise by its storage
// path; each ";"-separated ckpt name in task.CkptName becomes one entry.
// Returns an empty (non-nil) slice when the model record cannot be found.
func GetModelDownload(task *models.Cloudbrain) []*models.ModelDownload {
	var repositoryLink string
	pretrainModelList := []*models.ModelDownload{}
	ckptNames := strings.Split(task.CkptName, ";")
	var model *models.AiModelManage
	var err error
	if task.ModelId == "" {
		model, err = models.QueryModelByPath(task.PreTrainModelUrl)
	} else {
		model, err = models.QueryModelById(task.ModelId)
	}
	if err != nil || model == nil {
		// Model record is gone; nothing to link.
		return pretrainModelList
	}
	// Repository link is optional; a lookup failure just leaves it empty.
	if r, err := models.QueryModelRepoByModelID(model.ID); err == nil {
		repositoryLink = r.Link()
	}
	for _, ckptName := range ckptNames {
		var url string
		if task.Type == models.TypeC2Net {
			// C2Net tasks expose the scheduled container URL taken from DataUrl.
			url = getModelContainerLink(task.DataUrl, ckptName)
		} else {
			url = getModelLocalLink(model, ckptName)
		}
		modelDownload := models.ModelDownload{
			Name: ckptName,
			DownloadLink: url,
			IsDelete: false,
			ModelName: model.Name,
		}
		if hasModelFileDeleted(task.ModelId, ckptName) {
			log.Warn("Can not get model by path:" + url)
			modelDownload.IsDelete = true
		}
		modelDownload.RepositoryLink = repositoryLink
		pretrainModelList = append(pretrainModelList, &modelDownload)
	}
	return pretrainModelList
}
// getModelLocalLink builds a signed OBS download URL for one ckpt file of a
// locally stored model. The object key is model.Path with its first segment
// (up to the first "/") stripped, concatenated with ckptName.
// NOTE(review): assumes model.Path ends with "/" so the concatenation forms a
// valid key — confirm. The signing error is deliberately ignored; failure
// yields an empty URL.
func getModelLocalLink(model *models.AiModelManage, ckptName string) string {
	index := strings.Index(model.Path, "/")
	key := model.Path[index+1:] + ckptName
	url, _ := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, key)
	return url
}
// GetCloudBrainDataSetInfo returns the dataset download entries for a task.
// For non-C2Net tasks the local attachment URLs are returned as-is; for C2Net
// tasks the links are rewritten to the post-scheduling URLs stored in DataUrl.
func GetCloudBrainDataSetInfo(task *models.Cloudbrain) []*models.DatasetDownload {
	datasetDownload := getCloudBrainDatasetInfo4Local(task.Uuid, task.DatasetName, true)
	// Non-C2Net tasks keep the local download addresses.
	if task.Type != models.TypeC2Net {
		return datasetDownload
	}
	// C2Net tasks must return the addresses produced by scheduling.
	// DataUrl holds a JSON-encoded []entity.NotebookDataset; parse errors are
	// tolerated and simply leave the list empty.
	datasetObsUrlList := make([]entity.NotebookDataset, 0)
	_ = json.Unmarshal([]byte(task.DataUrl), &datasetObsUrlList)
	for _, datasetInfo := range datasetDownload {
		datasetInfo.DatasetDownloadLink = ""
		for _, datasetObs := range datasetObsUrlList {
			log.Info("datasetObsUrl:" + datasetObs.DatasetUrl + "datasetName:" + datasetInfo.DatasetName)
			// Match a scheduled URL to the dataset by name substring.
			if strings.Contains(datasetObs.DatasetUrl, datasetInfo.DatasetName) {
				datasetInfo.DatasetDownloadLink = datasetObs.DatasetUrl
				break
			}
		}
	}
	return datasetDownload
}
// getCloudBrainDatasetInfo4Local builds the dataset download list from the
// ";"-separated uuid/name strings stored on the task. isNeedDown controls
// whether a signed S3 download URL is generated for attachments that still
// exist. Attachments that can no longer be found are kept in the result
// (when a stored name is available) and flagged IsDelete.
func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown bool) []*models.DatasetDownload {
	datasetDownload := make([]*models.DatasetDownload, 0)
	if len(uuid) == 0 {
		return datasetDownload
	}
	uuidList := strings.Split(uuid, ";")
	datasetnameList := strings.Split(datasetname, ";")
	for i, uuidStr := range uuidList {
		name := ""
		link := ""
		url := ""
		isDelete := false
		attachment, err := models.GetAttachmentByUUID(uuidStr)
		if err != nil {
			log.Error("GetAttachmentByUUID failed:%v", err.Error())
			// Attachment is gone: fall back to the stored name, or skip the
			// entry entirely when no name was recorded for this index.
			if len(datasetnameList) <= i || len(datasetname) == 0 {
				continue
			}
			name = datasetnameList[i]
			isDelete = true
		} else {
			name = attachment.Name
			// Repository link is best effort; lookup failures only leave it empty.
			dataset, err := models.GetDatasetByID(attachment.DatasetID)
			if err != nil {
				log.Error("GetDatasetByID failed:%v", err.Error())
			} else {
				repo, err := models.GetRepositoryByID(dataset.RepoID)
				if err != nil {
					log.Error("GetRepositoryByID failed:%v", err.Error())
				} else {
					link = repo.Link() + "/datasets"
				}
			}
			if isNeedDown {
				url = attachment.S3DownloadURL()
			}
		}
		datasetDownload = append(datasetDownload, &models.DatasetDownload{
			DatasetName: name,
			DatasetDownloadLink: url,
			RepositoryLink: link,
			IsDelete: isDelete,
			UUID: uuidStr,
		})
	}
	log.Info("dataset length=" + fmt.Sprint(len(datasetDownload)))
	return datasetDownload
}
//根据实际调度的智算中心修正规格 | |||
func correctAITaskSpec(task *models.Cloudbrain) { | |||
if task.AiCenter == "" { | |||
return | |||
} | |||
s := strings.Split(task.AiCenter, "+") | |||
if len(s) < 2 { | |||
return | |||
} | |||
realCenterCode := s[0] | |||
if realCenterCode == "" { | |||
return | |||
} | |||
oldSpec, err := models.GetCloudbrainSpecByID(task.ID) | |||
if err != nil { | |||
log.Error("correctAITaskSpec GetCloudbrainSpecByID err.taskId=%d err=%v", task.ID, err) | |||
return | |||
} | |||
if oldSpec == nil { | |||
log.Error("correctAITaskSpec GetCloudbrainSpecByID spec is empty.taskId=%d ", task.ID) | |||
return | |||
} | |||
if oldSpec.AiCenterCode == realCenterCode { | |||
return | |||
} | |||
//智算中心不一样时才需要处理 | |||
r, err := models.FindSpecs(models.FindSpecsOptions{ | |||
SourceSpecId: oldSpec.SourceSpecId, | |||
AiCenterCode: realCenterCode, | |||
}) | |||
if err != nil { | |||
log.Error("correctAITaskSpec FindSpecs err.taskId=%d err=%v", task.ID, err) | |||
return | |||
} | |||
if r == nil || len(r) == 0 { | |||
log.Error("correctAITaskSpec FindSpecs 0.taskId=%d ", task.ID) | |||
return | |||
} | |||
n, err := models.UpdateCloudbrainSpec(task.ID, r[0]) | |||
if err == nil && n > 0 { | |||
log.Info("correctAITaskSpec success,taskId=%d oldCenter=%s realCenter=%s", task.ID, oldSpec.AiCenterCode, realCenterCode) | |||
} | |||
} | |||
func getModelContainerLink(dataUrl string, ckptName string) string { | |||
if dataUrl == "" { | |||
return "" | |||
} | |||
datasetObsUrlList := make([]entity.NotebookDataset, 0) | |||
_ = json.Unmarshal([]byte(dataUrl), &datasetObsUrlList) | |||
for _, datasetObs := range datasetObsUrlList { | |||
if strings.Contains(datasetObs.DatasetUrl, ckptName) { | |||
return datasetObs.DatasetUrl | |||
} | |||
} | |||
return "" | |||
} |
@@ -1,14 +1,6 @@ | |||
package task | |||
import ( | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"net/http" | |||
"net/url" | |||
"strconv" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/convert" | |||
@@ -24,15 +16,30 @@ import ( | |||
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
"code.gitea.io/gitea/services/cloudbrain/resource" | |||
"code.gitea.io/gitea/services/lock" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"net/http" | |||
"net/url" | |||
"path" | |||
"strconv" | |||
"strings" | |||
) | |||
type QueryFunc func(string) (*entity.QueryTaskResponse, error) | |||
type QueryFunc func(opts entity.JobIdAndVersionId) (*entity.QueryTaskResponse, error) | |||
type QueryListFunc func(string) ([]*entity.QueryTaskResponse, error) | |||
type DeleteFunc func(string) error | |||
type StopFunc func(string) error | |||
type GetLogFunc func(string) (*entity.ClusterLog, error) | |||
type DeleteFunc func(opts entity.JobIdAndVersionId) error | |||
type StopFunc func(opts entity.JobIdAndVersionId) error | |||
type GetLogFunc func(entity.ClusterLogOpts) (*entity.ClusterLog, error) | |||
type GetLogDownloadInfoFunc func(entity.ClusterLogDownloadInfoOpts) (*entity.FileDownloadInfo, error) | |||
type GetNotebookUrlFunc func(string) (string, error) | |||
type GetNodeInfoFunc func(entity.ClusterNodeInfoOpts) ([]entity.AITaskNodeInfo, error) | |||
type GetOutputFunc func(entity.ClusterOutputOpts) (*entity.ClusterAITaskOutput, error) | |||
type GetAllOutputFunc func(entity.ClusterOutputOpts) (*entity.AllAITaskOutput, error) | |||
type GetSingleOutputDownloadInfoFunc func(req entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) | |||
type GetAllOutputDownloadInfoFunc func(req entity.ClusterOutputDownloadInfoOpts) (*entity.FileDownloadInfo, error) | |||
type GetOperationProfileFunc func(string) (*entity.OperationProfile, error) | |||
type GetResourceUsageFunc func(entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error) | |||
func BuildAITaskInfo(cloudbrainId int64) (*entity.AITaskDetailInfo, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(cloudbrainId) | |||
@@ -49,10 +56,11 @@ func BuildAITaskByCloudbrain(cloudbrain *models.Cloudbrain) (*entity.AITaskDetai | |||
if err != nil { | |||
return nil, err | |||
} | |||
return buildAITaskInfo(cloudbrain, creator, GetAITaskConfigByCloudbrain(cloudbrain)) | |||
c := GetDetailConfigInfoByCloudbrain(cloudbrain) | |||
return buildAITaskInfo(cloudbrain, creator, c) | |||
} | |||
func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entity.AITaskConfig) (*entity.AITaskDetailInfo, error) { | |||
func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config *entity.AITaskDetailConfigInfo) (*entity.AITaskDetailInfo, error) { | |||
spec, err := resource.GetCloudbrainSpec(task.ID) | |||
if err != nil { | |||
log.Error("buildAITaskInfo GetCloudbrainSpec error,id =%d ,err =%v", task.ID, err) | |||
@@ -70,20 +78,23 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit | |||
if task.WorkServerNumber > 1 { | |||
n = task.WorkServerNumber | |||
} | |||
computeSource := "" | |||
c := models.GetComputeSourceInstance(task.ComputeResource) | |||
if c != nil { | |||
computeSource = c.Name | |||
} | |||
imageName := task.Image | |||
imageUrl := task.Image | |||
imageId := task.ImageID | |||
if imageName == "" { | |||
imageName = task.EngineName | |||
} | |||
if imageUrl == "" { | |||
imageUrl = task.EngineName | |||
} | |||
if imageId == "" && task.EngineID > 0 { | |||
imageId = fmt.Sprint(task.EngineID) | |||
} | |||
baseConfig := &entity.AITaskBaseConfig{} | |||
if config != nil && config.BaseConfig != nil { | |||
baseConfig = config.BaseConfig | |||
} | |||
return &entity.AITaskDetailInfo{ | |||
ID: task.ID, | |||
JobID: task.JobID, | |||
@@ -91,7 +102,7 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit | |||
JobType: task.JobType, | |||
DisplayJobName: task.DisplayJobName, | |||
FormattedDuration: task.TrainJobDuration, | |||
ComputeSource: computeSource, | |||
ComputeSource: task.GetStandardComputeSource(), | |||
PreVersionName: task.PreVersionName, | |||
CurrentVersionName: task.VersionName, | |||
WorkServerNumber: n, | |||
@@ -103,14 +114,16 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit | |||
Cluster: string(entity.GetClusterTypeFromCloudbrainType(task.Type)), | |||
Parameters: parseAITaskParameters(task.Parameters), | |||
CreatedUnix: task.CreatedUnix, | |||
CodePath: config.GetContainerPath(entity.ContainerCode), | |||
DatasetPath: config.GetContainerPath(entity.ContainerDataset), | |||
PretrainModelPath: config.GetContainerPath(entity.ContainerPreTrainModel), | |||
OutputPath: config.GetContainerPath(entity.ContainerOutPutPath), | |||
CodeUrl: task.TrainUrl, | |||
CodePath: baseConfig.GetContainerPath(entity.ContainerCode), | |||
DatasetPath: baseConfig.GetContainerPath(entity.ContainerDataset), | |||
PretrainModelPath: baseConfig.GetContainerPath(entity.ContainerPreTrainModel), | |||
OutputPath: baseConfig.GetContainerPath(entity.ContainerOutPutPath), | |||
CodeUrl: task.RemoteCodeUrl, | |||
PretrainModelName: task.ModelName, | |||
PretrainModelVersion: task.ModelVersion, | |||
PretrainCkptName: task.CkptName, | |||
PretrainModelUrl: task.PreTrainModelUrl, | |||
PretrainModelId: task.ModelId, | |||
StartTime: task.StartTime, | |||
EndTime: task.EndTime, | |||
Description: task.Description, | |||
@@ -122,6 +135,8 @@ func buildAITaskInfo(task *models.Cloudbrain, creator *models.User, config entit | |||
ImageUrl: imageUrl, | |||
CreatorName: creator.GetDisplayName(), | |||
EngineName: task.EngineName, | |||
UserId: task.UserID, | |||
AppName: task.AppName, | |||
}, nil | |||
} | |||
@@ -138,7 +153,7 @@ func parseAITaskParameters(paramStr string) *models.Parameters { | |||
return parameters | |||
} | |||
func QueryTaskEarlyVersionList(id int64, operatorId int64) ([]*entity.AITaskDetailInfo, error) { | |||
func QueryTaskEarlyVersionList(id int64) ([]*entity.AITaskDetailInfo, error) { | |||
task, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
return nil, err | |||
@@ -158,10 +173,6 @@ func QueryTaskEarlyVersionList(id int64, operatorId int64) ([]*entity.AITaskDeta | |||
log.Error("QueryTaskEarlyVersionList convertCloudbrainToAITaskDetailInfo err.id=%d currentId=%d err=%v", id, taskList[i].ID, err) | |||
return nil, err | |||
} | |||
if operatorId == 0 || taskList[i].UserID != operatorId { | |||
t.RemoveDatasets() | |||
t.RemovePretrainModelList() | |||
} | |||
resultList[i] = t | |||
} | |||
return resultList, nil | |||
@@ -178,7 +189,7 @@ func QueryTaskBriefInfo(id int64) (*entity.AITaskBriefInfo, error) { | |||
func UpdateAITaskFromRemote(task *models.Cloudbrain, remoteFunc QueryFunc) error { | |||
log.Info("start to UpdateAITaskFromRemote.task.DisplayJobName = %s task.Status = %s", task.DisplayJobName, task.Status) | |||
res, err := remoteFunc(task.JobID) | |||
res, err := remoteFunc(entity.JobIdAndVersionId{JobID: task.JobID, VersionID: task.VersionID}) | |||
log.Info("remoteQueryFunc task.DisplayJobName = %s res = %+v ", task.DisplayJobName, res) | |||
if err != nil { | |||
log.Error("query from remote err.cloudbrainID = %d err=%v", task.ID, err) | |||
@@ -215,7 +226,7 @@ func StopAITaskByJobNameFromRemote(task *models.Cloudbrain, queryFunc QueryListF | |||
if v.StartedAt < task.CreatedUnix-5*60 { | |||
continue | |||
} | |||
if err = stopFunc(v.JobId); err != nil { | |||
if err = stopFunc(entity.JobIdAndVersionId{JobID: v.JobId, VersionID: v.VersionId}); err != nil { | |||
log.Error("stop task err. name=%s jobId=%s err=%v", task.JobName, v.JobId) | |||
return err | |||
} | |||
@@ -258,7 +269,7 @@ func UpdateByQueryResponse(res *entity.QueryTaskResponse, task *models.Cloudbrai | |||
task.JobID = res.JobId | |||
task.TrainUrl = res.CodeUrl | |||
task.RemoteCodeUrl = res.CodeUrl | |||
task.DataUrl = res.DataUrl | |||
task.ContainerID = res.ContainerID | |||
task.ContainerIp = res.ContainerIP | |||
@@ -321,52 +332,6 @@ func isCloudbrainOneNotebookReady(jobId string) bool { | |||
} | |||
//根据实际调度的智算中心修正规格 | |||
func correctAITaskSpec(task *models.Cloudbrain) { | |||
if task.AiCenter == "" { | |||
return | |||
} | |||
s := strings.Split(task.AiCenter, "+") | |||
if len(s) < 2 { | |||
return | |||
} | |||
realCenterCode := s[0] | |||
if realCenterCode == "" { | |||
return | |||
} | |||
oldSpec, err := models.GetCloudbrainSpecByID(task.ID) | |||
if err != nil { | |||
log.Error("correctAITaskSpec GetCloudbrainSpecByID err.taskId=%d err=%v", task.ID, err) | |||
return | |||
} | |||
if oldSpec == nil { | |||
log.Error("correctAITaskSpec GetCloudbrainSpecByID spec is empty.taskId=%d ", task.ID) | |||
return | |||
} | |||
if oldSpec.AiCenterCode == realCenterCode { | |||
return | |||
} | |||
//智算中心不一样时才需要处理 | |||
r, err := models.FindSpecs(models.FindSpecsOptions{ | |||
SourceSpecId: oldSpec.SourceSpecId, | |||
AiCenterCode: realCenterCode, | |||
}) | |||
if err != nil { | |||
log.Error("correctAITaskSpec FindSpecs err.taskId=%d err=%v", task.ID, err) | |||
return | |||
} | |||
if r == nil || len(r) == 0 { | |||
log.Error("correctAITaskSpec FindSpecs 0.taskId=%d ", task.ID) | |||
return | |||
} | |||
n, err := models.UpdateCloudbrainSpec(task.ID, r[0]) | |||
if err == nil && n > 0 { | |||
log.Info("correctAITaskSpec success,taskId=%d oldCenter=%s realCenter=%s", task.ID, oldSpec.AiCenterCode, realCenterCode) | |||
} | |||
} | |||
func DelTask(id int64, deleteRemote DeleteFunc) error { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
@@ -379,7 +344,7 @@ func DelTask(id int64, deleteRemote DeleteFunc) error { | |||
//删除远端记录 | |||
if cloudbrain.JobID != "" { | |||
err = deleteRemote(cloudbrain.JobID) | |||
err = deleteRemote(entity.JobIdAndVersionId{JobID: cloudbrain.JobID, VersionID: cloudbrain.VersionID}) | |||
if err != nil { | |||
log.Error("delete from remote err.%v", err) | |||
return err | |||
@@ -409,22 +374,94 @@ func StopTask(id int64, stopRemote StopFunc) error { | |||
if cloudbrain.IsPreparing() || cloudbrain.IsCreating() { | |||
return nil | |||
} | |||
err = stopRemote(cloudbrain.JobID) | |||
err = stopRemote(entity.JobIdAndVersionId{JobID: cloudbrain.JobID, VersionID: cloudbrain.VersionID}) | |||
if err != nil { | |||
log.Error("stop from remote err.%v", err) | |||
return errors.New(response.STOP_FAILED.TrCode) | |||
} | |||
//返回数据 | |||
return nil | |||
} | |||
func QueryTaskLog(id int64, getLogRemote GetLogFunc) (*entity.ClusterLog, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id) | |||
// QueryTaskLog fetches a page of the task's log from its cluster via
// getLogRemote. Paging is driven by opts (BaseLine/Lines/Order). Tasks that
// never received a remote JobID return an empty log instead of an error.
func QueryTaskLog(opts entity.QueryLogOpts, getLogRemote GetLogFunc) (*entity.ClusterLog, error) {
	cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId)
	if err != nil {
		return nil, err
	}
	if cloudbrain.JobID == "" {
		return &entity.ClusterLog{
			Content: "",
		}, nil
	}
	// Resolve the object-key prefix and storage type of this task's log objects.
	aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain)
	return getLogRemote(entity.ClusterLogOpts{
		JobId: cloudbrain.JobID,
		BaseLine: opts.BaseLine,
		Lines: opts.Lines,
		Direction: opts.Order,
		ObjectKeyPrefix: aiConfig.LogObjectPrefix,
		StorageType: aiConfig.LogStorageType,
		VersionID: cloudbrain.VersionID,
		NodeId: opts.NodeId,
		LogFileName: opts.LogFileName,
		WorkServerNum: cloudbrain.WorkServerNumber,
	})
}
func GetLogDownloadInfo(opts entity.GetLogDownloadInfoReq, getLogDownloadInfo GetLogDownloadInfoFunc) (*entity.FileDownloadInfo, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if cloudbrain.JobID == "" { | |||
return nil, nil | |||
} | |||
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain) | |||
return getLogDownloadInfo(entity.ClusterLogDownloadInfoOpts{ | |||
JobId: cloudbrain.JobID, | |||
ObjectKeyPrefix: aiConfig.LogObjectPrefix, | |||
StorageType: aiConfig.LogStorageType, | |||
NodeId: opts.NodeId, | |||
LogFileName: opts.LogFileName, | |||
WorkServerNum: cloudbrain.WorkServerNumber, | |||
JobName: cloudbrain.JobName, | |||
DisplayJobName: cloudbrain.DisplayJobName, | |||
}) | |||
} | |||
func GetSingleOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq, f GetSingleOutputDownloadInfoFunc) (*entity.FileDownloadInfo, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if cloudbrain.JobID == "" { | |||
return nil, nil | |||
} | |||
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain) | |||
fileRelativePath := path.Join(aiConfig.OutputObjectPrefix, opts.ParentDir, opts.FileName) | |||
return f(entity.ClusterOutputDownloadInfoOpts{ | |||
JobId: cloudbrain.JobID, | |||
Path: fileRelativePath, | |||
StorageType: aiConfig.OutputStorageType, | |||
}) | |||
} | |||
func GetAllOutputDownloadInfo(opts entity.GetOutputDownloadInfoReq, f GetAllOutputDownloadInfoFunc) (*entity.FileDownloadInfo, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return getLogRemote(cloudbrain.JobID) | |||
if cloudbrain.JobID == "" { | |||
return nil, nil | |||
} | |||
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain) | |||
return f(entity.ClusterOutputDownloadInfoOpts{ | |||
JobId: cloudbrain.JobID, | |||
Path: aiConfig.OutputObjectPrefix, | |||
StorageType: aiConfig.OutputStorageType, | |||
JobName: cloudbrain.JobName, | |||
}) | |||
} | |||
func QueryNoteBookUrl(id int64, getNoteBookUrl GetNotebookUrlFunc, fileName string) (string, error) { | |||
@@ -449,6 +486,85 @@ func QueryNoteBookUrl(id int64, getNoteBookUrl GetNotebookUrlFunc, fileName stri | |||
return url, nil | |||
} | |||
func GetAITaskNodeInfo(id int64, getNodeInfo GetNodeInfoFunc) ([]entity.AITaskNodeInfo, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
return nil, err | |||
} | |||
res, err := getNodeInfo(entity.ClusterNodeInfoOpts{ | |||
JobId: cloudbrain.JobID, | |||
WorkServerNum: cloudbrain.WorkServerNumber, | |||
VersionId: cloudbrain.VersionID, | |||
}) | |||
if err != nil { | |||
log.Error("getNodeInfo error.id = %d err=%v", id, err) | |||
return nil, err | |||
} | |||
return res, nil | |||
} | |||
func GetAITaskOutput(id int64, parentDir string, getOutput GetOutputFunc) (*entity.AITaskOutput, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
return nil, err | |||
} | |||
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain) | |||
res, err := getOutput(entity.ClusterOutputOpts{ | |||
JobId: cloudbrain.JobID, | |||
ObjectKeyPrefix: aiConfig.OutputObjectPrefix, | |||
StorageType: aiConfig.OutputStorageType, | |||
ParentDir: parentDir, | |||
}) | |||
if err != nil { | |||
log.Error("GetAITaskOutput getOutput from cluster error.id=%d parentDir=%s err=%v ", id, parentDir, err) | |||
return nil, err | |||
} | |||
return &entity.AITaskOutput{ | |||
Status: res.Status, | |||
Path: res.Path, | |||
FileList: res.FileList, | |||
IsTaskTerminal: cloudbrain.IsTerminal(), | |||
}, nil | |||
} | |||
func GetAllAITaskOutput(opts entity.GetAllOutputReq, getOutput GetAllOutputFunc) (*entity.AllAITaskOutput, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
aiConfig := GetDetailConfigInfoByCloudbrain(cloudbrain) | |||
res, err := getOutput(entity.ClusterOutputOpts{ | |||
JobId: cloudbrain.JobID, | |||
ObjectKeyPrefix: aiConfig.OutputObjectPrefix, | |||
StorageType: aiConfig.OutputStorageType, | |||
}) | |||
if err != nil { | |||
log.Error("GetAllAITaskOutput getOutput from cluster error.id=%d err=%v ", opts.CloudbrainId, err) | |||
return nil, err | |||
} | |||
return filterOutputFile(res, opts.Suffix), nil | |||
} | |||
func filterOutputFile(sourceOutput *entity.AllAITaskOutput, suffixArray []string) *entity.AllAITaskOutput { | |||
if len(suffixArray) == 0 || sourceOutput == nil || len(sourceOutput.FileList) == 0 { | |||
return sourceOutput | |||
} | |||
sourceFiles := sourceOutput.FileList | |||
var files = make([]storage.FileInfo, 0) | |||
for i := 0; i < len(sourceFiles); i++ { | |||
f := sourceFiles[i] | |||
for j := 0; j < len(suffixArray); j++ { | |||
if strings.HasSuffix(f.FileName, suffixArray[j]) { | |||
files = append(files, f) | |||
break | |||
} | |||
} | |||
} | |||
return &entity.AllAITaskOutput{FileList: files} | |||
} | |||
func transferFileNotebookUrl(oldUrl string, fileName string) string { | |||
u, err := url.Parse(oldUrl) | |||
if err != nil { | |||
@@ -478,6 +594,26 @@ func GetFileNoteBookDebugUrl(url string, filename string) string { | |||
return url + middle + filename | |||
} | |||
func GetResourceUsage(opts entity.GetResourceUsageOpts, fun GetResourceUsageFunc) (*entity.ResourceUsage, error) { | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(opts.CloudbrainId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if cloudbrain.JobID == "" { | |||
return &entity.ResourceUsage{}, nil | |||
} | |||
return fun(entity.ClusterResourceUsageOpts{ | |||
JobId: cloudbrain.JobID, | |||
StartTime: int64(cloudbrain.StartTime), | |||
EndTime: int64(cloudbrain.EndTime), | |||
NodeId: opts.NodeId, | |||
ComputeSource: models.GetComputeSourceStandardFormat(cloudbrain.ComputeResource), | |||
WorkServerNumber: cloudbrain.WorkServerNumber, | |||
VersionID: cloudbrain.VersionID, | |||
LogFileName: opts.LogFileName, | |||
}) | |||
} | |||
func CreateAITask(form entity.CreateReq, gitRepo *git.Repository, repo *models.Repository, user *models.User) (*entity.CreateTaskRes, *response.BizError) { | |||
t, err := GetAITaskTemplate(form.JobType, form.Cluster) | |||
if err != nil { | |||
@@ -495,18 +631,12 @@ func CreateAITask(form entity.CreateReq, gitRepo *git.Repository, repo *models.R | |||
log.Error("lock processed failed:%s", errMsg) | |||
return nil, response.BuildDefaultBizError(errMsg, errMsg) | |||
} | |||
c := models.GetComputeSourceInstance(form.ComputeSourceStr) | |||
if c == nil { | |||
log.Error("ComputeSourceStr invalid") | |||
return nil, response.PARAM_ERROR | |||
} | |||
config := t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: c.Name}) | |||
return t.Create(&creation_context.CreationContext{ | |||
Request: &form, | |||
GitRepo: gitRepo, | |||
Repository: repo, | |||
User: user, | |||
AITaskConfig: config, | |||
Request: &form, | |||
GitRepo: gitRepo, | |||
Repository: repo, | |||
User: user, | |||
Config: t.GetConfig(entity.AITaskConfigKey{ComputeSource: form.ComputeSourceStr}), | |||
}) | |||
} | |||
@@ -532,18 +662,12 @@ func RestartAITask(cloudbrainId int64, gitRepo *git.Repository, repo *models.Rep | |||
log.Error("lock processed failed:%s", errMsg) | |||
return nil, response.BuildDefaultBizError(errMsg, errMsg) | |||
} | |||
c := models.GetComputeSourceInstance(cloudbrain.ComputeResource) | |||
if c == nil { | |||
log.Error("ComputeSourceStr invalid") | |||
return nil, response.PARAM_ERROR | |||
} | |||
config := t.GetConfig(entity.GetAITaskConfigOpts{ComputeSource: c.Name}) | |||
return t.Restart(&creation_context.CreationContext{ | |||
GitRepo: gitRepo, | |||
Repository: repo, | |||
User: user, | |||
SourceCloudbrain: cloudbrain, | |||
AITaskConfig: config, | |||
Config: t.GetConfig(entity.AITaskConfigKey{ComputeSource: models.GetComputeSourceStandardFormat(cloudbrain.ComputeResource)}), | |||
}) | |||
} | |||
@@ -558,6 +682,9 @@ func GetOperationProfile(id int64, getOperationProfile GetOperationProfileFunc) | |||
if errMsg != "" { | |||
defaultRes = &entity.OperationProfile{Events: []entity.ProfileEvent{{Reason: "Error", Message: errMsg}}} | |||
} | |||
if cloudbrain.JobID == "" { | |||
return defaultRes, nil | |||
} | |||
s, err := getOperationProfile(cloudbrain.JobID) | |||
if err != nil || s == nil { | |||
return defaultRes, nil | |||
@@ -648,80 +775,6 @@ func DelCloudbrain(task *models.Cloudbrain) *response.BizError { | |||
return t.Delete(task.ID) | |||
} | |||
func GetCloudBrainDataSetInfo(task *models.Cloudbrain) []*models.DatasetDownload { | |||
datasetDownload := getCloudBrainDatasetInfo4Local(task.Uuid, task.DatasetName, true) | |||
//非虎鲸的任务返回本地地址 | |||
if task.Type != models.TypeC2Net { | |||
return datasetDownload | |||
} | |||
//虎鲸的任务需要返回调度后的地址 | |||
datasetObsUrlList := make([]entity.NotebookDataset, 0) | |||
_ = json.Unmarshal([]byte(task.DataUrl), &datasetObsUrlList) | |||
for _, datasetInfo := range datasetDownload { | |||
datasetInfo.DatasetDownloadLink = "" | |||
for _, datasetObs := range datasetObsUrlList { | |||
log.Info("datasetObsUrl:" + datasetObs.DatasetUrl + "datasetName:" + datasetInfo.DatasetName) | |||
if strings.Contains(datasetObs.DatasetUrl, datasetInfo.DatasetName) { | |||
datasetInfo.DatasetDownloadLink = datasetObs.DatasetUrl | |||
break | |||
} | |||
} | |||
} | |||
return datasetDownload | |||
} | |||
func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown bool) []*models.DatasetDownload { | |||
datasetDownload := make([]*models.DatasetDownload, 0) | |||
if len(uuid) == 0 { | |||
return datasetDownload | |||
} | |||
uuidList := strings.Split(uuid, ";") | |||
datasetnameList := strings.Split(datasetname, ";") | |||
for i, uuidStr := range uuidList { | |||
name := "" | |||
link := "" | |||
url := "" | |||
isDelete := false | |||
attachment, err := models.GetAttachmentByUUID(uuidStr) | |||
if err != nil { | |||
log.Error("GetAttachmentByUUID failed:%v", err.Error()) | |||
if len(datasetnameList) <= i || len(datasetname) == 0 { | |||
continue | |||
} | |||
name = datasetnameList[i] | |||
isDelete = true | |||
} else { | |||
name = attachment.Name | |||
dataset, err := models.GetDatasetByID(attachment.DatasetID) | |||
if err != nil { | |||
log.Error("GetDatasetByID failed:%v", err.Error()) | |||
} else { | |||
repo, err := models.GetRepositoryByID(dataset.RepoID) | |||
if err != nil { | |||
log.Error("GetRepositoryByID failed:%v", err.Error()) | |||
} else { | |||
link = repo.Link() + "/datasets" | |||
} | |||
} | |||
if isNeedDown { | |||
url = attachment.S3DownloadURL() | |||
} | |||
} | |||
datasetDownload = append(datasetDownload, &models.DatasetDownload{ | |||
DatasetName: name, | |||
DatasetDownloadLink: url, | |||
RepositoryLink: link, | |||
IsDelete: isDelete, | |||
}) | |||
} | |||
log.Info("dataset length=" + fmt.Sprint(len(datasetDownload))) | |||
return datasetDownload | |||
} | |||
func HandleNewAITaskStop(cloudbrainId int64) (result *entity.AITaskBriefInfo, isHandled bool, err error) { | |||
task, err := models.GetCloudbrainByCloudbrainID(cloudbrainId) | |||
if err != nil { | |||
@@ -750,64 +803,3 @@ func HandleNewAITaskDelete(cloudbrainId int64) (isHandled bool, err error) { | |||
} | |||
return true, nil | |||
} | |||
func GetModelDownload(task *models.Cloudbrain) []*models.ModelDownload { | |||
var repositoryLink string | |||
pretrainModelList := []*models.ModelDownload{} | |||
ckptNames := strings.Split(task.CkptName, ";") | |||
var model *models.AiModelManage | |||
var err error | |||
if task.ModelId == "" { | |||
model, err = models.QueryModelByPath(task.PreTrainModelUrl) | |||
} else { | |||
model, err = models.QueryModelById(task.ModelId) | |||
} | |||
if err != nil || model == nil { | |||
return pretrainModelList | |||
} | |||
if r, err := models.QueryModelRepoByModelID(model.ID); err == nil { | |||
repositoryLink = r.Link() | |||
} | |||
for _, ckptName := range ckptNames { | |||
var url string | |||
if task.Type == models.TypeC2Net { | |||
url = getModelContainerLink(task.DataUrl, ckptName) | |||
} else { | |||
url = getModelLocalLink(model, ckptName) | |||
} | |||
modelDownload := models.ModelDownload{ | |||
Name: ckptName, | |||
DownloadLink: url, | |||
IsDelete: false, | |||
} | |||
if hasModelFileDeleted(task.ModelId, ckptName) { | |||
log.Warn("Can not get model by path:" + url) | |||
modelDownload.IsDelete = true | |||
} | |||
modelDownload.RepositoryLink = repositoryLink | |||
pretrainModelList = append(pretrainModelList, &modelDownload) | |||
} | |||
return pretrainModelList | |||
} | |||
func getModelLocalLink(model *models.AiModelManage, ckptName string) string { | |||
index := strings.Index(model.Path, "/") | |||
key := model.Path[index+1:] + ckptName | |||
url, _ := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, key) | |||
return url | |||
} | |||
func getModelContainerLink(dataUrl string, ckptName string) string { | |||
if dataUrl == "" { | |||
return "" | |||
} | |||
datasetObsUrlList := make([]entity.NotebookDataset, 0) | |||
_ = json.Unmarshal([]byte(dataUrl), &datasetObsUrlList) | |||
for _, datasetObs := range datasetObsUrlList { | |||
if strings.Contains(datasetObs.DatasetUrl, ckptName) { | |||
return datasetObs.DatasetUrl | |||
} | |||
} | |||
return "" | |||
} |
@@ -1,29 +0,0 @@ | |||
package upload | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
) | |||
// UploaderConfig carries the storage location settings for an uploader backend.
type UploaderConfig struct { | |||
// Bucket is the object-store bucket uploads are written to.
Bucket string | |||
// Endpoint is the storage service endpoint.
Endpoint string | |||
} | |||
// Uploader abstracts the object-storage backends (OBS, MinIO) used to upload
// job code and to create job directories. Semantics are defined by the
// concrete implementations in this package.
type Uploader interface { | |||
// UploadDir uploads a local directory tree under the given job prefix.
UploadDir(codePath, jobName string) error | |||
// GetRealPath maps an object key to a local filesystem path; backends
// without a local view return "".
GetRealPath(objectKey string) string | |||
GetBucket() string | |||
GetEndpoint() string | |||
// GetJobDefaultObjectKeyPrefix returns the default code-path prefix for a job.
GetJobDefaultObjectKeyPrefix(jobName string) string | |||
// MKDIR creates a (pseudo-)directory at path in the object store.
MKDIR(path string) error | |||
} | |||
func SelectUploaderFromStorageType(storageType entity.StorageType) Uploader { | |||
switch storageType { | |||
case entity.OBS: | |||
return &OBSUploader{} | |||
case entity.MINIO: | |||
return &MinioUploader{} | |||
} | |||
return nil | |||
} |
@@ -1,40 +0,0 @@ | |||
package upload | |||
import ( | |||
"bytes" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"strings" | |||
) | |||
type MinioUploader struct { | |||
} | |||
func (m *MinioUploader) UploadDir(codePath, objectKeyPrefix string) error { | |||
return UploadDirToMinio(codePath, objectKeyPrefix, "") | |||
} | |||
func (m *MinioUploader) GetJobDefaultObjectKeyPrefix(jobName string) string { | |||
return setting.CBCodePathPrefix + jobName | |||
} | |||
func (m *MinioUploader) GetRealPath(objectKey string) string { | |||
return setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + strings.TrimPrefix(objectKey, "/") | |||
} | |||
func (m *MinioUploader) GetBucket() string { | |||
return setting.Attachment.Minio.Bucket | |||
} | |||
func (m *MinioUploader) GetEndpoint() string { | |||
return setting.Attachment.Minio.Endpoint | |||
} | |||
const README = "README" | |||
func (m *MinioUploader) MKDIR(path string) error { | |||
//无法直接创建空文件夹,上传一个readme文件模拟 | |||
path = strings.TrimSuffix(path, "/") + "/" + README | |||
val := "You can put the files into this directory and download the files by the web page." | |||
_, err := storage.Attachments.UploadContent(m.GetBucket(), path, bytes.NewReader([]byte(val))) | |||
return err | |||
} |
@@ -1,46 +0,0 @@ | |||
package upload | |||
import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/obs" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"strings" | |||
) | |||
type OBSUploader struct { | |||
} | |||
func (m *OBSUploader) UploadDir(codePath, objectKeyPrefix string) error { | |||
return UploadDirToObs(codePath, objectKeyPrefix, "") | |||
} | |||
func (m *OBSUploader) GetJobDefaultObjectKeyPrefix(jobName string) string { | |||
return setting.CodePathPrefix + jobName | |||
} | |||
func (m *OBSUploader) GetRealPath(objectKey string) string { | |||
return "" | |||
} | |||
func (m *OBSUploader) GetBucket() string { | |||
return setting.Bucket | |||
} | |||
func (m *OBSUploader) MKDIR(path string) error { | |||
path = strings.TrimSuffix(path, "/") + "/" | |||
input := &obs.PutObjectInput{} | |||
input.Bucket = setting.Bucket | |||
input.Key = path | |||
_, err := storage.ObsCli.PutObject(input) | |||
if err != nil { | |||
log.Error("PutObject(%s) failed: %s", input.Key, err.Error()) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (m *OBSUploader) GetEndpoint() string { | |||
index := strings.Index(setting.Endpoint, "//") | |||
endpoint := setting.Endpoint[index+2:] | |||
return endpoint | |||
} |
@@ -28,3 +28,18 @@ func IsModelFileExists(model *models.AiModelManage, fileName string) bool { | |||
} | |||
return false | |||
} | |||
func CheckAndGetFileSize(model *models.AiModelManage, fileName string) (bool, int64) { | |||
if model.Type == models.TypeCloudBrainTwo { | |||
key := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" + fileName | |||
log.Info("IsModelFileExists TypeCloudBrainTwo key=%s", key) | |||
return storage.ObsCheckAndGetFileSize(setting.Bucket, key) | |||
} else if model.Type == models.TypeCloudBrainOne { | |||
prefix := models.AIModelPath + models.AttachmentRelativePath(model.ID) + "/" | |||
objectName := prefix + fileName | |||
log.Info("IsModelFileExists TypeCloudBrainOne objectName=%s", objectName) | |||
return storage.MinioCheckAndGetFileSize(setting.Attachment.Minio.Bucket, objectName) | |||
} | |||
return false, 0 | |||
} |
@@ -18,7 +18,7 @@ import ( | |||
var noteBookOKMap = make(map[int64]int, 20) | |||
var noteBookFailMap = make(map[int64]int, 20) | |||
//if a task notebook url can get successfulCount times, the notebook can browser. | |||
// if a task notebook url can get successfulCount times, the notebook can browser. | |||
const successfulCount = 3 | |||
const maxSuccessfulCount = 10 | |||
@@ -169,6 +169,11 @@ func StopDebugJob(task *models.Cloudbrain) error { | |||
} | |||
} | |||
if task.JobType == string(models.JobTypeSuperCompute) { | |||
_, err = grampus.StopJob(task.JobID, task.JobType) | |||
} | |||
return err | |||
} |
@@ -51,7 +51,7 @@ func AdminBalanceOperate(req models.AdminRewardOperateReq, doer *models.User) er | |||
return nil | |||
} | |||
func GetCSVFailedDatas(user *models.User, records [][]string) ([]models.CSVFailedData, int) { | |||
func GetCSVFailedDatas(admimUser *models.User, records [][]string) ([]models.CSVFailedData, int) { | |||
var SuccessNum int | |||
var CSVFailedDatas []models.CSVFailedData | |||
var RewardOperateType models.RewardOperateType | |||
@@ -77,12 +77,12 @@ func GetCSVFailedDatas(user *models.User, records [][]string) ([]models.CSVFaile | |||
} | |||
targetUserId, _ := strconv.ParseInt(userId, 10, 64) | |||
user, err := models.GetUserByID(targetUserId) | |||
targetUser, err := models.GetUserByID(targetUserId) | |||
if err != nil { | |||
CSVFailedDatas = append(CSVFailedDatas, models.CSVFailedData{pointUser, "userid not found"}) | |||
continue | |||
} | |||
if userName != user.Name { | |||
if userName != targetUser.Name { | |||
CSVFailedDatas = append(CSVFailedDatas, models.CSVFailedData{pointUser, "userName not found"}) | |||
continue | |||
} | |||
@@ -110,7 +110,7 @@ func GetCSVFailedDatas(user *models.User, records [][]string) ([]models.CSVFaile | |||
Amount: amountNum, | |||
Remark: remark, | |||
RewardType: models.RewardTypePoint, | |||
}, user) | |||
}, admimUser) | |||
if err != nil { | |||
log.Error("OperatePointAccountBalance error.%v", err) | |||
CSVFailedDatas = append(CSVFailedDatas, models.CSVFailedData{pointUser, "OperatePointAccountBalance error."}) | |||
@@ -10,7 +10,7 @@ import ( | |||
"github.com/elliotchance/orderedmap" | |||
) | |||
var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41, 42, 43, 44, 45, 46} | |||
var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41, 42, 43, 44, 45, 46, 47} | |||
type ClientsManager struct { | |||
Clients *orderedmap.OrderedMap | |||
@@ -89,7 +89,7 @@ | |||
<div class="row"> | |||
<!-- 任务名 --> | |||
{{$JobID := '0'}} | |||
{{if eq .JobType "DEBUG" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "MODELSAFETY" "SNN4ECOSET" "SIM2BRAIN_SNN" "ONLINEINFERENCE"}} | |||
{{if eq .JobType "DEBUG" "TRAIN" "HPC" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "MODELSAFETY" "SNN4ECOSET" "SIM2BRAIN_SNN" "ONLINEINFERENCE"}} | |||
{{$JobID = .Cloudbrain.ID}} | |||
{{else}} | |||
{{$JobID = .JobID}} | |||
@@ -103,6 +103,13 @@ | |||
<span class="fitted" | |||
style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span> | |||
</a> | |||
{{else if eq .JobType "HPC"}} | |||
<a class="title" | |||
href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}/supercompute/job/{{$JobID}}" | |||
title="{{.DisplayJobName}}" style="font-size: 14px;padding-right:0px"> | |||
<span class="fitted" | |||
style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span> | |||
</a> | |||
{{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE") (eq .JobType "SNN4ECOSET") (eq .JobType "SIM2BRAIN_SNN")}} | |||
<a class="title" | |||
href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}/cloudbrain/benchmark/{{$JobID}}" | |||
@@ -148,8 +155,9 @@ | |||
<div class="two wide column text center nowrap" | |||
style="width: 6% !important;"> | |||
<span class="job-status" id="{{$JobID}}" | |||
data-repopath='{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "DEBUG" "ONLINEINFERENCE"}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}{{else if eq .JobType "INFERENCE"}}/modelarts/inference-job{{else if eq .JobType "TRAIN"}}/modelarts/train-job{{else if eq .JobType "BENCHMARK" "MODELSAFETY"}}/cloudbrain{{end}}' | |||
data-jobid="{{$JobID}}" data-version="{{.VersionName}}"> | |||
data-repopath='{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "DEBUG" "ONLINEINFERENCE" "HPC"}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}{{else if eq .JobType "INFERENCE"}}/modelarts/inference-job{{else if eq .JobType "TRAIN"}}/modelarts/train-job{{else if eq .JobType "BENCHMARK" "MODELSAFETY"}}/cloudbrain{{end}}' | |||
data-jobid="{{$JobID}}" data-version="{{.VersionName}}" | |||
data-cloudbrainid="{{.Cloudbrain.ID}}"> | |||
<span><i id="{{$JobID}}-icon" style="vertical-align: middle;" | |||
class="{{.Status}}"></i><span id="{{$JobID}}-text" | |||
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||
@@ -239,7 +247,7 @@ | |||
{{$.CsrfTokenHtml}} | |||
{{if eq .Status "RUNNING" "WAITING" "CREATING" "STARTING"}} | |||
<a style="margin: 0 1rem;" id="ai-debug-{{$JobID}}" | |||
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING"}}disabled {{else}}blue {{end}}button' | |||
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING" "CREATED_FAILED"}}disabled {{else}}blue {{end}}button' | |||
data-jobid="{{$JobID}}" | |||
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'> | |||
{{$.i18n.Tr "repo.debug"}} | |||
@@ -247,7 +255,7 @@ | |||
{{else}} | |||
{{if not .BootFile}} | |||
<a id="ai-debug-{{$JobID}}" | |||
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING"}} disabled {{else}}blue {{end}}button' | |||
class='ui basic ai_debug {{if eq .Status "CREATING" "STOPPING" "WAITING" "STARTING" "CREATED_FAILED"}} disabled {{else}}blue {{end}}button' | |||
data-jobid="{{$JobID}}" | |||
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'> | |||
{{$.i18n.Tr "repo.debug_again"}} | |||
@@ -269,6 +277,19 @@ | |||
</a> | |||
</form> | |||
</div> | |||
{{end}} | |||
{{if eq .JobType "HPC"}} | |||
<div class="ui compact buttons"> | |||
<form id="debugAgainForm-{{$JobID}}"> | |||
{{$.CsrfTokenHtml}} | |||
<a id="ai-debug-infer-{{$JobID}}" | |||
class='ui basic ai_debug {{if eq .Status "RUNNING"}} blue {{else}} disabled {{end}}button' | |||
data-jobid="{{$JobID}}" | |||
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/'> | |||
{{$.i18n.Tr "repo.start_use"}} | |||
</a> | |||
</form> | |||
</div> | |||
{{end}} | |||
<!-- 停止任务 --> | |||
<div class="ui compact buttons"> | |||
@@ -276,18 +297,18 @@ | |||
<form id="stopForm-{{$JobID}}" style="margin-left:-1px;"> | |||
{{$.CsrfTokenHtml}} | |||
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}" | |||
class='ui basic ai_stop {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED" "STOPPING"}}disabled {{else}} blue {{end}}button' | |||
class='ui basic ai_stop {{if eq .Status "RUNNING" "WAITING"}} blue {{else}} disabled {{end}} button' | |||
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}/modelsafety/{{$JobID}}/stop' | |||
data-jobid="{{$JobID}}"> | |||
{{$.i18n.Tr "repo.stop"}} | |||
</a> | |||
</form> | |||
{{else}} | |||
{{if eq .JobType "DEBUG" "BENCHMARK" "SNN4IMAGENET" "BRAINSCORE" "SNN4ECOSET" "SIM2BRAIN_SNN" "ONLINEINFERENCE"}} | |||
{{if eq .JobType "DEBUG" "HPC" "BENCHMARK" "SNN4IMAGENET" "BRAINSCORE" "SNN4ECOSET" "SIM2BRAIN_SNN" "ONLINEINFERENCE"}} | |||
<form id="stopForm-{{$JobID}}" style="margin-left:-1px;"> | |||
{{$.CsrfTokenHtml}} | |||
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}" | |||
class='ui basic ai_stop {{if eq .Status "KILLED" "FAILED" "CREATE_FAILED" "CREATED_FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED" "STOPPING"}}disabled {{else}} blue {{end}}button' | |||
class='ui basic ai_stop {{if eq .Status "RUNNING" "WAITING"}} blue {{else}} disabled {{end}} button' | |||
data-repopath='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else if eq .JobType "BENCHMARK" }}/cloudbrain/benchmark{{else if eq .ComputeResource "NPU" }}/modelarts/notebook{{end}}{{end}}/{{$JobID}}/stop' | |||
data-jobid="{{$JobID}}" data-bootfile="{{.BootFile}}"> | |||
{{$.i18n.Tr "repo.stop"}} | |||
@@ -295,9 +316,11 @@ | |||
</form> | |||
{{else}} | |||
<a style="padding: 0.5rem 1rem;" id="ai-stop-{{$JobID}}" | |||
class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "CREATE_FAILED" "CREATED_FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button" | |||
class="ui basic ai_stop_version {{if eq .Status "RUNNING" "WAITING"}} blue {{else}} disabled {{end}} button" | |||
data-repopath="{{.Repo.OwnerName}}/{{.Repo.Name}}/{{if eq .JobType "INFERENCE"}}{{if eq .Cloudbrain.Type 1}}modelarts/inference-job{{else}}cloudbrain/train-job{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .Cloudbrain.Type 1}}modelarts/train-job{{else if eq .Cloudbrain.Type 0}}cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}grampus/train-job{{end}}{{end}}" | |||
data-jobid="{{$JobID}}" data-version="{{.VersionName}}"> | |||
data-jobid="{{$JobID}}" | |||
data-cloudbrainid="{{.Cloudbrain.ID}}" | |||
data-version="{{.VersionName}}"> | |||
{{$.i18n.Tr "repo.stop"}} | |||
</a> | |||
{{end}} | |||
@@ -306,7 +329,7 @@ | |||
<!-- 修改任务 --> | |||
{{if and (eq .JobType "TRAIN") (not .FineTune)}} | |||
<div class="ui compact buttons __btn_edit__"> | |||
<a style="padding: 0.5rem 1rem;" class="ui basic blue button" href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}/grampus/train-job/{{.JobID}}{{end}}/create_version{{if .VersionName}}?version_name={{.VersionName}}{{end}}"> | |||
<a style="padding: 0.5rem 1rem;" class="ui basic blue button" href="{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job/{{ToLower .ComputeResource}}{{end}}/create?modify=true&id={{$JobID}}"> | |||
{{$.i18n.Tr "repo.modelarts.modify"}} | |||
</a> | |||
</div> | |||
@@ -317,6 +340,7 @@ | |||
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}/modelsafety/{{$JobID}}/del?isadminpage=true' | |||
method="post"> | |||
{{$.CsrfTokenHtml}} | |||
<input type="hidden" value="{{.Cloudbrain.ID}}" style="display:none" name="id" /> | |||
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{$JobID}}" | |||
class="ui basic ai_delete blue button" | |||
style="border-radius: .28571429rem;"> | |||
@@ -325,12 +349,15 @@ | |||
</form> | |||
{{else}} | |||
<form class="ui compact buttons" id="delForm-{{$JobID}}" | |||
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "BENCHMARK"}}/cloudbrain/benchmark{{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE") (eq .JobType "SNN4ECOSET") (eq .JobType "SIM2BRAIN_SNN")}}/cloudbrain{{else if eq .JobType "DEBUG" "ONLINEINFERENCE"}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}{{else if eq .JobType "INFERENCE"}}{{if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{end}}{{end}}/{{$JobID}}/del?isadminpage=true' | |||
action='{{AppSubUrl}}/{{.Repo.OwnerName}}/{{.Repo.Name}}{{if eq .JobType "BENCHMARK"}}/cloudbrain/benchmark{{else if or (eq .JobType "SNN4IMAGENET") (eq .JobType "BRAINSCORE") (eq .JobType "SNN4ECOSET") (eq .JobType "SIM2BRAIN_SNN")}}/cloudbrain{{else if eq .JobType "DEBUG" "ONLINEINFERENCE" "HPC"}}{{if eq .Cloudbrain.Type 2}}/grampus/notebook{{else}}{{if eq .ComputeResource "CPU/GPU"}}/cloudbrain{{else}}/modelarts/notebook{{end}}{{end}}{{else if eq .JobType "TRAIN"}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}{{else if eq .JobType "INFERENCE"}}{{if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{end}}{{end}}/{{$JobID}}/del?isadminpage=true' | |||
method="post"> | |||
{{$.CsrfTokenHtml}} | |||
{{$.CsrfTokenHtml}} | |||
<input type="hidden" value="{{.Cloudbrain.ID}}" style="display:none" name="id" /> | |||
<a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{$JobID}}" | |||
data-repopath="{{.Repo.OwnerName}}/{{.Repo.Name}}/modelarts/inference-job/{{$JobID}}/del_version?isadminpage=true" | |||
data-version="{{.VersionName}}" class="ui basic ai_delete blue button" | |||
data-version="{{.VersionName}}" | |||
data-cloudbrainid="{{.Cloudbrain.ID}}" | |||
class="ui basic ai_delete blue button" | |||
style="border-radius: .28571429rem;"> | |||
{{$.i18n.Tr "repo.delete"}} | |||
</a> | |||
@@ -341,7 +368,7 @@ | |||
</div> | |||
{{else}} | |||
{{$JobID := '0'}} | |||
{{if eq .JobType "DEBUG" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "SNN4ECOSET" "SIM2BRAIN_SNN"}} | |||
{{if eq .JobType "DEBUG" "TRAIN" "SNN4IMAGENET" "BRAINSCORE" "BENCHMARK" "SNN4ECOSET" "SIM2BRAIN_SNN"}} | |||
{{$JobID = .Cloudbrain.ID}} | |||
{{else}} | |||
{{$JobID = .JobID}} | |||
@@ -381,7 +408,7 @@ | |||
<div class="two wide column text center nowrap" | |||
style="width: 6% !important;"> | |||
<span class="job-status" id="{{$JobID}}" data-jobid="{{$JobID}}" | |||
data-version="{{.VersionName}}"> | |||
data-version="{{.VersionName}}" data-cloudbrainid="{{.Cloudbrain.ID}}"> | |||
<span><i id="{{$JobID}}-icon" style="vertical-align: middle;" | |||
class="{{.Status}}"></i><span id="{{$JobID}}-text" | |||
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||
@@ -38,6 +38,7 @@ | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=MODELSAFETY&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="MODELSAFETY">MODELSAFETY</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=SNN4ECOSET&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="SNN4ECOSET">SNN4ECOSET</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=SIM2BRAIN_SNN&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="SIM2BRAIN_SNN">SIM2BRAIN_SNN</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=HPC&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="HPC">HPC</a> | |||
</div> | |||
</div> | |||
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;"> | |||
@@ -50,6 +51,7 @@ | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=GCU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="GCU">GCU</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=MLU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="MLU">MLU</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=DCU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="DCU">DCU</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=CPU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="CPU">CPU</a> | |||
</div> | |||
</div> | |||
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;"> | |||
@@ -67,6 +69,7 @@ | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=COMPLETED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="COMPLETED">COMPLETED</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=SUCCEEDED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="SUCCEEDED">SUCCEEDED</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="FAILED">FAILED </a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=CREATED_FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="CREATED_FAILED">CREATED_FAILED </a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=other&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="OTHER">OTHER</a> | |||
</div> | |||
@@ -90,7 +93,7 @@ | |||
for (var i = 0, iLen = data.length; i < iLen; i++) { | |||
var dataI = data[i]; | |||
var aiCenterCode = dataI.AiCenterCode; | |||
var aiCenterName = dataI.AiCenterName; | |||
var aiCenterName = dataI.AiCenterName || dataI.AiCenterCode; | |||
var itemClone = itemEl.clone(); | |||
var oHref = itemClone.attr('href'); | |||
var oId = itemClone.attr('id'); | |||
@@ -48,7 +48,8 @@ | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=BENCHMARK&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="BENCHMARK">{{.i18n.Tr "cloudbrain.BENCHMARK"}}</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=ONLINEINFERENCE&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="ONLINEINFERENCE">{{.i18n.Tr "cloudbrain.ONLINEINFERENCE"}}</a> | |||
<!-- <a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=SNN4IMAGENET&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="BENCHMARK">{{.i18n.Tr "cloudbrain.SNN4IMAGENET"}}</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=BRAINSCORE&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="BENCHMARK">{{.i18n.Tr "cloudbrain.BRAINSCORE"}}</a> --> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=BRAINSCORE&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="BENCHMARK">{{.i18n.Tr "cloudbrain.BRAINSCORE"}}</a> --> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType=HPC&listType={{$.ListType}}&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="HPC">{{.i18n.Tr "repo.superComputeTask"}}</a> | |||
</div> | |||
</div> | |||
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;"> | |||
@@ -61,6 +62,7 @@ | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=GCU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="GCU">GCU</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=MLU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="MLU">MLU</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=DCU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="DCU">DCU</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType=CPU&jobStatus={{$.JobStatus}}&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="CPU">CPU</a> | |||
</div> | |||
</div> | |||
<div class="ui selection dropdown" style="min-width: 10em;min-height:2.6em;border-radius: .28571429rem;margin-right: 1em;padding: .67em 3.2em .7em 1em;"> | |||
@@ -78,6 +80,7 @@ | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=COMPLETED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="COMPLETED">COMPLETED</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=SUCCEEDED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="SUCCEEDED">SUCCEEDED</a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="FAILED">FAILED </a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=CREATED_FAILED&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="CREATED_FAILED">CREATED_FAILED </a> | |||
<a class="item" href="{{$.Link}}?q={{$.Keyword}}&jobType={{$.JobType}}&listType={{$.ListType}}&jobStatus=other&cluster={{$.cluster}}&aiCenter={{$.aiCenter}}" data-value="OTHER">OTHER</a> | |||
</div> | |||
@@ -104,7 +107,7 @@ | |||
for (var i = 0, iLen = data.length; i < iLen; i++) { | |||
var dataI = data[i]; | |||
var aiCenterCode = dataI.AiCenterCode; | |||
var aiCenterName = dataI.AiCenterName; | |||
var aiCenterName = dataI.AiCenterName || dataI.AiCenterCode; | |||
var itemClone = itemEl.clone(); | |||
var oHref = itemClone.attr('href'); | |||
var oId = itemClone.attr('id'); | |||
@@ -1,273 +1 @@ | |||
{{template "base/head" .}} | |||
<style> | |||
.min_title{ | |||
font-size: 14px !important; | |||
margin-bottom: 2rem !important; | |||
} | |||
.width { | |||
width: 100% !important; | |||
} | |||
.width48 { | |||
width: 48.5% !important; | |||
} | |||
.width80 { | |||
width: 80.7% !important; | |||
margin-left: 10px; | |||
} | |||
.width806 { | |||
width: 80.6% !important; | |||
margin-left: -2px; | |||
} | |||
.width85 { | |||
width: 85% !important; | |||
margin-left: 10.5rem !important; | |||
align-items: center; | |||
} | |||
.width81 { | |||
margin-left: 1.5rem !important; | |||
width: 81% !important; | |||
} | |||
.add { | |||
font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 0px 5px 5px 0px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
.min { | |||
font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 5px 0px 0px 5px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
</style> | |||
{{template "custom/global_mask" .}} | |||
<div class="repository"> | |||
{{template "repo/header" .}} | |||
<div class="ui container"> | |||
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div> | |||
{{if eq .NotStopTaskCount 0}} | |||
{{template "base/alert" .}} | |||
{{end}} | |||
{{template "custom/alert_cb" .}} | |||
<h4 class="ui top attached header"> | |||
{{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
</h4> | |||
<div class="ui attached segment"> | |||
<!-- equal width --> | |||
<form id="form_id" class="ui form" action="{{.Link}}" method="post"> | |||
{{.CsrfTokenHtml}} | |||
<input type="hidden" name="action" value="update"> | |||
<input type="hidden" id="ai_image_name" value="{{.image}}"> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create"> | |||
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}} | |||
</a> | |||
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create"> | |||
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta) | |||
</a> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" | |||
height="16"> | |||
<path fill="none" d="M0 0h24v24H0z" /> | |||
<path | |||
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" /> | |||
</svg> | |||
CPU/GPU | |||
</a> | |||
<a class="item" href="{{.RepoLink}}/modelarts/train-job/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" | |||
height="16"> | |||
<path fill="none" d="M0 0h24v24H0z" /> | |||
<path | |||
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" /> | |||
</svg> | |||
Ascend NPU</a> | |||
</div> | |||
</div> | |||
<div class="min_title inline field" style="margin-top:-10px;"> | |||
<label class="label-fix-width" style="font-weight: normal;"></label> | |||
{{template "custom/task_wait_count" .}} | |||
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;"> | |||
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i> | |||
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/code" "/dataset" "ckpt_url" "/model" | Safe}}</span> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field" style="margin-bottom: 0rem !important;"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label> | |||
<input style="width: 60%;" name="display_job_name" id="display_job_name" | |||
placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" | |||
tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required | |||
maxlength="36"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width" style="font-weight: normal;" | |||
for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label> | |||
{{if .description}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea> | |||
{{else}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea> | |||
{{end}} | |||
</div> | |||
<div class="ui divider"></div> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label> | |||
<select class="ui dropdown width80 left2" id="code_version" name="branch_name"> | |||
{{if .branch_name}} | |||
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branch_name }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{else}} | |||
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branchName }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{end}} | |||
</select> | |||
</div> | |||
<!--{{template "custom/select_model" .}} --> | |||
<div> | |||
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" | |||
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div> | |||
<div id="select-multi-model"></div> | |||
</div> | |||
<div class="inline required field" style="display: none;"> | |||
<label>{{.i18n.Tr "cloudbrain.task_type"}}</label> | |||
<select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px' | |||
name="job_type"> | |||
<option name="job_type" value="TRAIN">TRAIN</option> | |||
</select> | |||
</div> | |||
<div id="images-new-cb"> | |||
</div> | |||
<div class="inline field min_title required"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label> | |||
{{if .boot_file}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" | |||
tabindex="3" autofocus required maxlength="255"> | |||
{{else}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" | |||
autofocus required maxlength="255"> | |||
{{end}} | |||
<span> | |||
<i class="question circle icon link" | |||
data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} | |||
data-position="right center" data-variation="mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
<div id="select-multi-dataset"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label> | |||
<span id="add_run_para" | |||
style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i | |||
class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span> | |||
<input id="store_run_para" type="hidden" name="run_para_list"> | |||
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}"> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label> | |||
<select id="__specs__" class="ui dropdown width48" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" ovalue="{{.spec_id}}" | |||
{{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}} | |||
name="spec_id"> | |||
</select> | |||
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini" ></i></span> | |||
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a> | |||
{{if .CloudBrainPaySwitch}} | |||
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;"> | |||
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span> | |||
<span style="float:right;"> | |||
<i class="question circle icon link" ></i> | |||
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a> | |||
</span> | |||
</div> | |||
{{end}} | |||
</div> | |||
{{if not .IsCreate}} | |||
<div class="inline min_title field" > | |||
<label class="label-fix-width"></label> | |||
<div class="ui checkbox" style="margin-right:1rem"> | |||
<input type="checkbox" name="is_continue" value="true"> | |||
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label> | |||
</div> | |||
<span > | |||
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/inference.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
{{end}} | |||
<div class="inline field" style="padding: 1rem 0;"> | |||
<label class="label-fix-width"></label> | |||
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}"> | |||
{{.i18n.Tr "repo.cloudbrain.new"}} | |||
</button> | |||
<a class="ui button __btn-cancel-back__" | |||
href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a> | |||
</div> | |||
<!-- 模态框 --> | |||
</form> | |||
</div> | |||
</div> | |||
</div> | |||
{{template "base/footer" .}} | |||
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script> | |||
;(function() { | |||
var SPECS = {{ .train_specs }}; | |||
var showPoint = {{ .CloudBrainPaySwitch }}; | |||
window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, { | |||
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}}, | |||
free: {{$.i18n.Tr "cloudbrain.free"}}, | |||
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}}, | |||
memory: {{$.i18n.Tr "cloudbrain.memory"}}, | |||
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}}, | |||
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}}, | |||
}); | |||
var backUrl = new URLSearchParams(window.location.search).get("backurl"); | |||
if (backUrl) { | |||
$('.__btn-cancel-back__').attr('href', backUrl); | |||
} | |||
})(); | |||
</script> | |||
{{ template "repo/cloudbrain/cloudbraincreate" .}} |
@@ -0,0 +1,273 @@ | |||
{{template "base/head" .}} | |||
<style> | |||
.min_title{ | |||
font-size: 14px !important; | |||
margin-bottom: 2rem !important; | |||
} | |||
.width { | |||
width: 100% !important; | |||
} | |||
.width48 { | |||
width: 48.5% !important; | |||
} | |||
.width80 { | |||
width: 80.7% !important; | |||
margin-left: 10px; | |||
} | |||
.width806 { | |||
width: 80.6% !important; | |||
margin-left: -2px; | |||
} | |||
.width85 { | |||
width: 85% !important; | |||
margin-left: 10.5rem !important; | |||
align-items: center; | |||
} | |||
.width81 { | |||
margin-left: 1.5rem !important; | |||
width: 81% !important; | |||
} | |||
.add { | |||
font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 0px 5px 5px 0px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
.min { | |||
font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 5px 0px 0px 5px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
</style> | |||
{{template "custom/global_mask" .}} | |||
<div class="repository"> | |||
{{template "repo/header" .}} | |||
<div class="ui container"> | |||
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div> | |||
{{if eq .NotStopTaskCount 0}} | |||
{{template "base/alert" .}} | |||
{{end}} | |||
{{template "custom/alert_cb" .}} | |||
<h4 class="ui top attached header"> | |||
{{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
</h4> | |||
<div class="ui attached segment"> | |||
<!-- equal width --> | |||
<form id="form_id" class="ui form" action="{{.Link}}" method="post"> | |||
{{.CsrfTokenHtml}} | |||
<input type="hidden" name="action" value="update"> | |||
<input type="hidden" id="ai_image_name" value="{{.image}}"> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create"> | |||
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}} | |||
</a> | |||
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create"> | |||
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta) | |||
</a> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="active item" href="{{.RepoLink}}/cloudbrain/train-job/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" | |||
height="16"> | |||
<path fill="none" d="M0 0h24v24H0z" /> | |||
<path | |||
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" /> | |||
</svg> | |||
CPU/GPU | |||
</a> | |||
<a class="item" href="{{.RepoLink}}/modelarts/train-job/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" | |||
height="16"> | |||
<path fill="none" d="M0 0h24v24H0z" /> | |||
<path | |||
d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z" /> | |||
</svg> | |||
Ascend NPU</a> | |||
</div> | |||
</div> | |||
<div class="min_title inline field" style="margin-top:-10px;"> | |||
<label class="label-fix-width" style="font-weight: normal;"></label> | |||
{{template "custom/task_wait_count" .}} | |||
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;"> | |||
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i> | |||
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/code" "/dataset" "ckpt_url" "/model" | Safe}}</span> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field" style="margin-bottom: 0rem !important;"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label> | |||
<input style="width: 60%;" name="display_job_name" id="display_job_name" | |||
placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" | |||
tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required | |||
maxlength="36"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width" style="font-weight: normal;" | |||
for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label> | |||
{{if .description}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea> | |||
{{else}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea> | |||
{{end}} | |||
</div> | |||
<div class="ui divider"></div> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label> | |||
<select class="ui dropdown width80 left2" id="code_version" name="branch_name"> | |||
{{if .branch_name}} | |||
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branch_name }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{else}} | |||
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branchName }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{end}} | |||
</select> | |||
</div> | |||
<!--{{template "custom/select_model" .}} --> | |||
<div> | |||
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" | |||
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div> | |||
<div id="select-multi-model"></div> | |||
</div> | |||
<div class="inline required field" style="display: none;"> | |||
<label>{{.i18n.Tr "cloudbrain.task_type"}}</label> | |||
<select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px' | |||
name="job_type"> | |||
<option name="job_type" value="TRAIN">TRAIN</option> | |||
</select> | |||
</div> | |||
<div id="images-new-cb"> | |||
</div> | |||
<div class="inline field min_title required"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label> | |||
{{if .boot_file}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" | |||
tabindex="3" autofocus required maxlength="255"> | |||
{{else}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" | |||
autofocus required maxlength="255"> | |||
{{end}} | |||
<span> | |||
<i class="question circle icon link" | |||
data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} | |||
data-position="right center" data-variation="mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
<div id="select-multi-dataset"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label> | |||
<span id="add_run_para" | |||
style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i | |||
class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span> | |||
<input id="store_run_para" type="hidden" name="run_para_list"> | |||
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}"> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_specification"}}</label> | |||
<select id="__specs__" class="ui dropdown width48" placeholder="{{.i18n.Tr "cloudbrain.select_specification"}}" ovalue="{{.spec_id}}" | |||
{{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}} | |||
name="spec_id"> | |||
</select> | |||
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini" ></i></span> | |||
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a> | |||
{{if .CloudBrainPaySwitch}} | |||
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;"> | |||
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span> | |||
<span style="float:right;"> | |||
<i class="question circle icon link" ></i> | |||
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a> | |||
</span> | |||
</div> | |||
{{end}} | |||
</div> | |||
{{if not .IsCreate}} | |||
<div class="inline min_title field" > | |||
<label class="label-fix-width"></label> | |||
<div class="ui checkbox" style="margin-right:1rem"> | |||
<input type="checkbox" name="is_continue" value="true"> | |||
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label> | |||
</div> | |||
<span > | |||
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/inference.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
{{end}} | |||
<div class="inline field" style="padding: 1rem 0;"> | |||
<label class="label-fix-width"></label> | |||
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}"> | |||
{{.i18n.Tr "repo.cloudbrain.new"}} | |||
</button> | |||
<a class="ui button __btn-cancel-back__" | |||
href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a> | |||
</div> | |||
<!-- Modal dialog -->
</form> | |||
</div> | |||
</div> | |||
</div> | |||
{{template "base/footer" .}} | |||
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script> | |||
;(function() {
    // Resource specifications injected server-side by the Go template.
    var SPECS = {{ .train_specs }};
    // Whether paid-resource point/price info should be displayed.
    var showPoint = {{ .CloudBrainPaySwitch }};
    // Populate the #__specs__ dropdown with the available specs,
    // passing localized labels for the spec description strings.
    window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
        gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
        free: {{$.i18n.Tr "cloudbrain.free"}},
        point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
        memory: {{$.i18n.Tr "cloudbrain.memory"}},
        shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
        no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
    });
    // If the page was opened with ?backurl=..., make the Cancel button
    // return there instead of the default train-job list.
    var backUrl = new URLSearchParams(window.location.search).get("backurl");
    if (backUrl) {
        $('.__btn-cancel-back__').attr('href', backUrl);
    }
})();
</script> |
@@ -1,746 +1 @@ | |||
{{template "base/head" .}} | |||
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css"> | |||
<style> | |||
.model_file_bread { | |||
margin-bottom: -0.5rem !important; | |||
padding-left: 1rem; | |||
padding-top: 0.5rem; | |||
} | |||
.menuContent{ | |||
position: absolute; | |||
background: #ffffff; | |||
left: 0; | |||
right: 26px; | |||
top: 36px; | |||
z-index:999; | |||
border: 1px solid #96c8da; | |||
border-top: 0; | |||
border-bottom-right-radius: 4px; | |||
border-bottom-left-radius: 4px; | |||
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%); | |||
} | |||
</style> | |||
<div id="mask"> | |||
<div id="loadingPage"> | |||
<div class="rect1"></div> | |||
<div class="rect2"></div> | |||
<div class="rect3"></div> | |||
<div class="rect4"></div> | |||
<div class="rect5"></div> | |||
</div> | |||
</div> | |||
<div class="repository"> | |||
{{template "repo/header" .}} | |||
<div class="ui container"> | |||
<h4 class="ui header" id="vertical-segment"> | |||
<div class="ui breadcrumb"> | |||
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all"> | |||
{{.i18n.Tr "repo.cloudbrain"}} | |||
</a> | |||
<div class="divider"> / </div> | |||
<a class="section" href="{{$.RepoLink}}/modelarts/train-job"> | |||
{{$.i18n.Tr "repo.modelarts.train_job"}} | |||
</a> | |||
<div class="divider"> / </div> | |||
<div class="active section">{{.displayJobName}}</div> | |||
</div> | |||
</h4> | |||
{{range $k ,$v := .version_list_task}} | |||
<div class="ui accordion border-according" id="accordion{{.VersionName}}" | |||
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}"> | |||
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}"> | |||
<div class="{{if eq $k 0}}active{{end}} title padding0"> | |||
<div class="according-panel-heading"> | |||
<div class="accordion-panel-title"> | |||
<i class="dropdown icon"></i> | |||
<span class="accordion-panel-title-content"> | |||
<span> | |||
<div style="float: right;"> | |||
{{if and ($.canDownload) (ne .Status "WAITING") }} | |||
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model" | |||
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a> | |||
{{else}} | |||
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a> | |||
{{end}} | |||
{{if and ($.canDownload) (ne .Status "WAITING") }} | |||
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model"> | |||
{{$.i18n.Tr "repo.export_result_to_dataset"}} | |||
<div class="export-popup" id="{{.VersionName}}-popup"> | |||
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div> | |||
</div> | |||
</a> | |||
{{else}} | |||
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a> | |||
{{end}} | |||
</div> | |||
<div class="ac-display-inblock title_text acc-margin-bottom"> | |||
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span> | |||
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}: | |||
<span id="{{.VersionName}}-status-span"><i id="icon" | |||
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text" | |||
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||
</span> | |||
<span | |||
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span> | |||
<span class="cti-mgRight-sm uc-accordionTitle-black" | |||
id="{{.VersionName}}-duration-span">{{$.duration}}</span> | |||
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}"><i | |||
class="redo icon redo-color"></i></span> | |||
</div> | |||
</span> | |||
</span> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="{{if eq $k 0}}active{{end}} content"> | |||
<div class="content-pad"> | |||
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);"> | |||
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
<a class="item detail-log-tab" data-tab="third{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
<a class="item load-model-file" data-tab="four{{$k}}" data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | |||
</div> | |||
<div class="ui tab active" data-tab="first{{$k}}"> | |||
<div style="padding-top: 10px;"> | |||
<div class="tab_2_content"> | |||
<div class="ac-grid ac-grid-col2"> | |||
<div class="ac-grid-col"> | |||
<table class="ti-form"> | |||
<tbody class="ti-text-form"> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.cloudbrain_task"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
{{.DisplayJobName}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.status"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" id="{{.VersionName}}-status"> | |||
{{.Status}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.cloudbrain_creator"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" id="{{.VersionName}}-mirror"> | |||
{{.User.Name}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
<span style="font-size: 12px;" id="{{.VersionName}}-startTime"> | |||
{{if not (eq .StartTime 0)}} | |||
{{TimeSinceUnix1 .StartTime}} | |||
{{else}} | |||
-- | |||
{{end}} | |||
</span> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" | |||
id="{{.VersionName}}-duration"> | |||
{{$.duration}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.model_name"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelconvert.modelversion"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div> | |||
</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</div> | |||
<div class="ac-grid-col"> | |||
<table class="ti-form"> | |||
<tbody class="ti-text-form"> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "cloudbrain.mirror"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" id="{{.VersionName}}-mirror"> | |||
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn" style="cursor:pointer" | |||
data-clipboard-text="{{.Image}}" | |||
data-success="{{$.i18n.Tr "repo.copy_link_success"}}" | |||
data-error="{{$.i18n.Tr "repo.copy_link_error"}}" | |||
data-content="{{$.i18n.Tr "repo.copy_link"}}" | |||
data-variation="inverted tiny" | |||
> | |||
<span title="{{.Image}}">{{.Image}}</span> | |||
</span> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.code_version"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
{{.BranchName}} | |||
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
{{.BootFile}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" title="{{.Parameters}}"> | |||
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}} | |||
</td> | |||
<td class="ti-text-form-content resorce_type"> | |||
<div class="text-span text-span-w"></div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.standard"}} | |||
</td> | |||
<td class="ti-text-form-content spec"> | |||
<div class="text-span text-span-w"></div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.description"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" title="{{.Description}}"> | |||
{{if .Description}}{{.Description}}{{else}}--{{end}} | |||
</div> | |||
</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</div> | |||
</div> | |||
<div style="clear:both"> | |||
{{if $.datasetDownload}} | |||
<table style="border:none" class="ui fixed small stackable table"> | |||
<thead> | |||
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th> | |||
</tr></thead> | |||
<tbody> | |||
{{range $m ,$n := $.datasetDownload}} | |||
<tr> | |||
<td style="word-wrap: break-word;word-break: break-all;"> | |||
{{if eq .IsDelete true}} | |||
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}}) | |||
{{else}} | |||
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a> | |||
{{end}} | |||
</td> | |||
</tr> | |||
{{end}} | |||
</tbody> | |||
</table> | |||
{{end}} | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="third{{$k}}"> | |||
<div class="detail-log-content detail-log-content-{{.VersionName}}" | |||
data-multiplenode="" | |||
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log" | |||
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file" | |||
data-workservernumber="{{.WorkServerNumber}}" | |||
data-version="{{.VersionName}}"> | |||
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }} | |||
</div> | |||
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;"> | |||
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div> | |||
<div style="padding:0 50px 10px 30px;height:100%"> | |||
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;" | |||
data-multiplenode="" | |||
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log" | |||
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file" | |||
data-workservernumber="{{.WorkServerNumber}}" | |||
data-version="{{.VersionName}}"> | |||
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }} | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="four{{$k}}"> | |||
<input type="hidden" name="model{{.VersionName}}" value="-1"> | |||
<input type="hidden" name="modelback{{.VersionName}}" value="-1"> | |||
<div style="display: flex;justify-content: space-between;"> | |||
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'> | |||
<div class="active section">result</div> | |||
<div class="divider"> / </div> | |||
</div> | |||
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;" | |||
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info' | |||
href="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}&jobName={{.JobName}}"> | |||
<i class="ri-download-cloud-2-line"></i> | |||
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span> | |||
</a> | |||
</div> | |||
<div id="dir_list{{.VersionName}}"> | |||
</div> | |||
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;"> | |||
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i> | |||
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<!-- {{template "custom/max_log" .}} --> | |||
{{end}} {{template "base/paginate" .}} | |||
</div> | |||
<!-- Confirmation modal -->
<div id="deletemodel"> | |||
<div class="ui basic modal"> | |||
<div class="ui icon header"> | |||
<i class="trash icon"></i> 删除任务 | |||
</div> | |||
<div class="content"> | |||
<p>你确认删除该任务么?此任务一旦删除不可恢复。</p> | |||
</div> | |||
<div class="actions"> | |||
<div class="ui red basic inverted cancel button"> | |||
<i class="remove icon"></i> 取消操作 | |||
</div> | |||
<div class="ui green basic inverted ok button"> | |||
<i class="checkmark icon"></i> 确定操作 | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<!-- | |||
<div id="menuContent" class="menuContent" style="display:none; position: absolute;z-index:9999"> | |||
<ul id="treeDemo" class="ztree" style="margin-top:0; width: 83%; height: 100%;"></ul> | |||
</div> --> | |||
<!-- Create-model modal -->
<div id="newmodel"> | |||
<div class="ui modal second"> | |||
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);"> | |||
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4> | |||
</div> | |||
<div class="content content-padding"> | |||
<form id="formId" method="POST" class="ui form"> | |||
<div class="ui error message"> | |||
</div> | |||
{{$.CsrfTokenHtml}} | |||
<input type="hidden" name="trainTaskCreate" value="true"> | |||
<div class="required inline field"> | |||
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label> | |||
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required> | |||
<input type="hidden" id="versionName" name="versionName" value="V0001"> | |||
<input style="width: 45%;" id="JobName" readonly required> | |||
</div> | |||
<div class="required inline field" id="modelname"> | |||
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label> | |||
<input style="width: 45%;" id="name" name="name" required maxlength="25" | |||
onkeyup="this.value=this.value.replace(/[, ]/g,'')"> | |||
</div> | |||
<div class="required inline field" id="verionname"> | |||
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label> | |||
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255"> | |||
</div> | |||
<div class="unite min_title inline field required"> | |||
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label> | |||
<div class="ui dropdown selection search width70" id="choice_Engine"> | |||
<input type="hidden" id="engine" name="engine" required> | |||
<div class="default text">{{.i18n.Tr "repo.model.manage.select.engine"}}</div> | |||
<i class="dropdown icon"></i> | |||
<div class="menu" id="job-Engine"> | |||
<option class="active item" data-value="0">PyTorch</option> | |||
<option class="item" data-value="1">TensorFlow</option> | |||
<option class="item" data-value="4">PaddlePaddle</option> | |||
<option class="item" data-value="5">OneFlow</option> | |||
<option class="item" data-value="6">MXNet</option> | |||
<option class="item" data-value="3">Other</option> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="unite min_title inline fields required"> | |||
<div class="field required"> | |||
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label> | |||
</div> | |||
<div class="thirteen wide field" style="position:relative"> | |||
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile"> | |||
<div id="menuContent" class="menuContent" style="display:none;"> | |||
<ul id="treeDemo" class="ztree"></ul> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="inline field"> | |||
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label> | |||
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255" | |||
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'> | |||
</div> | |||
{{if eq $.Repository.IsPrivate false}} | |||
<div class="inline fields"> | |||
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}} </label> | |||
<div class="field"> | |||
<div class="ui radio checkbox"> | |||
<input type="radio" name="isPrivate" checked="checked" value="false"> | |||
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label> | |||
</div> | |||
</div> | |||
<div class="field"> | |||
<div class="ui radio checkbox"> | |||
<input type="radio" name="isPrivate" value="true"> | |||
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label> | |||
</div> | |||
</div> | |||
</div> | |||
{{end}} | |||
<div class="inline field"> | |||
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label> | |||
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3" | |||
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}' | |||
onchange="this.value=this.value.substring(0, 255)" | |||
onkeydown="this.value=this.value.substring(0, 255)" | |||
onkeyup="this.value=this.value.substring(0, 256)"></textarea> | |||
</div> | |||
<div class="inline field" style="margin-left: 75px;"> | |||
<button onclick="createModel()" type="button" class="ui create_train_job green button" | |||
style="position: absolute;"> | |||
{{.i18n.Tr "repo.model.manage.sava_model"}} | |||
</button> | |||
</div> | |||
</form> | |||
<div class="actions" style="display: inline-block;margin-left: 180px;"> | |||
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
{{template "custom/export_dataset" .}} | |||
</div> | |||
{{template "base/footer" .}} | |||
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script> | |||
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script> | |||
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script> | |||
// Repo owner and repo name, parsed from the current URL on page load.
// NOTE(review): assumes the detail page URL ends with
// <user>/<repo>/cloudbrain/train-job/<id> (owner = 5th, repo = 4th
// path segment from the end) — confirm against the route definition.
var userName;
var repoPath;
$(document).ready(function(){
    var url = window.location.href;
    var urlArr = url.split('/')
    userName = urlArr.slice(-5)[0]
    repoPath = urlArr.slice(-4)[0]
});
// zTree configuration for the model-file selection tree (#treeDemo):
// checkboxes enabled with parent/child check state propagated both ways
// ("ps" = parent + self), double-click expand disabled, and custom
// click/check callbacks defined below.
var setting = {
    check: {
        enable: true,
        chkboxType: {"Y":"ps", "N":"ps"}
    },
    view: {
        dblClickExpand: false
    },
    callback: {
        beforeClick: beforeClick,
        onCheck: onCheck
    }
};
// Treat a click on a tree node as a checkbox toggle rather than a
// selection, and cancel the default click handling.
function beforeClick(treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    tree.checkNode(treeNode, !treeNode.checked, null, true);
    return false;
}
// After any check/uncheck, collect the full slash-separated paths of all
// checked leaf (file) nodes and write them, joined by ";", into the
// #modelSelectedFile input.
function onCheck(e, treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    var checkedNodes = tree.getCheckedNodes(true);
    var paths = [];
    for (var idx = 0; idx < checkedNodes.length; idx++) {
        var current = checkedNodes[idx];
        if (current.isParent) {
            // Directories are implied by their files; skip them.
            continue;
        }
        var segments = [];
        var pathNodes = current.getPath();
        for (var j = 0; j < pathNodes.length; j++) {
            segments.push(pathNodes[j].name);
        }
        paths.push(segments.join("/"));
    }
    $("#modelSelectedFile").attr("value", paths.join(";"));
}
// Reveal the model-file tree dropdown (#menuContent) and start watching
// for mousedown events outside it so it can be auto-closed.
function showMenu() {
    $("#menuContent").slideDown("fast");
    $("body").bind("mousedown", onBodyDown);
}
// Collapse the model-file tree dropdown and detach the outside-click
// listener installed by showMenu().
function hideMenu() {
    var menu = $("#menuContent");
    menu.fadeOut("fast");
    $("body").unbind("mousedown", onBodyDown);
}
// Close the dropdown when the user clicks anywhere that is neither the
// trigger elements (#menuBtn, #modelSelectedFile) nor inside the
// dropdown itself.
function onBodyDown(event) {
    var target = event.target;
    var isTrigger = target.id == "menuBtn" || target.id == "modelSelectedFile";
    var insideMenu = target.id == "menuContent" || $(target).parents("#menuContent").length > 0;
    if (isTrigger || insideMenu) {
        return;
    }
    hideMenu();
}
// Sentinel key marking a map entry as an empty directory (a value that
// cannot collide with a real file/dir name).
let dirKey="isOnlyDir--:&";
// Fetches the output file list of the given training task and renders it
// into the zTree (#treeDemo) for model-file selection.
// `trainJob` carries {JobName, VersionName}; a null/empty value clears
// nothing and only logs.
function loadSelectedModelFile(trainJob){
    console.log("trainJob=" + trainJob);
    $('#choice_file').dropdown('clear')
    $("#model-file").empty()
    if(trainJob ==null || trainJob ==""){
        console.log("trainJob is null");
    }else{
        $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?jobName=${trainJob.JobName}&type=0&VersionName=${trainJob.VersionName}`, (data) => {
            const n_length = data.length
            let file_html=''
            let firstFileName =''
            var zNodes=[];
            var nodesMap={};
            // Pass 1: build a nested map from each slash-separated FileName,
            // one map level per path segment. An empty segment (trailing "/")
            // terminates the walk.
            for (let i=0;i<n_length;i++){
                var parentNodeMap = nodesMap;
                var fileSplits = data[i].FileName.split("/");
                for(let j=0;j < fileSplits.length;j++){
                    if(fileSplits[j] == ""){
                        break;
                    }
                    if(parentNodeMap[fileSplits[j]] == null){
                        parentNodeMap[fileSplits[j]] = {};
                    }
                    parentNodeMap = parentNodeMap[fileSplits[j]];
                }
            }
            // Pass 2: re-walk every FileName and tag entries that end with
            // "/" and have no children as empty directories (dirKey), so the
            // tree can still show them as expandable parents.
            for (let i=0;i<n_length;i++){
                var parentNodeMap = nodesMap;
                var fileSplits = data[i].FileName.split("/");
                for(let j=0;j < fileSplits.length;j++){
                    if(fileSplits[j] == ""){
                        if(data[i].FileName[data[i].FileName.length -1] =="/"){
                            if(Object.keys(parentNodeMap).length ==0){
                                parentNodeMap[dirKey]="true";
                            }
                        }
                        break;
                    }
                    parentNodeMap = parentNodeMap[fileSplits[j]];
                }
            }
            // Convert the nested map to zTree's node format and render.
            convertToNode(zNodes,nodesMap);
            $.fn.zTree.init($("#treeDemo"), setting, zNodes);
        })
    }
}
// Recursively converts the nested path map built by loadSelectedModelFile
// into zTree's node-list format ([{name, children, open, isParent}, ...]),
// appending nodes into `nodeList`.
//
// Bug fix: the previous comparator `a - b` yields NaN for non-numeric
// file names, which makes Array.prototype.sort's order unspecified.
// A three-way string comparison sorts siblings deterministically.
function convertToNode(nodeList,nodesMap){
    var keyList = Object.keys(nodesMap);
    keyList.sort(function(a,b){
        return a < b ? -1 : (a > b ? 1 : 0);
    });
    var isFirst = true;
    for(var i=0; i<keyList.length;i++){
        var node = {};
        node["name"] = keyList[i];
        nodeList.push(node);
        if(nodesMap[keyList[i]] != null && Object.keys(nodesMap[keyList[i]]).length >0){
            if(nodesMap[keyList[i]][dirKey] != null){
                // Empty directory: collapsed parent with no children.
                node["open"] = false;
                node["isParent"] = true;
            }else{
                node["children"]=[];
                // Auto-expand only the first non-empty directory per level.
                if(isFirst){
                    node["open"] = true;
                    isFirst= false;
                }
                convertToNode(node["children"],nodesMap[keyList[i]]);
            }
        }
    }
}
// Opens the "create model" modal pre-filled from a training task.
// `obj` carries {DisplayJobName, JobName, JobID, VersionName} from the
// template's onclick handler.
function showcreate(obj) {
    $('.ui.modal.second')
        .modal({
            centered: false,
            onShow: function () {
                // Show the task name read-only and lock the version field.
                $('input[name="version"]').addClass('model_disabled')
                $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
                $('input[name="jobId"]').val(obj.JobID)
                $('input[name="versionName"]').val("V0001")
                // Default the engine dropdown to PyTorch (value 0).
                $('#choice_Engine .default.text').text("PyTorch");
                $('#choice_Engine input[name="engine"]').val(0)
                $('#choice_Engine .default.text').css({ "color": "rgb(0, 0, 0,0.87)" })
                $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
                // Generate a default model name/version and load the task's
                // output files into the selection tree.
                createModelName();
                loadSelectedModelFile(obj);
            },
            onHide: function () {
                // Restore dimmer styling and clear any shown error message.
                $('.ui.dimmer').css({ "background-color": "" })
                $('.ui.error.message').text()
                $('.ui.error.message').css('display', 'none')
            }
        })
        .modal('show')
}
// Submits the "create model" form (#formId) via AJAX and, on success,
// resets the form and navigates to the new model's README page.
function createModel() {
    // The model-file tree selection is required; flag the field if empty.
    if(!$('input#modelSelectedFile').val()){
        $('input#modelSelectedFile').parent().addClass('error')
        return
    }
    let url_href = `/${userName}/${repoPath}/modelmanage/create_new_model`
    let data = $("#formId").serialize()
    // The isPrivate radios are omitted for private repos (see template);
    // default to private in that case.
    var radio = document.getElementsByName("isPrivate");
    if(radio == null || radio.length == 0){
        data +="&isPrivate=true";
    }
    // Show the full-page loading mask while the request is in flight.
    $("#mask").css({ "display": "block", "z-index": "9999" })
    $.ajax({
        url: url_href,
        type: 'POST',
        data: data,
        success: function (res) {
            // Capture the name before the form is cleared, then reset all
            // fields and jump to the model's README template page.
            const modelName = $('#formId #name').val();
            $('input[name="engine_name"]').val("");
            $('input[name="engine"]').val("");
            $('input[name="jobId"]').val("");
            $('input[name="label"]').val("");
            $('input[name="description"]').val("");
            var cityObj = $("#modelSelectedFile");
            cityObj.attr("value", "");
            document.getElementById("formId").reset();
            location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(modelName)}`
            $('.ui.modal.second').modal('hide')
        },
        error: function (xhr) {
            // Hide loading.
            // Only runs when the request fails (status code is not 200).
            $('.ui.error.message').text(xhr.responseText)
            $('.ui.error.message').css('display', 'block')
        },
        complete: function (xhr) {
            // Always remove the loading mask, success or failure.
            $("#mask").css({ "display": "none", "z-index": "1" })
        }
    })
}
// Pre-fills the create-model form with a default unique name
// ("<repoName>_model_<4 random base36 chars>") and initial version "0.0.1".
function createModelName() {
    let repoName = location.pathname.split('/')[2]
    // String.prototype.substr is deprecated; slice(2, 6) extracts the same
    // four characters after the "0." prefix of Math.random().toString(36).
    let modelName = repoName + '_model_' + Math.random().toString(36).slice(2, 6)
    $('#name').val(modelName)
    $('#version').val("0.0.1")
}
// Activate the top-level tab menu immediately, then initialize the
// accordions and the secondary tab menu once the DOM is ready.
$('.menu .item').tab()
$(document).ready(function () {
    $('.ui.accordion').accordion({ selector: { trigger: '.icon' } });
    $('.secondary.menu .item').tab();
});
;(function() {
    // Spec of this task, injected server-side by the Go template.
    var SPEC = {{ .Spec }};
    // Detail pages never show point/price info.
    var showPoint = false;
    // Render the spec into a human-readable string with localized labels.
    var specStr = window.renderSpecStr(SPEC, showPoint, {
        gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
        free: {{$.i18n.Tr "cloudbrain.free"}},
        point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
        memory: {{$.i18n.Tr "cloudbrain.memory"}},
        shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
        no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
    });
    // Fill the "standard" and "resource type" table cells; the resource
    // type is resolved from ACC_CARD_TYPE (defined in specsuse.js).
    $('td.ti-text-form-content.spec div').text(specStr);
    SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
</script> | |||
{{ template "repo/cloudbrain/cloudbraindetail" .}} |
@@ -0,0 +1,746 @@ | |||
{{template "base/head" .}} | |||
<link rel="stylesheet" href="/self/ztree/css/zTreeStyle/zTreeStyle.css?v={{MD5 AppVer}}" type="text/css"> | |||
<style> | |||
.model_file_bread { | |||
margin-bottom: -0.5rem !important; | |||
padding-left: 1rem; | |||
padding-top: 0.5rem; | |||
} | |||
.menuContent{ | |||
position: absolute; | |||
background: #ffffff; | |||
left: 0; | |||
right: 26px; | |||
top: 36px; | |||
z-index:999; | |||
border: 1px solid #96c8da; | |||
border-top: 0; | |||
border-bottom-right-radius: 4px; | |||
border-bottom-left-radius: 4px; | |||
box-shadow: 0 2px 3px 0 rgb(34 36 38 / 15%); | |||
} | |||
</style> | |||
<div id="mask"> | |||
<div id="loadingPage"> | |||
<div class="rect1"></div> | |||
<div class="rect2"></div> | |||
<div class="rect3"></div> | |||
<div class="rect4"></div> | |||
<div class="rect5"></div> | |||
</div> | |||
</div> | |||
<div class="repository"> | |||
{{template "repo/header" .}} | |||
<div class="ui container"> | |||
<h4 class="ui header" id="vertical-segment"> | |||
<div class="ui breadcrumb"> | |||
<a class="section" href="{{.RepoLink}}/debugjob?debugListType=all"> | |||
{{.i18n.Tr "repo.cloudbrain"}} | |||
</a> | |||
<div class="divider"> / </div> | |||
<a class="section" href="{{$.RepoLink}}/modelarts/train-job"> | |||
{{$.i18n.Tr "repo.modelarts.train_job"}} | |||
</a> | |||
<div class="divider"> / </div> | |||
<div class="active section">{{.displayJobName}}</div> | |||
</div> | |||
</h4> | |||
{{range $k ,$v := .version_list_task}} | |||
<div class="ui accordion border-according" id="accordion{{.VersionName}}" | |||
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}"> | |||
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}"> | |||
<div class="{{if eq $k 0}}active{{end}} title padding0"> | |||
<div class="according-panel-heading"> | |||
<div class="accordion-panel-title"> | |||
<i class="dropdown icon"></i> | |||
<span class="accordion-panel-title-content"> | |||
<span> | |||
<div style="float: right;"> | |||
{{if and ($.canDownload) (ne .Status "WAITING") }} | |||
<a class="ti-action-menu-item" id="{{.VersionName}}-create-model" | |||
onclick="showcreate({DisplayJobName:{{.DisplayJobName}},JobName:{{.JobName}},JobID:{{.JobID}},VersionName:{{.VersionName}}})">{{$.i18n.Tr "repo.modelarts.create_model"}}</a> | |||
{{else}} | |||
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-create-model">{{$.i18n.Tr "repo.modelarts.create_model"}}</a> | |||
{{end}} | |||
{{if and ($.canDownload) (ne .Status "WAITING") }} | |||
<a class="ti-action-menu-item export-dataset" style="position:relative" id="{{.VersionName}}-export-dataset" data-version="{{.VersionName}}" data-jobid="{{.JobID}}" data-repopath="/{{$.RepoRelPath}}/datasets/model"> | |||
{{$.i18n.Tr "repo.export_result_to_dataset"}} | |||
<div class="export-popup" id="{{.VersionName}}-popup"> | |||
<div class="ui active centered inline loader" style="width: 100%;display: flex;align-items: center;">{{$.i18n.Tr "repo.loader_result_file"}}</div> | |||
</div> | |||
</a> | |||
{{else}} | |||
<a class="ti-action-menu-item disabled" id="{{.VersionName}}-export-dataset">{{$.i18n.Tr "repo.export_result_to_dataset"}}</a> | |||
{{end}} | |||
</div> | |||
<div class="ac-display-inblock title_text acc-margin-bottom"> | |||
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span> | |||
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}: | |||
<span id="{{.VersionName}}-status-span"><i id="icon" | |||
style="vertical-align: middle;" class="{{.Status}}"></i><span id="text" | |||
style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||
</span> | |||
<span | |||
class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}}:</span> | |||
<span class="cti-mgRight-sm uc-accordionTitle-black" | |||
id="{{.VersionName}}-duration-span">{{$.duration}}</span> | |||
<span class="refresh-status" data-tooltip="刷新" style="cursor: pointer;" data-inverted="" data-version="{{.VersionName}}"><i | |||
class="redo icon redo-color"></i></span> | |||
</div> | |||
</span> | |||
</span> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="{{if eq $k 0}}active{{end}} content"> | |||
<div class="content-pad"> | |||
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);"> | |||
<a class="active item" data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a> | |||
<a class="item detail-log-tab" data-tab="third{{$k}}" data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | |||
<a class="item load-model-file" data-tab="four{{$k}}" data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | |||
</div> | |||
<div class="ui tab active" data-tab="first{{$k}}"> | |||
<div style="padding-top: 10px;"> | |||
<div class="tab_2_content"> | |||
<div class="ac-grid ac-grid-col2"> | |||
<div class="ac-grid-col"> | |||
<table class="ti-form"> | |||
<tbody class="ti-text-form"> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.cloudbrain_task"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
{{.DisplayJobName}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.status"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" id="{{.VersionName}}-status"> | |||
{{.Status}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.cloudbrain_creator"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" id="{{.VersionName}}-mirror"> | |||
{{.User.Name}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.start_time"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
<span style="font-size: 12px;" id="{{.VersionName}}-startTime"> | |||
{{if not (eq .StartTime 0)}} | |||
{{TimeSinceUnix1 .StartTime}} | |||
{{else}} | |||
-- | |||
{{end}} | |||
</span> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.dura_time"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" | |||
id="{{.VersionName}}-duration"> | |||
{{$.duration}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.model_name"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w">{{if .ModelName}}{{.ModelName}}{{else}}--{{end}}</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelconvert.modelversion"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w">{{if .ModelVersion}}{{.ModelVersion}}{{else}}--{{end}}</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.infer_job_model_file"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" {{if .CkptName}}title="{{.CkptName}}"{{end}}>{{if .CkptName}}{{.CkptName}}{{else}}--{{end}}</div> | |||
</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</div> | |||
<div class="ac-grid-col"> | |||
<table class="ti-form"> | |||
<tbody class="ti-text-form"> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "cloudbrain.mirror"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" id="{{.VersionName}}-mirror"> | |||
<span class="ui poping up clipboard" data-position="top center" id="clipboard-btn" style="cursor:pointer" | |||
data-clipboard-text="{{.Image}}" | |||
data-success="{{$.i18n.Tr "repo.copy_link_success"}}" | |||
data-error="{{$.i18n.Tr "repo.copy_link_error"}}" | |||
data-content="{{$.i18n.Tr "repo.copy_link"}}" | |||
data-variation="inverted tiny" | |||
> | |||
<span title="{{.Image}}">{{.Image}}</span> | |||
</span> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.code_version"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
{{.BranchName}} | |||
<span style="margin-left:1rem" class="ui label">{{SubStr .CommitID 0 10}}</span> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.start_file"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w"> | |||
{{.BootFile}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" title="{{.Parameters}}"> | |||
{{if .Parameters}}{{.Parameters}}{{else}}--{{end}} | |||
</div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.resource_type"}} | |||
</td> | |||
<td class="ti-text-form-content resorce_type"> | |||
<div class="text-span text-span-w"></div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.standard"}} | |||
</td> | |||
<td class="ti-text-form-content spec"> | |||
<div class="text-span text-span-w"></div> | |||
</td> | |||
</tr> | |||
<tr class="ti-no-ng-animate"> | |||
<td class="ti-no-ng-animate ti-text-form-label text-width80"> | |||
{{$.i18n.Tr "repo.modelarts.train_job.description"}} | |||
</td> | |||
<td class="ti-text-form-content"> | |||
<div class="text-span text-span-w" title="{{.Description}}"> | |||
{{if .Description}}{{.Description}}{{else}}--{{end}} | |||
</div> | |||
</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</div> | |||
</div> | |||
<div style="clear:both"> | |||
{{if $.datasetDownload}} | |||
<table style="border:none" class="ui fixed small stackable table"> | |||
<thead> | |||
<tr><th style="color: #8a8e99;font-size:12px" class="three wide left aligned">{{$.i18n.Tr "dataset.file"}}</th> | |||
</tr></thead> | |||
<tbody> | |||
{{range $m ,$n := $.datasetDownload}} | |||
<tr> | |||
<td style="word-wrap: break-word;word-break: break-all;"> | |||
{{if eq .IsDelete true}} | |||
{{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}}) | |||
{{else}} | |||
<a href="{{.RepositoryLink}}" target="_blank">{{.DatasetName}}</a> | |||
{{end}} | |||
</td> | |||
</tr> | |||
{{end}} | |||
</tbody> | |||
</table> | |||
{{end}} | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="third{{$k}}"> | |||
<div class="detail-log-content detail-log-content-{{.VersionName}}" | |||
data-multiplenode="" | |||
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log" | |||
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file" | |||
data-workservernumber="{{.WorkServerNumber}}" | |||
data-version="{{.VersionName}}"> | |||
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n }} | |||
</div> | |||
<div class="ui modal full-log-dlg-{{.VersionName}} container" style="height:90%;margin:3rem auto;"> | |||
<div id="log-file-title" style="font-size: 16px;font-weight:600;padding:20px 30px 16px;">{{$.i18n.Tr "repo.modelarts.log_file"}}</div> | |||
<div style="padding:0 50px 10px 30px;height:100%"> | |||
<div class="detail-log-fullscreen-content detail-log-fullscreen-content-{{.VersionName}}" style="height:100%;" | |||
data-multiplenode="" | |||
data-logapiurl="{{$.RepoLink}}/cloudbrain/{{.ID}}/log" | |||
data-logdownloadurl="/api/v1/repos{{$.RepoLink}}/cloudbrain/{{.ID}}/download_log_file" | |||
data-workservernumber="{{.WorkServerNumber}}" | |||
data-version="{{.VersionName}}"> | |||
{{template "custom/task_log" Dict "$" $ "i18n" $.i18n "Full" true }} | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="ui tab" data-tab="four{{$k}}"> | |||
<input type="hidden" name="model{{.VersionName}}" value="-1"> | |||
<input type="hidden" name="modelback{{.VersionName}}" value="-1"> | |||
<div style="display: flex;justify-content: space-between;"> | |||
<div class='ui breadcrumb model_file_bread' id='file_breadcrumb{{.VersionName}}'> | |||
<div class="active section">result</div> | |||
<div class="divider"> / </div> | |||
</div> | |||
<a id="{{.VersionName}}-result-down" style="padding-right: 1%;display: none;" | |||
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}} file-info' | |||
href="{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}/download_multi_model?version_name={{.VersionName}}&jobName={{.JobName}}"> | |||
<i class="ri-download-cloud-2-line"></i> | |||
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.all_result_download"}}</span> | |||
</a> | |||
</div> | |||
<div id="dir_list{{.VersionName}}"> | |||
</div> | |||
<div style="display:flex;align-items: center;justify-content: end;color: #f2711c;"> | |||
<i class="ri-error-warning-line" style="margin-right:0.5rem;"></i> | |||
<span>{{$.i18n.Tr "repo.file_limit_100"}}</span> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<!-- {{template "custom/max_log" .}} --> | |||
{{end}} {{template "base/paginate" .}} | |||
</div> | |||
<!-- 确认模态框 --> | |||
<div id="deletemodel"> | |||
<div class="ui basic modal"> | |||
<div class="ui icon header"> | |||
<i class="trash icon"></i> 删除任务 | |||
</div> | |||
<div class="content"> | |||
<p>你确认删除该任务么?此任务一旦删除不可恢复。</p> | |||
</div> | |||
<div class="actions"> | |||
<div class="ui red basic inverted cancel button"> | |||
<i class="remove icon"></i> 取消操作 | |||
</div> | |||
<div class="ui green basic inverted ok button"> | |||
<i class="checkmark icon"></i> 确定操作 | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
<!-- | |||
<div id="menuContent" class="menuContent" style="display:none; position: absolute;z-index:9999"> | |||
<ul id="treeDemo" class="ztree" style="margin-top:0; width: 83%; height: 100%;"></ul> | |||
</div> --> | |||
<!-- 创建模型 --> | |||
<div id="newmodel"> | |||
<div class="ui modal second"> | |||
<div class="header" style="padding: 1rem;background-color: rgba(240, 240, 240, 100);"> | |||
<h4 id="model_header">{{.i18n.Tr "repo.model.manage.import_new_model"}}</h4> | |||
</div> | |||
<div class="content content-padding"> | |||
<form id="formId" method="POST" class="ui form"> | |||
<div class="ui error message"> | |||
</div> | |||
{{$.CsrfTokenHtml}} | |||
<input type="hidden" name="trainTaskCreate" value="true"> | |||
<div class="required inline field"> | |||
<label>{{.i18n.Tr "repo.modelarts.train_job"}}</label> | |||
<input type="hidden" class="width83" id="jobId" name="jobId" readonly required> | |||
<input type="hidden" id="versionName" name="versionName" value="V0001"> | |||
<input style="width: 45%;" id="JobName" readonly required> | |||
</div> | |||
<div class="required inline field" id="modelname"> | |||
<label>{{.i18n.Tr "repo.model.manage.model_name"}}</label> | |||
<input style="width: 45%;" id="name" name="name" required maxlength="25" | |||
onkeyup="this.value=this.value.replace(/[, ]/g,'')"> | |||
</div> | |||
<div class="required inline field" id="verionname"> | |||
<label>{{.i18n.Tr "repo.modelconvert.modelversion"}}</label> | |||
<input style="width: 45%;" id="version" name="version" value="" readonly required maxlength="255"> | |||
</div> | |||
<div class="unite min_title inline field required"> | |||
<label>{{.i18n.Tr "repo.model.manage.engine"}}</label> | |||
<div class="ui dropdown selection search width70" id="choice_Engine"> | |||
<input type="hidden" id="engine" name="engine" required> | |||
<div class="default text">{{.i18n.Tr "repo.model.manage.select.engine"}}</div> | |||
<i class="dropdown icon"></i> | |||
<div class="menu" id="job-Engine"> | |||
<option class="active item" data-value="0">PyTorch</option> | |||
<option class="item" data-value="1">TensorFlow</option> | |||
<option class="item" data-value="4">PaddlePaddle</option> | |||
<option class="item" data-value="5">OneFlow</option> | |||
<option class="item" data-value="6">MXNet</option> | |||
<option class="item" data-value="3">Other</option> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="unite min_title inline fields required"> | |||
<div class="field required"> | |||
<label for="modelSelectedFile">{{.i18n.Tr "repo.model.manage.modelfile"}}</label> | |||
</div> | |||
<div class="thirteen wide field" style="position:relative"> | |||
<input id="modelSelectedFile" type="text" readonly required onclick="showMenu();" name="modelSelectedFile"> | |||
<div id="menuContent" class="menuContent" style="display:none;"> | |||
<ul id="treeDemo" class="ztree"></ul> | |||
</div> | |||
</div> | |||
</div> | |||
<div class="inline field"> | |||
<label>{{.i18n.Tr "repo.model.manage.modellabel"}}</label> | |||
<input style="width: 83%;margin-left: 7px;" id="label" name="label" maxlength="255" | |||
placeholder='{{.i18n.Tr "repo.modelarts.train_job.label_place"}}'> | |||
</div> | |||
{{if eq $.Repository.IsPrivate false}} | |||
<div class="inline fields"> | |||
<label>{{.i18n.Tr "repo.model.manage.modelaccess"}} </label> | |||
<div class="field"> | |||
<div class="ui radio checkbox"> | |||
<input type="radio" name="isPrivate" checked="checked" value="false"> | |||
<label>{{.i18n.Tr "repo.model.manage.modelaccess.public"}}</label> | |||
</div> | |||
</div> | |||
<div class="field"> | |||
<div class="ui radio checkbox"> | |||
<input type="radio" name="isPrivate" value="true"> | |||
<label>{{.i18n.Tr "repo.model.manage.modelaccess.private"}}</label> | |||
</div> | |||
</div> | |||
</div> | |||
{{end}} | |||
<div class="inline field"> | |||
<label for="description">{{.i18n.Tr "repo.model.manage.modeldesc"}}</label> | |||
<textarea style="width: 83%;margin-left: 7px;" id="description" name="description" rows="3" | |||
maxlength="255" placeholder='{{.i18n.Tr "repo.modelarts.train_job.new_place"}}' | |||
onchange="this.value=this.value.substring(0, 255)" | |||
onkeydown="this.value=this.value.substring(0, 255)" | |||
                        onkeyup="this.value=this.value.substring(0, 255)"></textarea>
</div> | |||
<div class="inline field" style="margin-left: 75px;"> | |||
<button onclick="createModel()" type="button" class="ui create_train_job green button" | |||
style="position: absolute;"> | |||
{{.i18n.Tr "repo.model.manage.sava_model"}} | |||
</button> | |||
</div> | |||
</form> | |||
<div class="actions" style="display: inline-block;margin-left: 180px;"> | |||
<button class="ui button cancel">{{.i18n.Tr "repo.cloudbrain.cancel"}}</button> | |||
</div> | |||
</div> | |||
</div> | |||
</div> | |||
{{template "custom/export_dataset" .}} | |||
</div> | |||
{{template "base/footer" .}} | |||
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.core.js?v={{MD5 AppVer}}"></script> | |||
<script type="text/javascript" src="/self/ztree/js/jquery.ztree.excheck.js?v={{MD5 AppVer}}"></script> | |||
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script> | |||
// Derive the repository owner and name from the current URL, e.g.
// https://host/<user>/<repo>/cloudbrain/train-job/<id> .
var userName;
var repoPath;
$(document).ready(function () {
    var segments = window.location.href.split('/');
    userName = segments[segments.length - 5];
    repoPath = segments[segments.length - 4];
});
// zTree configuration for the model-file picker (#treeDemo).
var setting = {
    check: {
        enable: true,
        // Checkbox linkage: (de)selecting a node propagates both to its
        // parents ("p") and to its children ("s").
        chkboxType: {"Y":"ps", "N":"ps"}
    },
    view: {
        // Double-clicking a label must not expand the node; clicks are
        // redirected to the checkbox via beforeClick below.
        dblClickExpand: false
    },
    callback: {
        beforeClick: beforeClick,
        onCheck: onCheck
    }
};
// Toggle a node's checkbox when its label is clicked, and suppress the
// default zTree click handling.
function beforeClick(treeId, treeNode) {
    var tree = $.fn.zTree.getZTreeObj("treeDemo");
    tree.checkNode(treeNode, !treeNode.checked, null, true);
    return false;
}
function onCheck(e, treeId, treeNode) {
    // Collect the full paths of all checked leaf nodes into the
    // #modelSelectedFile input, joined by ";".
    var zTree = $.fn.zTree.getZTreeObj("treeDemo"),
        nodes = zTree.getCheckedNodes(true),
        paths = [];
    for (var i = 0, l = nodes.length; i < l; i++) {
        if (nodes[i].isParent) {
            continue; // directories themselves are not results
        }
        var pathNodes = nodes[i].getPath();
        var segs = [];
        for (var j = 0; j < pathNodes.length; j++) {
            segs.push(pathNodes[j].name);
        }
        paths.push(segs.join("/"));
    }
    // Use val() rather than attr("value"): attr() only sets the attribute
    // and does not update the input's current value once the user has
    // interacted with it.
    $("#modelSelectedFile").val(paths.join(";"));
}
// Expand the zTree file-picker dropdown below #modelSelectedFile and start
// listening for clicks outside it so it can be dismissed.
function showMenu() {
    // Removed unused cityObj/cityOffset locals: they were only referenced
    // by a commented-out absolute-positioning line.
    $("#menuContent").slideDown("fast");
    $("body").bind("mousedown", onBodyDown);
}
// Collapse the file-picker dropdown and stop watching for outside clicks.
function hideMenu() {
    $("body").unbind("mousedown", onBodyDown);
    $("#menuContent").fadeOut("fast");
}
// Dismiss the file-picker dropdown when a mousedown lands outside of it
// (and outside its trigger input/button).
function onBodyDown(event) {
    var t = event.target;
    var inside = t.id == "menuBtn"
        || t.id == "modelSelectedFile"
        || t.id == "menuContent"
        || $(t).parents("#menuContent").length > 0;
    if (!inside) {
        hideMenu();
    }
}
let dirKey="isOnlyDir--:&"; | |||
// Fetch the output-file list of the given train task and (re)build the
// zTree model-file picker from it. `trainJob` carries at least
// {JobName, VersionName} — see showcreate().
function loadSelectedModelFile(trainJob){
    console.log("trainJob=" + trainJob);
    $('#choice_file').dropdown('clear')
    $("#model-file").empty()
    if(trainJob ==null || trainJob ==""){
        console.log("trainJob is null");
    }else{
        $.get(`/${userName}/${repoPath}/modelmanage/query_train_model?jobName=${trainJob.JobName}&type=0&VersionName=${trainJob.VersionName}`, (data) => {
            const n_length = data.length
            // NOTE(review): file_html and firstFileName are unused leftovers.
            let file_html=''
            let firstFileName =''
            var zNodes=[];
            var nodesMap={};
            // First pass: split each returned FileName on "/" and build a
            // nested map, one level per path segment.
            for (let i=0;i<n_length;i++){
                var parentNodeMap = nodesMap;
                var fileSplits = data[i].FileName.split("/");
                for(let j=0;j < fileSplits.length;j++){
                    if(fileSplits[j] == ""){
                        break;
                    }
                    if(parentNodeMap[fileSplits[j]] == null){
                        parentNodeMap[fileSplits[j]] = {};
                    }
                    parentNodeMap = parentNodeMap[fileSplits[j]];
                }
            }
            // Second pass: a FileName ending in "/" denotes a directory;
            // mark those that stayed empty with the dirKey sentinel so
            // convertToNode renders them as childless parent nodes.
            for (let i=0;i<n_length;i++){
                var parentNodeMap = nodesMap;
                var fileSplits = data[i].FileName.split("/");
                for(let j=0;j < fileSplits.length;j++){
                    if(fileSplits[j] == ""){
                        if(data[i].FileName[data[i].FileName.length -1] =="/"){
                            if(Object.keys(parentNodeMap).length ==0){
                                parentNodeMap[dirKey]="true";
                            }
                        }
                        break;
                    }
                    parentNodeMap = parentNodeMap[fileSplits[j]];
                }
            }
            // Flatten the nested map into zTree's node-array format and
            // initialise the tree widget.
            convertToNode(zNodes,nodesMap);
            $.fn.zTree.init($("#treeDemo"), setting, zNodes);
        })
    }
}
// Convert the nested map built by loadSelectedModelFile into the node-array
// format zTree expects, recursing into sub-directories. Appends into
// nodeList (mutated in place).
function convertToNode(nodeList, nodesMap) {
    var keyList = Object.keys(nodesMap);
    // The original comparator was `a-b`, which evaluates to NaN for
    // non-numeric file names and left the order unspecified. Sort
    // numerically when both names are numbers, lexicographically otherwise.
    keyList.sort(function (a, b) {
        var na = Number(a), nb = Number(b);
        if (!isNaN(na) && !isNaN(nb)) {
            return na - nb;
        }
        return a < b ? -1 : a > b ? 1 : 0;
    });
    var isFirst = true;
    for (var i = 0; i < keyList.length; i++) {
        var node = { name: keyList[i] };
        nodeList.push(node);
        var child = nodesMap[keyList[i]];
        if (child != null && Object.keys(child).length > 0) {
            if (child[dirKey] != null) {
                // Empty directory: collapsed parent with no children.
                node["open"] = false;
                node["isParent"] = true;
            } else {
                node["children"] = [];
                if (isFirst) {
                    // Expand only the first directory encountered.
                    node["open"] = true;
                    isFirst = false;
                }
                convertToNode(node["children"], child);
            }
        }
    }
}
// Open the "create model" modal pre-filled from the given train task
// ({DisplayJobName, JobName, JobID, VersionName}).
function showcreate(obj) {
    $('.ui.modal.second')
        .modal({
            centered: false,
            onShow: function () {
                $('input[name="version"]').addClass('model_disabled')
                $('#JobName').val(obj.DisplayJobName).addClass('model_disabled')
                $('input[name="jobId"]').val(obj.JobID)
                $('input[name="versionName"]').val("V0001")
                // Default the engine dropdown to PyTorch (value 0).
                $('#choice_Engine .default.text').text("PyTorch");
                $('#choice_Engine input[name="engine"]').val(0)
                $('#choice_Engine .default.text').css({ "color": "rgb(0, 0, 0,0.87)" })
                $('.ui.dimmer').css({ "background-color": "rgb(136, 136, 136,0.7)" })
                createModelName();
                loadSelectedModelFile(obj);
            },
            onHide: function () {
                $('.ui.dimmer').css({ "background-color": "" })
                // Fix: the original called .text() (a getter), which left
                // stale error text in place; .text('') actually clears it.
                $('.ui.error.message').text('')
                $('.ui.error.message').css('display', 'none')
            }
        })
        .modal('show')
}
function createModel() {
    // Require a model file to be selected before submitting.
    if (!$('input#modelSelectedFile').val()) {
        $('input#modelSelectedFile').parent().addClass('error')
        return
    }
    let url_href = `/${userName}/${repoPath}/modelmanage/create_new_model`
    let data = $("#formId").serialize()
    // The isPrivate radios are only rendered for public repos; default the
    // new model to private when they are absent.
    var radio = document.getElementsByName("isPrivate");
    if (radio == null || radio.length == 0) {
        data += "&isPrivate=true";
    }
    // Show the full-screen loading mask while the request is in flight.
    $("#mask").css({ "display": "block", "z-index": "9999" })
    $.ajax({
        url: url_href,
        type: 'POST',
        data: data,
        success: function (res) {
            const modelName = $('#formId #name').val();
            // Clear form state before navigating away.
            $('input[name="engine_name"]').val("");
            $('input[name="engine"]').val("");
            $('input[name="jobId"]').val("");
            $('input[name="label"]').val("");
            $('input[name="description"]').val("");
            // Use val() rather than attr("value"): attr() only sets the
            // attribute and does not clear the input's current value.
            $("#modelSelectedFile").val("");
            document.getElementById("formId").reset();
            location.href = `/${userName}/${repoPath}/modelmanage/model_readme_tmpl?name=${encodeURIComponent(modelName)}`
            $('.ui.modal.second').modal('hide')
        },
        error: function (xhr) {
            // Runs only on a non-200 response: surface the server message.
            $('.ui.error.message').text(xhr.responseText)
            $('.ui.error.message').css('display', 'block')
        },
        complete: function (xhr) {
            // Always hide the loading mask, success or failure.
            $("#mask").css({ "display": "none", "z-index": "1" })
        }
    })
}
// Pre-fill the model name ("<repo>_model_<4 random chars>") and the
// initial version number shown in the create-model dialog.
function createModelName() {
    const repoName = location.pathname.split('/')[2]
    const suffix = Math.random().toString(36).substr(2, 4)
    $('#name').val(repoName + '_model_' + suffix)
    $('#version').val("0.0.1")
}
// Activate Fomantic-UI tab behaviour immediately, then wire up the version
// accordions and the secondary tab menu once the DOM is ready.
$('.menu .item').tab()
$(document).ready(function () {
    $('.ui.accordion').accordion({ selector: { trigger: '.icon' } });
    $('.secondary.menu .item').tab();
});
;(function() {
    // Render the resource specification (injected server-side by the Go
    // template action below) into the "standard" and "resource type" cells.
    var SPEC = {{ .Spec }};
    var showPoint = false;
    var specStr = window.renderSpecStr(SPEC, showPoint, {
        gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
        free: {{$.i18n.Tr "cloudbrain.free"}},
        point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
        memory: {{$.i18n.Tr "cloudbrain.memory"}},
        shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
        no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
    });
    $('td.ti-text-form-content.spec div').text(specStr);
    // SPEC may be null when no spec was recorded; guard before rendering
    // the accelerator card type.
    SPEC && $('td.ti-text-form-content.resorce_type div').text(getListValueWithKey(ACC_CARD_TYPE, SPEC.AccCardType));
})();
@@ -1,257 +1 @@ | |||
{{/* Creation form for a Grampus (C2Net) GCU training job: cluster/resource pickers, job metadata, code/image/dataset selection, run parameters, and resource spec. */}}
{{template "base/head" .}}
<style>
    .min_title{
        font-size: 14px !important;
        margin-bottom: 2rem !important;
    }
    .width{
        width:100% !important;
    }
    .width80{
        width: 80.7% !important;
        margin-left: 10px;
    }
    .width85{
        width: 85% !important;
        margin-left: 10.5rem !important;
        align-items: center;
    }
    .width81{
        width: 81% !important;
    }
    .width48 {
        width: 48.5% !important;
    }
    .add{font-size: 18px;
        padding: 0.5rem;
        border: 1px solid rgba(187, 187, 187, 100);
        border-radius: 0px 5px 5px 0px;
        line-height: 21px;
        text-align: center;
        color: #C2C7CC;
    }
    .min{
        font-size: 18px;
        padding: 0.5rem;
        border: 1px solid rgba(187, 187, 187, 100);
        border-radius: 5px 0px 0px 5px;
        line-height: 21px;
        text-align: center;
        color: #C2C7CC;
    }
</style>
{{template "custom/global_mask" .}}
<div class="repository">
    {{template "repo/header" .}}
    <div class="ui container">
        <!-- Hidden carrier element: cloudbrainNew.js reads these data-* attributes to initialize the dataset selector. -->
        <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
        {{if eq .NotStopTaskCount 0}}
        {{template "base/alert" .}}
        {{end}}
        {{template "custom/alert_cb" .}}
        <h4 class="ui top attached header">
            {{.i18n.Tr "repo.modelarts.train_job.new"}}
        </h4>
        <div class="ui attached segment">
            <!-- equal width -->
            <form id="form_id" class="ui form" action="{{.Link}}" method="post">
                {{.CsrfTokenHtml}}
                <input type="hidden" name="image" value="">
                <h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
                <!-- Resource cluster picker: OpenI vs C2Net (this page is the C2Net/grampus variant). -->
                <div class="required min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
                    <div class="ui blue mini menu compact selectcloudbrain">
                        <a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
                            {{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
                        </a>
                        <a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if.NPUEnabled}}npu{{else}}gpu{{end}}/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
                            {{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
                        </a>
                    </div>
                </div>
                <!-- Compute resource picker; GCU is the active tab on this page. -->
                <div class="required min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
                    <div class="ui blue mini menu compact selectcloudbrain">
                        <a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
                                <path fill="none" d="M0 0h24v24H0z"/>
                                <path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
                            </svg>
                            CPU/GPU
                        </a>
                        <a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
                                <path fill="none" d="M0 0h24v24H0z"/>
                                <path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
                            </svg>
                            Ascend NPU</a>
                        <a class="active item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
                                <path fill="none" d="M0 0h24v24H0z"/>
                                <path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
                            </svg>
                            GCU</a>
                    </div>
                </div>
                <div class="min_title inline field" style="margin-top:-10px;">
                    <label class="label-fix-width" style="font-weight: normal;"></label>
                    {{template "custom/task_wait_count" .}}
                    <div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
                        <i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
                        <span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span>
                    </div>
                </div>
                <div class="required min_title inline field" style="margin-bottom: 0rem !important;">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
                    <input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
                </div>
                <span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
                <div class="min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
                    {{if .description}}
                    <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
                    {{else}}
                    <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
                    {{end}}
                </div>
                <div class="ui divider"></div>
                <h4 class="train-job-title title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>
                <!-- Branch selector: the selected branch (or the repo default) is listed first, the rest follow. -->
                <div class="required unite min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
                    <select class="ui dropdown width80 left2" id="code_version" name="branch_name">
                        {{if .branch_name}}
                        <option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
                        {{range $k, $v :=.Branches}}
                        {{ if ne $v $.branch_name }}
                        <option name="branch_name" value="{{$v}}">{{$v}}</option>
                        {{end}}
                        {{end}}
                        {{else}}
                        <option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
                        {{range $k, $v :=.Branches}}
                        {{ if ne $v $.branchName }}
                        <option name="branch_name" value="{{$v}}">{{$v}}</option>
                        {{end}}
                        {{end}}
                        {{end}}
                    </select>
                </div>
                <!-- {{template "custom/select_model" .}} -->
                <!-- Image selector: a previously-chosen image (.image_id) is listed first. -->
                <div class="required min_title inline field" id="engine_name">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
                    <select class="ui dropdown cloudbrain_image width81" id="trainjob_images" name="image_id">
                        {{if .image_id}}
                        {{range .images}}
                        {{if eq $.image_id .ID}}
                        <option value="{{.ID}}">{{.Name}}</option>
                        {{end}}
                        {{end}}
                        {{range .images}}
                        {{if ne $.image_id .ID}}
                        <option value="{{.ID}}">{{.Name}}</option>
                        {{end}}
                        {{end}}
                        {{else}}
                        {{range .images}}
                        <option name="image_id" value="{{.ID}}">{{.Name}}</option>
                        {{end}}
                        {{end}}
                    </select>
                </div>
                <div class="inline min_title field required">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
                    {{if .boot_file}}
                    <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
                    {{else}}
                    <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
                    {{end}}
                    <span>
                        <i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
                    </span>
                    <a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GCU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
                </div>
                <!-- Dataset picker is rendered into this container by cloudbrainNew.js. -->
                <div id="select-multi-dataset">
                </div>
                <span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
                <!-- Dynamic run-parameter rows; serialized into the hidden "run_para_list" input on submit. -->
                <div class="inline min_title field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
                    <span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
                    <input id="store_run_para" type="hidden" name="run_para_list">
                    <div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">
                    </div>
                </div>
                <div class="required min_title inline field" id="flavor_name">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
                    <!-- NOTE(review): the "ovalue"/"blance" attribute names look misspelled but are read as-is by specsuse.js; do not rename them without updating that script. -->
                    <select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
                    <span><i class="question circle icon link"></i></span>
                    <a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
                    {{if .CloudBrainPaySwitch}}
                    <div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
                        <span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
                        <span style="float:right;">
                            <i class="question circle icon link" data-position="right center" data-variation="mini"></i>
                            <a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
                        </span>
                    </div>
                    {{end}}
                </div>
                <!-- Node count is fixed at 1 for GCU jobs (hidden input + single-option dropdown). -->
                <div class="inline required min_title field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>
                    <div class="ui labeled input" style="width: 5%;">
                        <input style="border-radius: 0;text-align: center;"type="hidden" name="work_server_number" id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1" readonly>
                        <div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
                            <select class="ui dropdown width" style='width: 100%;' name="work_server_id">
                                <option name="server_id" value="1">1</option>
                            </select>
                        </div>
                    </div>
                </div>
                <div class="inline min_title field">
                    <label class="label-fix-width"></label>
                    <!-- NOTE(review): button is disabled only when NotStopTaskCount == 1; confirm counts > 1 cannot occur here. -->
                    <button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
                        {{.i18n.Tr "repo.cloudbrain.new"}}
                    </button>
                    <a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
                </div>
                <!-- 模态框 -->
            </form>
        </div>
    </div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
    ;(function() {
        // Populate the spec dropdown with localized labels; optionally show point pricing.
        var SPECS = {{ .Specs }};
        var showPoint = {{ .CloudBrainPaySwitch }};
        window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
            gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
            free: {{$.i18n.Tr "cloudbrain.free"}},
            point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
            memory: {{$.i18n.Tr "cloudbrain.memory"}},
            shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
            no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
        });
        // Honor an optional ?backurl=... so Cancel returns to the referring page.
        var backUrl = new URLSearchParams(window.location.search).get("backurl");
        if (backUrl) {
            $('.__btn-cancel-back__').attr('href', backUrl);
        }
    })();
</script>
{{ template "repo/cloudbrain/cloudbraincreate" .}}
@@ -0,0 +1,257 @@ | |||
{{/* Creation form for a Grampus (C2Net) GCU training job: cluster/resource pickers, job metadata, code/image/dataset selection, run parameters, and resource spec. */}}
{{template "base/head" .}}
<style>
    .min_title{
        font-size: 14px !important;
        margin-bottom: 2rem !important;
    }
    .width{
        width:100% !important;
    }
    .width80{
        width: 80.7% !important;
        margin-left: 10px;
    }
    .width85{
        width: 85% !important;
        margin-left: 10.5rem !important;
        align-items: center;
    }
    .width81{
        width: 81% !important;
    }
    .width48 {
        width: 48.5% !important;
    }
    .add{font-size: 18px;
        padding: 0.5rem;
        border: 1px solid rgba(187, 187, 187, 100);
        border-radius: 0px 5px 5px 0px;
        line-height: 21px;
        text-align: center;
        color: #C2C7CC;
    }
    .min{
        font-size: 18px;
        padding: 0.5rem;
        border: 1px solid rgba(187, 187, 187, 100);
        border-radius: 5px 0px 0px 5px;
        line-height: 21px;
        text-align: center;
        color: #C2C7CC;
    }
</style>
{{template "custom/global_mask" .}}
<div class="repository">
    {{template "repo/header" .}}
    <div class="ui container">
        <!-- Hidden carrier element: cloudbrainNew.js reads these data-* attributes to initialize the dataset selector. -->
        <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div>
        {{if eq .NotStopTaskCount 0}}
        {{template "base/alert" .}}
        {{end}}
        {{template "custom/alert_cb" .}}
        <h4 class="ui top attached header">
            {{.i18n.Tr "repo.modelarts.train_job.new"}}
        </h4>
        <div class="ui attached segment">
            <!-- equal width -->
            <form id="form_id" class="ui form" action="{{.Link}}" method="post">
                {{.CsrfTokenHtml}}
                <input type="hidden" name="image" value="">
                <h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4>
                <!-- Resource cluster picker: OpenI vs C2Net (this page is the C2Net/grampus variant). -->
                <div class="required min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label>
                    <div class="ui blue mini menu compact selectcloudbrain">
                        <a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
                            {{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
                        </a>
                        <a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if.NPUEnabled}}npu{{else}}gpu{{end}}/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
                            {{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
                        </a>
                    </div>
                </div>
                <!-- Compute resource picker; GCU is the active tab on this page. -->
                <div class="required min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
                    <div class="ui blue mini menu compact selectcloudbrain">
                        <a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
                                <path fill="none" d="M0 0h24v24H0z"/>
                                <path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
                            </svg>
                            CPU/GPU
                        </a>
                        <a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
                                <path fill="none" d="M0 0h24v24H0z"/>
                                <path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
                            </svg>
                            Ascend NPU</a>
                        <a class="active item" href="{{.RepoLink}}/grampus/train-job/gcu/create">
                            <svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
                                <path fill="none" d="M0 0h24v24H0z"/>
                                <path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
                            </svg>
                            GCU</a>
                    </div>
                </div>
                <div class="min_title inline field" style="margin-top:-10px;">
                    <label class="label-fix-width" style="font-weight: normal;"></label>
                    {{template "custom/task_wait_count" .}}
                    <div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;">
                        <i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i>
                        <span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span>
                    </div>
                </div>
                <div class="required min_title inline field" style="margin-bottom: 0rem !important;">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label>
                    <input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36">
                </div>
                <span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span>
                <div class="min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label>
                    {{if .description}}
                    <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea>
                    {{else}}
                    <textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea>
                    {{end}}
                </div>
                <div class="ui divider"></div>
                <h4 class="train-job-title title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4>
                <!-- Branch selector: the selected branch (or the repo default) is listed first, the rest follow. -->
                <div class="required unite min_title inline field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label>
                    <select class="ui dropdown width80 left2" id="code_version" name="branch_name">
                        {{if .branch_name}}
                        <option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option>
                        {{range $k, $v :=.Branches}}
                        {{ if ne $v $.branch_name }}
                        <option name="branch_name" value="{{$v}}">{{$v}}</option>
                        {{end}}
                        {{end}}
                        {{else}}
                        <option name="branch_name" value="{{.branchName}}">{{.branchName}}</option>
                        {{range $k, $v :=.Branches}}
                        {{ if ne $v $.branchName }}
                        <option name="branch_name" value="{{$v}}">{{$v}}</option>
                        {{end}}
                        {{end}}
                        {{end}}
                    </select>
                </div>
                <!-- {{template "custom/select_model" .}} -->
                <!-- Image selector: a previously-chosen image (.image_id) is listed first. -->
                <div class="required min_title inline field" id="engine_name">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
                    <select class="ui dropdown cloudbrain_image width81" id="trainjob_images" name="image_id">
                        {{if .image_id}}
                        {{range .images}}
                        {{if eq $.image_id .ID}}
                        <option value="{{.ID}}">{{.Name}}</option>
                        {{end}}
                        {{end}}
                        {{range .images}}
                        {{if ne $.image_id .ID}}
                        <option value="{{.ID}}">{{.Name}}</option>
                        {{end}}
                        {{end}}
                        {{else}}
                        {{range .images}}
                        <option name="image_id" value="{{.ID}}">{{.Name}}</option>
                        {{end}}
                        {{end}}
                    </select>
                </div>
                <div class="inline min_title field required">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label>
                    {{if .boot_file}}
                    <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" >
                    {{else}}
                    <input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" >
                    {{end}}
                    <span>
                        <i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i>
                    </span>
                    <a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GCU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a>
                </div>
                <!-- Dataset picker is rendered into this container by cloudbrainNew.js. -->
                <div id="select-multi-dataset">
                </div>
                <span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span>
                <!-- Dynamic run-parameter rows; serialized into the hidden "run_para_list" input on submit. -->
                <div class="inline min_title field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label>
                    <span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span>
                    <input id="store_run_para" type="hidden" name="run_para_list">
                    <div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}">
                    </div>
                </div>
                <div class="required min_title inline field" id="flavor_name">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label>
                    <!-- NOTE(review): the "ovalue"/"blance" attribute names look misspelled but are read as-is by specsuse.js; do not rename them without updating that script. -->
                    <select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select>
                    <span><i class="question circle icon link"></i></span>
                    <a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a>
                    {{if .CloudBrainPaySwitch}}
                    <div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;">
                        <span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span>
                        <span style="float:right;">
                            <i class="question circle icon link" data-position="right center" data-variation="mini"></i>
                            <a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a>
                        </span>
                    </div>
                    {{end}}
                </div>
                <!-- Node count is fixed at 1 for GCU jobs (hidden input + single-option dropdown). -->
                <div class="inline required min_title field">
                    <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.amount_of_compute_node"}}</label>
                    <div class="ui labeled input" style="width: 5%;">
                        <input style="border-radius: 0;text-align: center;"type="hidden" name="work_server_number" id="trainjob_work_server_num" tabindex="3" autofocus required maxlength="255" value="1" readonly>
                        <div class="field" id="trainjob_work_server_num_select" name="work_server_number_select">
                            <select class="ui dropdown width" style='width: 100%;' name="work_server_id">
                                <option name="server_id" value="1">1</option>
                            </select>
                        </div>
                    </div>
                </div>
                <div class="inline min_title field">
                    <label class="label-fix-width"></label>
                    <!-- NOTE(review): button is disabled only when NotStopTaskCount == 1; confirm counts > 1 cannot occur here. -->
                    <button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}">
                        {{.i18n.Tr "repo.cloudbrain.new"}}
                    </button>
                    <a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a>
                </div>
                <!-- 模态框 -->
            </form>
        </div>
    </div>
</div>
{{template "base/footer" .}}
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script>
<script>
    ;(function() {
        // Populate the spec dropdown with localized labels; optionally show point pricing.
        var SPECS = {{ .Specs }};
        var showPoint = {{ .CloudBrainPaySwitch }};
        window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, {
            gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}},
            free: {{$.i18n.Tr "cloudbrain.free"}},
            point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}},
            memory: {{$.i18n.Tr "cloudbrain.memory"}},
            shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}},
            no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}},
        });
        // Honor an optional ?backurl=... so Cancel returns to the referring page.
        var backUrl = new URLSearchParams(window.location.search).get("backurl");
        if (backUrl) {
            $('.__btn-cancel-back__').attr('href', backUrl);
        }
    })();
</script>
@@ -1,261 +1 @@ | |||
{{template "base/head" .}} | |||
<style> | |||
.min_title{ | |||
font-size: 14px !important; | |||
margin-bottom: 2rem !important; | |||
} | |||
.width{ | |||
width:100% !important; | |||
} | |||
.width80{ | |||
width: 80.7% !important; | |||
margin-left: 10px; | |||
} | |||
.width806{ | |||
width: 80.6% !important; | |||
margin-left: -2px; | |||
} | |||
.width85{ | |||
width: 85% !important; | |||
margin-left: 10.5rem !important; | |||
align-items: center; | |||
} | |||
.width81{ | |||
width: 81% !important; | |||
} | |||
.width48{ | |||
width: 48.5% !important; | |||
} | |||
.add{font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 0px 5px 5px 0px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
.min{ | |||
font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 5px 0px 0px 5px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
.label-fix-width{ | |||
width: 140px !important; | |||
text-align: right; | |||
font-family: SourceHanSansSC-medium !important; | |||
color: rgba(16, 16, 16, 100) !important; | |||
font-size: 14px !important; | |||
} | |||
</style> | |||
{{template "custom/global_mask" .}} | |||
<div class="repository"> | |||
{{template "repo/header" .}} | |||
<div class="ui container"> | |||
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-image="2" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div> | |||
{{if eq .NotStopTaskCount 0}} | |||
{{template "base/alert" .}} | |||
{{end}} | |||
{{template "custom/alert_cb" .}} | |||
<h4 class="ui top attached header"> | |||
{{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
</h4> | |||
<div class="ui attached segment"> | |||
<!-- equal width --> | |||
<form id="form_id" class="ui form" action="{{.Link}}{{if not .IsCreate}}?compute_resource=CPU/GPU{{end}}" method="post"> | |||
{{.CsrfTokenHtml}} | |||
<input type="hidden" name="action" value="update"> | |||
<input type="hidden" id="ai_engine_name" name="engine_name" value=""> | |||
<input type="hidden" id="ai_flavor_name" name="flavor_name" value=""> | |||
<input type="hidden" id="ai_image_name" value="{{.image}}"> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}} | |||
</a> | |||
<a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if.NPUEnabled}}npu{{else}}gpu{{end}}/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta) | |||
</a> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gpu/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"> | |||
<path fill="none" d="M0 0h24v24H0z"/> | |||
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/> | |||
</svg> | |||
CPU/GPU | |||
</a> | |||
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"> | |||
<path fill="none" d="M0 0h24v24H0z"/> | |||
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/> | |||
</svg> | |||
Ascend NPU</a> | |||
<a class="item" href="{{.RepoLink}}/grampus/train-job/gcu/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"> | |||
<path fill="none" d="M0 0h24v24H0z"/> | |||
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/> | |||
</svg> | |||
GCU</a> | |||
</div> | |||
</div> | |||
<div class="min_title inline field" style="margin-top:-10px;"> | |||
<label class="label-fix-width" style="font-weight: normal;"></label> | |||
{{template "custom/task_wait_count" .}} | |||
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;"> | |||
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i> | |||
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field" style="margin-bottom: 0rem !important;"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label> | |||
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span> | |||
<div class="min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label> | |||
{{if .description}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea> | |||
{{else}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea> | |||
{{end}} | |||
</div> | |||
<div class="ui divider"></div> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label> | |||
<select class="ui dropdown width80 left2" id="code_version" name="branch_name"> | |||
{{if .branch_name}} | |||
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branch_name }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{else}} | |||
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branchName }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{end}} | |||
</select> | |||
</div> | |||
<!--{{template "custom/select_model" .}} --> | |||
<div> | |||
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" | |||
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div> | |||
<div id="select-multi-model"></div> | |||
</div> | |||
<div id="images-new-grampus"> | |||
<div id="images-new-cb"> | |||
</div> | |||
<div class="inline min_title field required"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label> | |||
{{if .boot_file}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" > | |||
{{else}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" > | |||
{{end}} | |||
<span> | |||
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
<div id="select-multi-dataset"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label> | |||
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span> | |||
<input id="store_run_para" type="hidden" name="run_para_list"> | |||
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}"> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field" id="flavor_name"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label> | |||
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select> | |||
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span> | |||
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a> | |||
{{if .CloudBrainPaySwitch}} | |||
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;"> | |||
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span> | |||
<span style="float:right;"> | |||
<i class="question circle icon link"></i> | |||
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a> | |||
</span> | |||
</div> | |||
{{end}} | |||
</div> | |||
<!-- {{if not .IsCreate}} | |||
<div class="inline min_title field" > | |||
<label class="label-fix-width"></label> | |||
<div class="ui checkbox" style="margin-right:1rem"> | |||
<input type="checkbox" name="is_continue" value="true"> | |||
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label> | |||
</div> | |||
<span > | |||
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/continue" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
{{end}} --> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width"></label> | |||
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}"> | |||
{{.i18n.Tr "repo.cloudbrain.new"}} | |||
</button> | |||
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a> | |||
</div> | |||
<!-- Modal dialog --> | |||
</form> | |||
</div> | |||
</div> | |||
</div> | |||
{{template "base/footer" .}} | |||
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script> | |||
;(function() { | |||
var SPECS = {{ .Specs }}; | |||
var showPoint = {{ .CloudBrainPaySwitch }}; | |||
window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, { | |||
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}}, | |||
free: {{$.i18n.Tr "cloudbrain.free"}}, | |||
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}}, | |||
memory: {{$.i18n.Tr "cloudbrain.memory"}}, | |||
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}}, | |||
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}}, | |||
}); | |||
var backUrl = new URLSearchParams(window.location.search).get("backurl"); | |||
if (backUrl) { | |||
$('.__btn-cancel-back__').attr('href', backUrl); | |||
} | |||
})(); | |||
</script> | |||
{{ template "repo/cloudbrain/cloudbraincreate" .}} |
@@ -0,0 +1,261 @@ | |||
{{template "base/head" .}} | |||
<style> | |||
.min_title{ | |||
font-size: 14px !important; | |||
margin-bottom: 2rem !important; | |||
} | |||
.width{ | |||
width:100% !important; | |||
} | |||
.width80{ | |||
width: 80.7% !important; | |||
margin-left: 10px; | |||
} | |||
.width806{ | |||
width: 80.6% !important; | |||
margin-left: -2px; | |||
} | |||
.width85{ | |||
width: 85% !important; | |||
margin-left: 10.5rem !important; | |||
align-items: center; | |||
} | |||
.width81{ | |||
width: 81% !important; | |||
} | |||
.width48{ | |||
width: 48.5% !important; | |||
} | |||
.add{font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 0px 5px 5px 0px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
.min{ | |||
font-size: 18px; | |||
padding: 0.5rem; | |||
border: 1px solid rgba(187, 187, 187, 100); | |||
border-radius: 5px 0px 0px 5px; | |||
line-height: 21px; | |||
text-align: center; | |||
color: #C2C7CC; | |||
} | |||
.label-fix-width{ | |||
width: 140px !important; | |||
text-align: right; | |||
font-family: SourceHanSansSC-medium !important; | |||
color: rgba(16, 16, 16, 100) !important; | |||
font-size: 14px !important; | |||
} | |||
</style> | |||
{{template "custom/global_mask" .}} | |||
<div class="repository"> | |||
{{template "repo/header" .}} | |||
<div class="ui container"> | |||
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-image="2" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.attachment}}" data-dataset-name="{{.dataset_name}}"></div> | |||
{{if eq .NotStopTaskCount 0}} | |||
{{template "base/alert" .}} | |||
{{end}} | |||
{{template "custom/alert_cb" .}} | |||
<h4 class="ui top attached header"> | |||
{{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
</h4> | |||
<div class="ui attached segment"> | |||
<!-- equal width --> | |||
<form id="form_id" class="ui form" action="{{.Link}}{{if not .IsCreate}}?compute_resource=CPU/GPU{{end}}" method="post"> | |||
{{.CsrfTokenHtml}} | |||
<input type="hidden" name="action" value="update"> | |||
<input type="hidden" id="ai_engine_name" name="engine_name" value=""> | |||
<input type="hidden" id="ai_flavor_name" name="flavor_name" value=""> | |||
<input type="hidden" id="ai_image_name" value="{{.image}}"> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.resource_cluster"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="item" href="{{.RepoLink}}/cloudbrain/train-job/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}} | |||
</a> | |||
<a class="active item" href="{{.RepoLink}}/grampus/train-job/{{if.NPUEnabled}}npu{{else}}gpu{{end}}/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg> | |||
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta) | |||
</a> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label> | |||
<div class="ui blue mini menu compact selectcloudbrain"> | |||
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gpu/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"> | |||
<path fill="none" d="M0 0h24v24H0z"/> | |||
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/> | |||
</svg> | |||
CPU/GPU | |||
</a> | |||
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"> | |||
<path fill="none" d="M0 0h24v24H0z"/> | |||
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/> | |||
</svg> | |||
Ascend NPU</a> | |||
<a class="item" href="{{.RepoLink}}/grampus/train-job/gcu/create"> | |||
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"> | |||
<path fill="none" d="M0 0h24v24H0z"/> | |||
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/> | |||
</svg> | |||
GCU</a> | |||
</div> | |||
</div> | |||
<div class="min_title inline field" style="margin-top:-10px;"> | |||
<label class="label-fix-width" style="font-weight: normal;"></label> | |||
{{template "custom/task_wait_count" .}} | |||
<div style="display: flex;align-items: center;margin-left: 156px;margin-top: 0.5rem;"> | |||
<i class="ri-error-warning-line" style="color: #f2711c;margin-right: 0.5rem;"></i> | |||
<span style="color: #888;font-size: 12px;">{{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "ckpt_url" "/tmp/output" | Safe}}</span> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field" style="margin-bottom: 0rem !important;"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.job_name"}}</label> | |||
<input style="width: 60%;" name="display_job_name" id="display_job_name" placeholder={{.i18n.Tr "repo.modelarts.train_job.job_name"}} value="{{.display_job_name}}" tabindex="3" onkeyup="this.value=this.value.replace(/[, ]/g,'')" autofocus required maxlength="36"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 2rem;">{{.i18n.Tr "repo.cloudbrain_jobname_err"}}</span> | |||
<div class="min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;" for="description">{{.i18n.Tr "repo.modelarts.train_job.description"}}</label> | |||
{{if .description}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}}>{{.description}}</textarea> | |||
{{else}} | |||
<textarea style="width: 80%;" id="description" name="description" rows="3" maxlength="255" placeholder={{.i18n.Tr "repo.modelarts.train_job.new_place"}} onchange="this.value=this.value.substring(0, 255)" onkeydown="this.value=this.value.substring(0, 255)" onkeyup="this.value=this.value.substring(0, 255)"></textarea> | |||
{{end}} | |||
</div> | |||
<div class="ui divider"></div> | |||
<h4 class="train-job-title ui header ">{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:</h4> | |||
<div class="required min_title inline field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.code_version"}}</label> | |||
<select class="ui dropdown width80 left2" id="code_version" name="branch_name"> | |||
{{if .branch_name}} | |||
<option name="branch_name" value="{{.branch_name}}">{{.branch_name}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branch_name }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{else}} | |||
<option name="branch_name" value="{{.branchName}}">{{.branchName}}</option> | |||
{{range $k, $v :=.Branches}} | |||
{{ if ne $v $.branchName }} | |||
<option name="branch_name" value="{{$v}}">{{$v}}</option> | |||
{{end}} | |||
{{end}} | |||
{{end}} | |||
</select> | |||
</div> | |||
<!--{{template "custom/select_model" .}} --> | |||
<div> | |||
<div class="select-multi-model" data-model-id="{{.model_id}}" data-model-name="{{.model_name}}" data-model-version="{{.model_version}}" | |||
data-pre-train-model-url="{{.pre_train_model_url}}" data-ckpt-name="{{.ckpt_name}}"></div> | |||
<div id="select-multi-model"></div> | |||
</div> | |||
<div id="images-new-grampus"> | |||
<div id="images-new-cb"> | |||
</div> | |||
<div class="inline min_title field required"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.start_file"}}</label> | |||
{{if .boot_file}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="{{.boot_file}}" tabindex="3" autofocus required maxlength="255" > | |||
{{else}} | |||
<input style="width: 48.5%;" name="boot_file" id="trainjob_boot_file" value="" tabindex="3" autofocus required maxlength="255" > | |||
{{end}} | |||
<span> | |||
<i class="question circle icon link" data-content={{.i18n.Tr "repo.modelarts.train_job.boot_file_helper"}} data-position="right center" data-variation="mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/train_for_c2net.py" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
<div id="select-multi-dataset"> | |||
</div> | |||
<span class="tooltips" style="margin-left: 11.5rem;margin-bottom: 1rem;"></span> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.run_parameter"}}</label> | |||
<span id="add_run_para" style="margin-left: 0.5rem;cursor:pointer;color: rgba(3, 102, 214, 100);font-size: 14px;line-height: 26px;font-family: SourceHanSansSC-medium;"><i class="plus square outline icon"></i>{{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}</span> | |||
<input id="store_run_para" type="hidden" name="run_para_list"> | |||
<div class="dynamic field" style="margin-top: 1rem;" data-params="{{.run_para_list}}" data-params-value="{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}" data-params-name="{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}"> | |||
</div> | |||
</div> | |||
<div class="required min_title inline field" id="flavor_name"> | |||
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.standard"}}</label> | |||
<select class="ui dropdown width48" id="__specs__" style='width:385px' name="spec_id" ovalue="{{.spec_id}}" {{if .CloudBrainPaySwitch}}blance="{{.PointAccount.Balance}}"{{end}}></select> | |||
<span><i class="question circle icon link" data-content="{{.i18n.Tr "repo.modelarts.train_job.resource_helper"}}" data-position="right center" data-variation="mini"></i></span> | |||
<a href="https://openi.pcl.ac.cn/docs/index.html#/quickstart/resources" target="_blank">{{.i18n.Tr "custom.resource_description"}}</a> | |||
{{if .CloudBrainPaySwitch}} | |||
<div class="cloudbrain_resource_spec_blance_tip width48" style="padding:0 5px;margin:6px 0;margin-left:155px;font-size:12px;"> | |||
<span>{{$.i18n.Tr "points.balance_of_points"}}<span style="color:red;margin: 0 3px">{{.PointAccount.Balance}}</span>{{$.i18n.Tr "points.points"}}</span><span>{{$.i18n.Tr "points.expected_time"}}<span style="color:red;margin: 0 3px" class="can-use-time"></span>{{$.i18n.Tr "points.hours"}}</span> | |||
<span style="float:right;"> | |||
<i class="question circle icon link"></i> | |||
<a href="{{AppSubUrl}}/reward/point/rule" target="_blank">{{$.i18n.Tr "points.points_acquisition_instructions"}}</a> | |||
</span> | |||
</div> | |||
{{end}} | |||
</div> | |||
<!-- {{if not .IsCreate}} | |||
<div class="inline min_title field" > | |||
<label class="label-fix-width"></label> | |||
<div class="ui checkbox" style="margin-right:1rem"> | |||
<input type="checkbox" name="is_continue" value="true"> | |||
<label>{{.i18n.Tr "repo.reuse_last_result"}}</label> | |||
</div> | |||
<span > | |||
<i class="question circle icon" data-content={{.i18n.Tr "repo.modelarts.infer_job.continue_helper"}} data-position="top center" data-variation="inverted mini"></i> | |||
</span> | |||
<a href="https://openi.pcl.ac.cn/docs/index.html#/cloudbrain/train/continue" target="_blank">{{.i18n.Tr "cloudbrain.view_sample"}}</a> | |||
</div> | |||
{{end}} --> | |||
<div class="inline min_title field"> | |||
<label class="label-fix-width"></label> | |||
<button class="ui create_train_job green button {{if eq .NotStopTaskCount 1}}disabled{{end}}"> | |||
{{.i18n.Tr "repo.cloudbrain.new"}} | |||
</button> | |||
<a class="ui button __btn-cancel-back__" href="{{.RepoLink}}/modelarts/train-job">{{.i18n.Tr "repo.cloudbrain.cancel"}}</a> | |||
</div> | |||
<!-- Modal dialog --> | |||
</form> | |||
</div> | |||
</div> | |||
</div> | |||
{{template "base/footer" .}} | |||
<script src="{{StaticUrlPrefix}}/js/specsuse.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script src="{{StaticUrlPrefix}}/js/cloudbrainNew.js?v={{MD5 AppVer}}" type="text/javascript"></script> | |||
<script> | |||
;(function() { | |||
var SPECS = {{ .Specs }}; | |||
var showPoint = {{ .CloudBrainPaySwitch }}; | |||
window.renderSpecsSelect($('#__specs__'), SPECS, showPoint, { | |||
gpu_memory: {{$.i18n.Tr "cloudbrain.gpu_memory"}}, | |||
free: {{$.i18n.Tr "cloudbrain.free"}}, | |||
point_hr: {{$.i18n.Tr "cloudbrain.point_hr"}}, | |||
memory: {{$.i18n.Tr "cloudbrain.memory"}}, | |||
shared_memory: {{$.i18n.Tr "cloudbrain.shared_memory"}}, | |||
no_use_resource:{{$.i18n.Tr "cloudbrain.no_use_resource"}}, | |||
}); | |||
var backUrl = new URLSearchParams(window.location.search).get("backurl"); | |||
if (backUrl) { | |||
$('.__btn-cancel-back__').attr('href', backUrl); | |||
} | |||
})(); | |||
</script> |
Dear OpenI User,
Thank you for your continued support of the OpenI Qizhi Community AI Collaboration Platform. In order to protect your usage rights and ensure network security, we updated the OpenI Qizhi Community AI Collaboration Platform Usage Agreement in January 2024. The updated agreement specifies that users are prohibited from using intranet penetration tools. After you click "Agree and continue", you can continue to use our services. Thank you for your cooperation and understanding.
For the full text of the agreement, please refer to the "OpenI Qizhi Community AI Collaboration Platform Usage Agreement".