#4680 #4622 删除智算任务时可调用虎鲸删除任务接口,包括调试任务及训练任务

Merged
ychao_1983 merged 4 commits from zouap_static into V20230912 8 months ago
  1. +40
    -0
      manager/client/grampus/grampus.go
  2. +5
    -0
      models/cloudbrain.go
  3. +20
    -8
      services/ai_task_service/cluster/c2net.go

+ 40
- 0
manager/client/grampus/grampus.go View File

@@ -553,3 +553,43 @@ sendjob:

return &result, nil
}

// DeleteJob sends a DELETE request for the job identified by jobID.
//
// The request goes to the train-job URL by default. When the first optional
// jobType value equals string(models.JobTypeDebug), the notebook-job URL is
// used instead; any further jobType values are ignored.
//
// If the response carries the illegal-token error code, the token is
// refreshed and the request is sent one more time. The decoded response is
// returned in every case; the error is non-nil when the HTTP call itself
// fails or when the response reports a non-zero error code.
func DeleteJob(jobID string, jobType ...string) (*models.GrampusDeleteJobResponse, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GrampusDeleteJobResponse

	url := urlTrainJob
	if len(jobType) > 0 && jobType[0] == string(models.JobTypeDebug) {
		url = urlNotebookJob
	}

	// At most two attempts: the initial request plus one retry after a
	// token refresh.
	for retry := 0; ; retry++ {
		_, err := client.R().
			SetAuthToken(TOKEN).
			SetResult(&result).
			Delete(HOST + url + "/" + jobID)
		if err != nil {
			return &result, fmt.Errorf("resty DeleteJob: %v", err)
		}

		if result.ErrorCode != errorIllegalToken || retry >= 1 {
			break
		}

		log.Info("retry get token")
		// NOTE(review): the outcome of getToken is deliberately discarded, as
		// before; if the refresh fails the retried request reports the error.
		_ = getToken()
	}

	if result.ErrorCode != 0 {
		log.Error("Delete Job failed(%d): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("Delete Job failed(%d): %s", result.ErrorCode, result.ErrorMsg)
	}
	log.Info("delete grampus task, re=" + result.Info + " grampus jobId=" + jobID)
	return &result, nil
}

+ 5
- 0
models/cloudbrain.go View File

@@ -2068,6 +2068,11 @@ type NewModelArtsMetrics struct {
Value []float32 `json:"value"` //获取的监控值的序列,元素为float类型
}

// GrampusDeleteJobResponse is the response body decoded by the Grampus
// client's DeleteJob call.
type GrampusDeleteJobResponse struct {
// GrampusResult is embedded; DeleteJob reads the ErrorCode and ErrorMsg
// fields through it.
GrampusResult
// Info is decoded from the "info" JSON key; DeleteJob writes it to the log
// after a successful delete.
Info string `json:"info"`
}

func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
sess := x.NewSession()
defer sess.Close()


+ 20
- 8
services/ai_task_service/cluster/c2net.go View File

@@ -1,6 +1,13 @@
package cluster

import (
"errors"
"fmt"
"io/ioutil"
"path"
"strings"
"time"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/manager/client/grampus"
"code.gitea.io/gitea/models"
@@ -15,12 +22,6 @@ import (
"code.gitea.io/gitea/services/ai_task_service/schedule"
"code.gitea.io/gitea/services/ai_task_service/storage_helper"
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask"
"errors"
"fmt"
"io/ioutil"
"path"
"strings"
"time"
)

type C2NetClusterAdapter struct {
@@ -266,7 +267,12 @@ func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartRespo
}
}

func (c C2NetClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error {
// DeleteNoteBook deletes the debug-type job identified by opts.JobID through
// the grampus client. A failure is logged and returned to the caller unchanged.
func (c C2NetClusterAdapter) DeleteNoteBook(opts entity.JobIdAndVersionId) error {
	_, delErr := grampus.DeleteJob(opts.JobID, string(models.JobTypeDebug))
	if delErr == nil {
		return nil
	}
	log.Error("DeleteNoteBook(%s) failed:%v", opts, delErr)
	return delErr
}

@@ -588,9 +594,15 @@ func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *entity.Creat
}
}

func (c C2NetClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error {
// DeleteTrainJob deletes the job identified by opts.JobID through the grampus
// client, using its default (train-job) endpoint. A failure is logged and
// returned to the caller unchanged.
func (c C2NetClusterAdapter) DeleteTrainJob(opts entity.JobIdAndVersionId) error {
	if _, delErr := grampus.DeleteJob(opts.JobID); delErr != nil {
		log.Error("Delete train job(%s) failed:%v", opts, delErr)
		return delErr
	}
	return nil
}

func (c C2NetClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error {
_, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug))
if err != nil {


Loading…
Cancel
Save