diff --git a/manager/client/grampus/grampus.go b/manager/client/grampus/grampus.go
index 060cfb87d0..7b15ddc16e 100644
--- a/manager/client/grampus/grampus.go
+++ b/manager/client/grampus/grampus.go
@@ -553,3 +553,58 @@ sendjob:
 
 	return &result, nil
 }
+
+// DeleteJob sends a DELETE request for the job identified by jobID.
+//
+// The train-job endpoint is used unless the first optional jobType value
+// equals models.JobTypeDebug, which selects the notebook endpoint. When the
+// response carries errorIllegalToken, getToken is called and the request is
+// retried once. The decoded response is returned on every path; the error is
+// non-nil when jobID is empty, the HTTP call fails, or ErrorCode is non-zero.
+func DeleteJob(jobID string, jobType ...string) (*models.GrampusDeleteJobResponse, error) {
+	var result models.GrampusDeleteJobResponse
+
+	// An empty ID would aim the DELETE at the bare endpoint URL, not at a job.
+	if jobID == "" {
+		return &result, fmt.Errorf("delete job: empty job id")
+	}
+
+	checkSetting()
+	client := getRestyClient()
+	retry := 0
+
+	url := urlTrainJob
+	if len(jobType) > 0 && jobType[0] == string(models.JobTypeDebug) {
+		url = urlNotebookJob
+	}
+
+sendjob:
+	_, err := client.R().
+		//SetHeader("Content-Type", "application/json").
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Delete(HOST + url + "/" + jobID)
+
+	if err != nil {
+		return &result, fmt.Errorf("resty DeleteJob: %v", err)
+	}
+
+	if result.ErrorCode == errorIllegalToken && retry < 1 {
+		retry++
+		log.Info("retry get token")
+		// The refresh error is not checked: only one retry is made,
+		// and its response decides the outcome.
+		_ = getToken()
+		// Start from a zero value so fields missing from the retried response
+		// cannot keep the values decoded from the first one.
+		result = models.GrampusDeleteJobResponse{}
+		goto sendjob
+	}
+
+	if result.ErrorCode != 0 {
+		log.Error("Delete Job failed(%d): %s", result.ErrorCode, result.ErrorMsg)
+		return &result, fmt.Errorf("Delete Job failed(%d): %s", result.ErrorCode, result.ErrorMsg)
+	}
+	log.Info("delete grampus task, re=" + result.Info + " grampus jobId=" + jobID)
+	return &result, nil
+}
diff --git a/models/cloudbrain.go b/models/cloudbrain.go
index 504e49c5a2..152622d9ff 100755
--- a/models/cloudbrain.go
+++ b/models/cloudbrain.go
@@ -2068,6 +2068,13 @@ type NewModelArtsMetrics struct {
 	Value []float32 `json:"value"` //获取的监控值的序列,元素为float类型
 }
 
+// GrampusDeleteJobResponse is the response decoded from a Grampus delete-job
+// call: the embedded GrampusResult plus the "info" string.
+type GrampusDeleteJobResponse struct {
+	GrampusResult
+	Info string `json:"info"`
+}
+
 func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
 	sess := x.NewSession()
 	defer sess.Close()
diff --git a/services/ai_task_service/cluster/c2net.go b/services/ai_task_service/cluster/c2net.go
index e4db4f5712..522ec187fb 100644
--- a/services/ai_task_service/cluster/c2net.go
+++ b/services/ai_task_service/cluster/c2net.go
@@ -1,6 +1,13 @@
 package cluster
 
 import (
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"path"
+	"strings"
+	"time"
+
 	"code.gitea.io/gitea/entity"
 	"code.gitea.io/gitea/manager/client/grampus"
 	"code.gitea.io/gitea/models"
@@ -15,12 +22,6 @@ import (
 	"code.gitea.io/gitea/services/ai_task_service/schedule"
 	"code.gitea.io/gitea/services/ai_task_service/storage_helper"
 	"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask"
-	"errors"
-	"fmt"
-	"io/ioutil"
-	"path"
-	"strings"
-	"time"
 )
 
 type C2NetClusterAdapter struct {
@@ -266,7 +267,13 @@ func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartRespo
 	}
 }
 
-func (c C2NetClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error {
+// DeleteNoteBook deletes the notebook job opts.JobID through grampus.DeleteJob.
+func (c C2NetClusterAdapter) DeleteNoteBook(opts entity.JobIdAndVersionId) error {
+	_, err := grampus.DeleteJob(opts.JobID, string(models.JobTypeDebug))
+	if err != nil {
+		log.Error("DeleteNoteBook(%+v) failed:%v", opts, err)
+		return err
+	}
 	return nil
 }
 
@@ -588,9 +595,16 @@ func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *entity.Creat
 	}
 }
 
-func (c C2NetClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error {
+// DeleteTrainJob deletes the train job opts.JobID through grampus.DeleteJob.
+func (c C2NetClusterAdapter) DeleteTrainJob(opts entity.JobIdAndVersionId) error {
+	_, err := grampus.DeleteJob(opts.JobID)
+	if err != nil {
+		log.Error("Delete train job(%+v) failed:%v", opts, err)
+		return err
+	}
 	return nil
 }
+
 func (c C2NetClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error {
 	_, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug))
 	if err != nil {