From e968c9c06aaccb368650083dde4b1aff250bdb18 Mon Sep 17 00:00:00 2001 From: zouap Date: Mon, 4 Sep 2023 09:23:41 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E6=99=BA=E7=AE=97=E8=B0=83=E8=AF=95=E5=8F=8A=E8=AE=AD=E7=BB=83?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E7=9A=84=E6=8E=A5=E5=8F=A3=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zouap --- manager/client/grampus/grampus.go | 40 +++++++++++++++++++++++ models/cloudbrain.go | 5 +++ services/ai_task_service/cluster/c2net.go | 28 +++++++++++----- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/manager/client/grampus/grampus.go b/manager/client/grampus/grampus.go index 060cfb87d0..1904085337 100644 --- a/manager/client/grampus/grampus.go +++ b/manager/client/grampus/grampus.go @@ -553,3 +553,43 @@ sendjob: return &result, nil } + +func DeleteJob(jobID string, jobType ...string) (*models.GrampusDeleteJobResponse, error) { + checkSetting() + client := getRestyClient() + var result models.GrampusDeleteJobResponse + + retry := 0 + + url := urlTrainJob + if len(jobType) > 0 { + if jobType[0] == string(models.JobTypeDebug) { + url = urlNotebookJob + } + } + +sendjob: + _, err := client.R(). + //SetHeader("Content-Type", "application/json"). + SetAuthToken(TOKEN). + SetResult(&result). + Delete(HOST + url + "/" + jobID) + + if err != nil { + return &result, fmt.Errorf("resty StopTrainJob: %v", err) + } + + if result.ErrorCode == errorIllegalToken && retry < 1 { + retry++ + log.Info("retry get token") + _ = getToken() + goto sendjob + } + + if result.ErrorCode != 0 { + log.Error("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + return &result, fmt.Errorf("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + } + + return &result, nil +} diff --git a/models/cloudbrain.go b/models/cloudbrain.go index a77c190995..f2a2c880d2 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -2050,6 +2050,11 @@ type NewModelArtsMetrics struct { Value []float32 `json:"value"` //获取的监控值的序列,元素为float类型 } +type GrampusDeleteJobResponse struct { + GrampusResult + Info string `json:"info"` +} + func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { sess := x.NewSession() defer sess.Close() diff --git a/services/ai_task_service/cluster/c2net.go b/services/ai_task_service/cluster/c2net.go index e4db4f5712..522ec187fb 100644 --- a/services/ai_task_service/cluster/c2net.go +++ b/services/ai_task_service/cluster/c2net.go @@ -1,6 +1,13 @@ package cluster import ( + "errors" + "fmt" + "io/ioutil" + "path" + "strings" + "time" + "code.gitea.io/gitea/entity" "code.gitea.io/gitea/manager/client/grampus" "code.gitea.io/gitea/models" @@ -15,12 +22,6 @@ import ( "code.gitea.io/gitea/services/ai_task_service/schedule" "code.gitea.io/gitea/services/ai_task_service/storage_helper" "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" - "errors" - "fmt" - "io/ioutil" - "path" - "strings" - "time" ) type C2NetClusterAdapter struct { @@ -266,7 +267,12 @@ func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartRespo } } -func (c C2NetClusterAdapter) DeleteNoteBook(entity.JobIdAndVersionId) error { +func (c C2NetClusterAdapter) DeleteNoteBook(opts entity.JobIdAndVersionId) error { + _, err := grampus.DeleteJob(opts.JobID, string(models.JobTypeDebug)) + if err != nil { + log.Error("DeleteNoteBook(%s) failed:%v", opts, err) + return err + } return nil } @@ -588,9 +594,15 @@ func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *entity.Creat } } -func (c C2NetClusterAdapter) DeleteTrainJob(entity.JobIdAndVersionId) error { +func (c C2NetClusterAdapter) DeleteTrainJob(opts entity.JobIdAndVersionId) error { + _, err := grampus.DeleteJob(opts.JobID) + if err != nil { + log.Error("Delete train job(%s) failed:%v", opts, err) + return err + } return nil } + func (c C2NetClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error { _, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug)) if err != nil { -- 2.34.1 From a92d0af32667d04c9a9d3ddcd4518057a9aa7f6f Mon Sep 17 00:00:00 2001 From: zouap Date: Mon, 4 Sep 2023 10:53:55 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E8=B0=83=E7=94=A8=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zouap --- manager/client/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manager/client/grampus/grampus.go b/manager/client/grampus/grampus.go index 1904085337..47e537ad78 100644 --- a/manager/client/grampus/grampus.go +++ b/manager/client/grampus/grampus.go @@ -590,6 +590,6 @@ sendjob: log.Error("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) return &result, fmt.Errorf("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) } - + log.Info("delete grampus task, re=" + result.Info) return &result, nil } -- 2.34.1 From 21408bf1d74706e36316c4875587800e9e7aebcb Mon Sep 17 00:00:00 2001 From: zouap Date: Mon, 4 Sep 2023 10:55:14 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E8=B0=83=E7=94=A8=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zouap --- manager/client/grampus/grampus.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/manager/client/grampus/grampus.go b/manager/client/grampus/grampus.go index 47e537ad78..7b15ddc16e 100644 --- a/manager/client/grampus/grampus.go +++ b/manager/client/grampus/grampus.go @@ -587,9 +587,9 @@ sendjob: } if result.ErrorCode != 0 { - log.Error("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) - return &result, fmt.Errorf("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) + log.Error("Delete Job failed(%d): %s", result.ErrorCode, result.ErrorMsg) + return &result, fmt.Errorf("Delete Job failed(%d): %s", result.ErrorCode, result.ErrorMsg) } - log.Info("delete grampus task, re=" + result.Info) + log.Info("delete grampus task, re=" + result.Info + " grampus jobId=" + jobID) return &result, nil } -- 2.34.1