|
- package models
-
- import (
- "encoding/json"
- "fmt"
- "time"
- "xorm.io/xorm"
-
- "code.gitea.io/gitea/modules/setting"
- "code.gitea.io/gitea/modules/timeutil"
- "xorm.io/builder"
- )
-
// String-backed enumeration types used for job bookkeeping.
type (
	// CloudbrainStatus is a cloudbrain job status value (see the Job* constants).
	CloudbrainStatus string
	// JobType is a job type value (see the JobType* constants).
	JobType string
	// ModelArtsJobStatus is a ModelArts job status value (see the ModelArts* constants).
	ModelArtsJobStatus string
)

// Cloudbrain job statuses.
const (
	JobWaiting   CloudbrainStatus = "WAITING"
	JobStopped   CloudbrainStatus = "STOPPED"
	JobSucceeded CloudbrainStatus = "SUCCEEDED"
	JobFailed    CloudbrainStatus = "FAILED"
	JobRunning   CloudbrainStatus = "RUNNING"
)

// Job types.
const (
	JobTypeDebug        JobType = "DEBUG"
	JobTypeBenchmark    JobType = "BENCHMARK"
	JobTypeSnn4imagenet JobType = "SNN4IMAGENET"
)

// ModelArts job statuses.
const (
	ModelArtsCreateQueue  ModelArtsJobStatus = "CREATE_QUEUING" // free resource: queued for creation
	ModelArtsCreating     ModelArtsJobStatus = "CREATING"       // being created
	ModelArtsCreateFailed ModelArtsJobStatus = "CREATE_FAILED"  // creation failed
	ModelArtsStartQueuing ModelArtsJobStatus = "START_QUEUING"  // free resource: queued for start
	ModelArtsReadyToStart ModelArtsJobStatus = "READY_TO_START" // free resource: waiting to start
	ModelArtsStarting     ModelArtsJobStatus = "STARTING"       // starting
	ModelArtsRestarting   ModelArtsJobStatus = "RESTARTING"     // restarting
	ModelArtsStartFailed  ModelArtsJobStatus = "START_FAILED"   // start failed
	ModelArtsRunning      ModelArtsJobStatus = "RUNNING"        // running
	ModelArtsStopping     ModelArtsJobStatus = "STOPPING"       // stopping
	ModelArtsStopped      ModelArtsJobStatus = "STOPPED"        // stopped
	ModelArtsUnavailable  ModelArtsJobStatus = "UNAVAILABLE"    // faulty
	ModelArtsDeleted      ModelArtsJobStatus = "DELETED"        // deleted
	ModelArtsResizing     ModelArtsJobStatus = "RESIZING"       // specification change in progress
	ModelArtsResizFailed  ModelArtsJobStatus = "RESIZE_FAILED"  // specification change failed
)
-
- type Cloudbrain struct {
- ID int64 `xorm:"pk autoincr"`
- JobID string `xorm:"INDEX NOT NULL"`
- JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
- JobName string `xorm:"INDEX"`
- Status string `xorm:"INDEX"`
- UserID int64 `xorm:"INDEX"`
- RepoID int64 `xorm:"INDEX"`
- SubTaskName string `xorm:"INDEX"`
- ContainerID string
- ContainerIp string
- CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
- UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
- DeletedAt time.Time `xorm:"deleted"`
- CanDebug bool `xorm:"-"`
- Type int `xorm:"INDEX DEFAULT 0"`
-
- User *User `xorm:"-"`
- Repo *Repository `xorm:"-"`
- }
-
// CloudBrainLoginResult holds a login response: a code, a message and a
// free-form payload. The fields carry no json tags.
type CloudBrainLoginResult struct {
	Code    string
	Msg     string
	Payload map[string]interface{}
}
-
// Request types serialized as JSON when creating a job.
type (
	// TaskRole describes one task role of a job: its resources, command and flags.
	TaskRole struct {
		Name                  string `json:"name"`
		TaskNumber            int    `json:"taskNumber"`
		MinSucceededTaskCount int    `json:"minSucceededTaskCount"`
		MinFailedTaskCount    int    `json:"minFailedTaskCount"`
		CPUNumber             int    `json:"cpuNumber"`
		GPUNumber             int    `json:"gpuNumber"`
		MemoryMB              int    `json:"memoryMB"`
		ShmMB                 int    `json:"shmMB"`
		Command               string `json:"command"`
		NeedIBDevice          bool   `json:"needIBDevice"`
		IsMainRole            bool   `json:"isMainRole"`
		UseNNI                bool   `json:"useNNI"`
	}

	// StHostPath pairs a host path with its mount path and read-only flag.
	StHostPath struct {
		Path      string `json:"path"`
		MountPath string `json:"mountPath"`
		ReadOnly  bool   `json:"readOnly"`
	}

	// Volume wraps a single host-path mount.
	Volume struct {
		HostPath StHostPath `json:"hostPath"`
	}

	// CreateJobParams is the top-level job creation request body.
	CreateJobParams struct {
		JobName    string     `json:"jobName"`
		RetryCount int8       `json:"retryCount"`
		GpuType    string     `json:"gpuType"`
		Image      string     `json:"image"`
		TaskRoles  []TaskRole `json:"taskRoles"`
		Volumes    []Volume   `json:"volumes"`
	}
)
-
// Response envelopes for the job creation and job query calls. Both share the
// same shape: a code, a message and a free-form payload.
type (
	// CreateJobResult is the response to a job creation request.
	CreateJobResult struct {
		Code    string                 `json:"code"`
		Msg     string                 `json:"msg"`
		Payload map[string]interface{} `json:"payload"`
	}

	// GetJobResult is the response to a job query request.
	GetJobResult struct {
		Code    string                 `json:"code"`
		Msg     string                 `json:"msg"`
		Payload map[string]interface{} `json:"payload"`
	}
)
-
- type GetImagesResult struct {
- Code string `json:"code"`
- Msg string `json:"msg"`
- Payload GetImagesPayload `json:"payload"`
- }
-
- type GetImagesPayload struct {
- Count int `json:"count"`
- TotalPages int `json:"totalPages,omitempty"`
- ImageInfo []*ImageInfo `json:"rows"`
- }
-
- type CloudbrainsOptions struct {
- ListOptions
- RepoID int64 // include all repos if empty
- UserID int64
- JobID int64
- SortType string
- CloudbrainIDs []int64
- // JobStatus CloudbrainStatus
- Type int
- }
// TaskPod is the typed form of a task role entry: the role's name and the
// status of each of its tasks.
type TaskPod struct {
	TaskRoleStatus struct {
		Name string `json:"name"`
	} `json:"taskRoleStatus"`
	TaskStatuses []struct {
		TaskIndex       int       `json:"taskIndex"`
		PodUID          string    `json:"podUid"`
		PodIP           string    `json:"podIp"`
		PodName         string    `json:"podName"`
		ContainerID     string    `json:"containerId"`
		ContainerIP     string    `json:"containerIp"`
		ContainerGpus   string    `json:"containerGpus"`
		State           string    `json:"state"`
		StartAt         time.Time `json:"startAt"`
		FinishedAt      time.Time `json:"finishedAt"`
		ExitCode        int       `json:"exitCode"`
		ExitDiagnostics string    `json:"exitDiagnostics"`
		RetriedCount    int       `json:"retriedCount"`
		// StartTime and FinishedTime have no json tag; ConvertToTaskPod
		// derives them from StartAt and FinishedAt for the first entry.
		StartTime    string
		FinishedTime string
	} `json:"taskStatuses"`
}

// TaskInfo describes a task by user name, task and code names, selected
// benchmark categories, code link and GPU type.
type TaskInfo struct {
	Username          string   `json:"username"`
	TaskName          string   `json:"task_name"`
	CodeName          string   `json:"code_name"`
	BenchmarkCategory []string `json:"selected_category"`
	CodeLink          string   `json:"code_link"`
	GpuType           string   `json:"gpu_type"`
}

// ConvertToTaskPod converts a generic JSON object into a TaskPod by
// round-tripping it through encoding/json.
//
// For the first task status it also fills StartTime and FinishedTime with
// StartAt and FinishedAt shifted by +8 hours and formatted as
// "2006-01-02 15:04:05". Only the first entry is formatted.
//
// An error is returned when input cannot be marshalled, when decoding fails,
// or when the decoded value contains no task statuses. The last case used to
// panic with an index-out-of-range. When decoding fails but at least one task
// status was decoded, the partially decoded TaskPod is still returned with
// its time strings filled, alongside the error.
func ConvertToTaskPod(input map[string]interface{}) (TaskPod, error) {
	var taskPod TaskPod

	data, err := json.Marshal(input)
	if err != nil {
		return taskPod, err
	}

	err = json.Unmarshal(data, &taskPod)
	if len(taskPod.TaskStatuses) == 0 {
		// Nothing to format; report the missing statuses instead of
		// panicking on TaskStatuses[0].
		if err == nil {
			err = fmt.Errorf("task pod has no task statuses")
		}
		return taskPod, err
	}

	const layout = "2006-01-02 15:04:05"
	// Shift by a fixed 8 hours and render in UTC, i.e. show the instant as
	// UTC+8 wall-clock time regardless of the server's local zone.
	plus8 := func(t time.Time) string {
		return time.Unix(t.Unix()+8*3600, 0).UTC().Format(layout)
	}

	first := &taskPod.TaskStatuses[0]
	first.StartTime = plus8(first.StartAt)
	first.FinishedTime = plus8(first.FinishedAt)
	return taskPod, err
}
-
// JobResultPayload is the typed form of a job query payload: identity, job
// status, task roles, resources, the submitted configuration and user info.
type JobResultPayload struct {
	ID        string `json:"id"`
	Name      string `json:"name"`
	Platform  string `json:"platform"`
	JobStatus struct {
		Username           string      `json:"username"`
		State              string      `json:"state"`
		SubState           string      `json:"subState"`
		ExecutionType      string      `json:"executionType"`
		Retries            int         `json:"retries"`
		CreatedTime        int64       `json:"createdTime"`
		CompletedTime      int64       `json:"completedTime"`
		AppID              string      `json:"appId"`
		AppProgress        string      `json:"appProgress"`
		AppTrackingURL     string      `json:"appTrackingUrl"`
		AppLaunchedTime    int64       `json:"appLaunchedTime"`
		AppCompletedTime   interface{} `json:"appCompletedTime"`
		AppExitCode        int         `json:"appExitCode"`
		AppExitDiagnostics string      `json:"appExitDiagnostics"`
		AppExitType        interface{} `json:"appExitType"`
		VirtualCluster     string      `json:"virtualCluster"`
		// StartTime and EndTime have no json tag; ConvertToJobResultPayload
		// derives them from CreatedTime and CompletedTime.
		StartTime string
		EndTime   string
	} `json:"jobStatus"`
	TaskRoles map[string]interface{} `json:"taskRoles"`
	Resource  struct {
		CPU          int    `json:"cpu"`
		Memory       string `json:"memory"`
		NvidiaComGpu int    `json:"nvidia.com/gpu"`
	} `json:"resource"`
	Config struct {
		Image     string `json:"image"`
		JobID     string `json:"jobId"`
		GpuType   string `json:"gpuType"`
		JobName   string `json:"jobName"`
		JobType   string `json:"jobType"`
		TaskRoles []struct {
			Name                  string `json:"name"`
			ShmMB                 int    `json:"shmMB"`
			Command               string `json:"command"`
			MemoryMB              int    `json:"memoryMB"`
			CPUNumber             int    `json:"cpuNumber"`
			GpuNumber             int    `json:"gpuNumber"`
			IsMainRole            bool   `json:"isMainRole"`
			TaskNumber            int    `json:"taskNumber"`
			NeedIBDevice          bool   `json:"needIBDevice"`
			MinFailedTaskCount    int    `json:"minFailedTaskCount"`
			MinSucceededTaskCount int    `json:"minSucceededTaskCount"`
		} `json:"taskRoles"`
		RetryCount int `json:"retryCount"`
	} `json:"config"`
	Userinfo struct {
		User  string `json:"user"`
		OrgID string `json:"org_id"`
	} `json:"userinfo"`
}

// ConvertToJobResultPayload converts a generic JSON object into a
// JobResultPayload by round-tripping it through encoding/json.
//
// It also fills JobStatus.StartTime and JobStatus.EndTime from CreatedTime
// and CompletedTime, which are divided by 1000 before use (so they are
// treated as milliseconds), formatted as "2006-01-02 15:04:05" in the
// process's local time zone.
//
// A marshalling failure is returned as-is, so callers see the real cause
// rather than a follow-on decode error. A decoding error is returned together
// with whatever was decoded; the time strings are filled in that case too.
func ConvertToJobResultPayload(input map[string]interface{}) (JobResultPayload, error) {
	var jobResultPayload JobResultPayload

	data, err := json.Marshal(input)
	if err != nil {
		return jobResultPayload, err
	}

	err = json.Unmarshal(data, &jobResultPayload)

	const layout = "2006-01-02 15:04:05"
	status := &jobResultPayload.JobStatus
	status.StartTime = time.Unix(status.CreatedTime/1000, 0).Format(layout)
	status.EndTime = time.Unix(status.CompletedTime/1000, 0).Format(layout)
	return jobResultPayload, err
}
-
// ImagesResultPayload is a list of image descriptions.
//
// NOTE(review): the list is tagged with the JSON key "taskStatuses", which
// looks copied from TaskPod — confirm against the API before changing it.
type ImagesResultPayload struct {
	Images []struct {
		ID          int    `json:"id"`
		Name        string `json:"name"`
		Place       string `json:"place"`
		Description string `json:"description"`
		Provider    string `json:"provider"`
		Createtime  string `json:"createtime"`
		Remark      string `json:"remark"`
	} `json:"taskStatuses"`
}
// ImageInfo describes one image as returned by the image listing.
type ImageInfo struct {
	ID          int    `json:"id"`
	Name        string `json:"name"`
	Place       string `json:"place"`
	Description string `json:"description"`
	Provider    string `json:"provider"`
	Createtime  string `json:"createtime"`
	Remark      string `json:"remark"`
	IsPublic    int    `json:"isPublic"`
	// PlaceView has no json tag, so it is encoded under its Go field name.
	PlaceView string
}
-
type (
	// Categories wraps a list of Category entries under the JSON key "category".
	Categories struct {
		Category []*Category `json:"category"`
	}

	// Category is a single id/value pair.
	Category struct {
		Id    int    `json:"id"`
		Value string `json:"value"`
	}
)
-
type (
	// GpuInfos wraps a list of GpuInfo entries under the JSON key "gpu_type".
	GpuInfos struct {
		GpuInfo []*GpuInfo `json:"gpu_type"`
	}

	// GpuInfo is one GPU option: an id, a value and a queue name.
	GpuInfo struct {
		Id    int    `json:"id"`
		Value string `json:"value"`
		Queue string `json:"queue"`
	}
)
-
type (
	// ResourceSpecs wraps a list of ResourceSpec entries.
	//
	// NOTE(review): the JSON key is spelled "resorce_specs"; it is kept as is
	// because whatever data is decoded into this type must use the same key.
	ResourceSpecs struct {
		ResourceSpec []*ResourceSpec `json:"resorce_specs"`
	}

	// ResourceSpec is one resource specification: CPU and GPU counts plus
	// memory and shared-memory sizes in MiB.
	ResourceSpec struct {
		Id          int `json:"id"`
		CpuNum      int `json:"cpu"`
		GpuNum      int `json:"gpu"`
		MemMiB      int `json:"memMiB"`
		ShareMemMiB int `json:"shareMemMiB"`
	}
)
-
type (
	// FlavorInfos wraps a list of FlavorInfo entries under the JSON key "flavor_info".
	FlavorInfos struct {
		FlavorInfo []*FlavorInfo `json:"flavor_info"`
	}

	// FlavorInfo is a single id/value pair.
	FlavorInfo struct {
		Id    int    `json:"id"`
		Value string `json:"value"`
	}
)
-
type (
	// PoolInfos wraps a list of PoolInfo entries under the JSON key "pool_info".
	PoolInfos struct {
		PoolInfo []*PoolInfo `json:"pool_info"`
	}

	// PoolInfo identifies one pool by id, name and type.
	PoolInfo struct {
		PoolId   string `json:"pool_id"`
		PoolName string `json:"pool_name"`
		PoolType string `json:"pool_type"`
	}
)
-
// CommitImageParams is the request body for committing an image: an IP, a
// task container id, and the tag and description of the new image.
type CommitImageParams struct {
	Ip               string `json:"ip"`
	TaskContainerId  string `json:"taskContainerId"`
	ImageTag         string `json:"imageTag"`
	ImageDescription string `json:"imageDescription"`
}
-
// CommitImageResult is the response to an image commit: a code, a message and
// a free-form payload.
type CommitImageResult struct {
	Code    string                 `json:"code"`
	Msg     string                 `json:"msg"`
	Payload map[string]interface{} `json:"payload"`
}
-
// CloudBrainResult is a minimal response envelope holding only a code and a
// message.
type CloudBrainResult struct {
	Code string `json:"code"`
	Msg  string `json:"msg"`
}
-
// Request types serialized as JSON when creating a notebook.
type (
	// CreateNotebookParams is the top-level notebook creation request body.
	CreateNotebookParams struct {
		JobName     string    `json:"name"`
		Description string    `json:"description"`
		ProfileID   string    `json:"profile_id"`
		Flavor      string    `json:"flavor"`
		Spec        Spec      `json:"spec"`
		Workspace   Workspace `json:"workspace"`
		Pool        Pool      `json:"pool"`
	}

	// Pool identifies a pool by id, name and type.
	Pool struct {
		ID   string `json:"id"`
		Name string `json:"name"`
		Type string `json:"type"`
	}

	// Workspace identifies a workspace by id.
	Workspace struct {
		ID string `json:"id"`
	}

	// Spec combines the storage and auto-stop settings of a notebook.
	Spec struct {
		Storage  Storage  `json:"storage"`
		AutoStop AutoStop `json:"auto_stop"`
	}

	// AutoStop holds the auto-stop switch and its duration.
	AutoStop struct {
		Enable   bool `json:"enable"`
		Duration int  `json:"duration"`
	}

	// Storage holds a storage type and its location.
	Storage struct {
		Type     string   `json:"type"`
		Location Location `json:"location"`
	}

	// Location holds a storage path.
	Location struct {
		Path string `json:"path"`
	}
)
-
// NotebookResult is a minimal notebook response holding only an error code
// and an error message.
type NotebookResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_msg"`
}
-
// CreateNotebookResult is the response to a notebook creation request: error
// information, the notebook's identity and status, its timestamps, and its
// profile and flavor details.
type CreateNotebookResult struct {
	ErrorCode             string `json:"error_code"`
	ErrorMsg              string `json:"error_msg"`
	ID                    string `json:"id"`
	Name                  string `json:"name"`
	Description           string `json:"description"`
	Status                string `json:"status"`
	CreationTimestamp     string `json:"creation_timestamp"`
	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
	Profile               struct {
		ID          string `json:"id"`
		Name        string `json:"name"`
		Description string `json:"description"`
		DeType      string `json:"de_type"`
		FlavorType  string `json:"flavor_type"`
	} `json:"profile"`
	Flavor        string `json:"flavor"`
	FlavorDetails struct {
		Name          string `json:"name"`
		Status        string `json:"status"`
		QueuingNum    int    `json:"queuing_num"`
		QueueLeftTime int    `json:"queue_left_time"` // seconds
		Duration      int    `json:"duration"`        // auto-stop time, in seconds
	} `json:"flavor_details"`
}
-
// GetNotebookResult is the response to a notebook query. Besides the fields
// decoded from JSON it has a few untagged string fields (CreateTime,
// LatestUpdateTime, BeginTime, EndTime) that nothing in this file fills.
type GetNotebookResult struct {
	ErrorCode             string `json:"error_code"`
	ErrorMsg              string `json:"error_msg"`
	ID                    string `json:"id"`
	Name                  string `json:"name"`
	Description           string `json:"description"`
	Status                string `json:"status"`
	CreationTimestamp     string `json:"creation_timestamp"`
	CreateTime            string
	LatestUpdateTimestamp string `json:"latest_update_timestamp"`
	LatestUpdateTime      string
	Profile               struct {
		ID          string `json:"id"`
		Name        string `json:"name"`
		Description string `json:"description"`
		DeType      string `json:"de_type"`
		FlavorType  string `json:"flavor_type"`
	} `json:"profile"`
	Flavor        string `json:"flavor"`
	FlavorDetails struct {
		Name          string `json:"name"`
		Status        string `json:"status"`
		QueuingNum    int    `json:"queuing_num"`
		QueueLeftTime int    `json:"queue_left_time"` // seconds
		Duration      int    `json:"duration"`        // auto-stop time, in seconds
	} `json:"flavor_details"`
	QueuingInfo struct {
		ID             string `json:"id"`
		Name           string `json:"name"`
		Flavor         string `json:"flavor"`
		DeType         string `json:"de_type"`
		Status         string `json:"status"`
		BeginTimestamp int    `json:"begin_timestamp"` // time the instance entered the queue
		BeginTime      string
		RemainTime     int `json:"remain_time"` // remaining time of the instance
		EndTimestamp   int `json:"end_timestamp"`
		EndTime        string
		Rank           int `json:"rank"` // rank of the instance in the queue
	} `json:"queuing_info"`
	Spec struct {
		Annotations struct {
			TargetDomain string `json:"target_domain"`
			Url          string `json:"url"`
		} `json:"annotations"`
	} `json:"spec"`
}
-
// Request types serialized as JSON when requesting a token. They nest as
// GetTokenParams > Auth > (Identity > Password > NotebookUser > Domain,
// Scope > Project).
type (
	// GetTokenParams is the top-level token request body.
	GetTokenParams struct {
		Auth Auth `json:"auth"`
	}

	// Auth combines the identity to authenticate and the requested scope.
	Auth struct {
		Identity Identity `json:"identity"`
		Scope    Scope    `json:"scope"`
	}

	// Scope holds the project the token is requested for.
	Scope struct {
		Project Project `json:"project"`
	}

	// Project identifies a project by name.
	Project struct {
		Name string `json:"name"`
	}

	// Identity lists the authentication methods and the password credentials.
	Identity struct {
		Methods  []string `json:"methods"`
		Password Password `json:"password"`
	}

	// Password wraps the user whose credentials are presented.
	Password struct {
		User NotebookUser `json:"user"`
	}

	// NotebookUser holds a user name, password and domain.
	NotebookUser struct {
		Name     string `json:"name"`
		Password string `json:"password"`
		Domain   Domain `json:"domain"`
	}

	// Domain identifies a domain by name.
	Domain struct {
		Name string `json:"name"`
	}
)
-
// Action strings, used as the value of NotebookAction.Action.
const (
	ActionStart   = "start"
	ActionStop    = "stop"
	ActionRestart = "restart"
	ActionQueue   = "queue"
	ActionDequeue = "dequeue"
)
-
// NotebookAction is a request body carrying a single action string.
type NotebookAction struct {
	Action string `json:"action"`
}
-
// NotebookActionResult is the response to a notebook action: error
// information plus the current status and the previous state.
type NotebookActionResult struct {
	ErrorCode     string `json:"error_code"`
	ErrorMsg      string `json:"error_msg"`
	CurrentStatus string `json:"current_status"`
	PreviousState string `json:"previous_state"`
}
-
// NotebookGetJobTokenResult is the response to a job token request: error
// information plus the token.
type NotebookGetJobTokenResult struct {
	ErrorCode string `json:"error_code"`
	ErrorMsg  string `json:"error_msg"`
	Token     string `json:"token"`
}
-
// NotebookDelResult is the response to a notebook deletion; it carries the
// instance id.
type NotebookDelResult struct {
	InstanceID string `json:"instance_id"`
}
-
- func Cloudbrains(opts *CloudbrainsOptions) ([]*Cloudbrain, int64, error) {
- sess := x.NewSession()
- defer sess.Close()
-
- var cond = builder.NewCond()
- if opts.RepoID > 0 {
- cond = cond.And(
- builder.Eq{"cloudbrain.repo_id": opts.RepoID},
- )
- }
-
- if opts.UserID > 0 {
- cond = cond.And(
- builder.Eq{"cloudbrain.user_id": opts.UserID},
- )
- }
-
- if (opts.JobID) > 0 {
- cond = cond.And(
- builder.Eq{"cloudbrain.job_id": opts.JobID},
- )
- }
-
- if (opts.Type) >= 0 {
- cond = cond.And(
- builder.Eq{"cloudbrain.type": opts.Type},
- )
- }
-
- // switch opts.JobStatus {
- // case JobWaiting:
- // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
- // case JobFailed:
- // cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
- // case JobStopped:
- // cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
- // case JobSucceeded:
- // cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
- // }
-
- if len(opts.CloudbrainIDs) > 0 {
- cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
- }
-
- count, err := sess.Where(cond).Count(new(Cloudbrain))
- if err != nil {
- return nil, 0, fmt.Errorf("Count: %v", err)
- }
-
- if opts.Page >= 0 && opts.PageSize > 0 {
- var start int
- if opts.Page == 0 {
- start = 0
- } else {
- start = (opts.Page - 1) * opts.PageSize
- }
- sess.Limit(opts.PageSize, start)
- }
-
- sess.OrderBy("cloudbrain.created_unix DESC")
- cloudbrains := make([]*Cloudbrain, 0, setting.UI.IssuePagingNum)
- if err := sess.Where(cond).Find(&cloudbrains); err != nil {
- return nil, 0, fmt.Errorf("Find: %v", err)
- }
- sess.Close()
-
- return cloudbrains, count, nil
- }
-
- func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
- if _, err = x.Insert(cloudbrain); err != nil {
- return err
- }
-
- return nil
- }
-
- func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
- has, err := x.Get(cb)
- if err != nil {
- return nil, err
- } else if !has {
- return nil, ErrJobNotExist{}
- }
- return cb, nil
- }
-
- func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
- cb := &Cloudbrain{JobID: jobID, RepoID: repoID}
- return getRepoCloudBrain(cb)
- }
-
- func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
- cb := &Cloudbrain{JobID: jobID}
- return getRepoCloudBrain(cb)
- }
-
- func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err error) {
- cb := &Cloudbrain{JobID: jobID, Status: string(status)}
- _, err = x.Cols("status").Where("cloudbrain.job_id=?", jobID).Update(cb)
- return
- }
-
- func UpdateJob(job *Cloudbrain) error {
- return updateJob(x, job)
- }
-
- func updateJob(e Engine, job *Cloudbrain) error {
- var sess *xorm.Session
- sess = e.Where("job_id = ?", job.JobID)
- _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
- return err
- }
-
- func DeleteJob(job *Cloudbrain) error {
- return deleteJob(x, job)
- }
-
- func deleteJob(e Engine, job *Cloudbrain) error {
- _, err := e.ID(job.ID).Delete(job)
- return err
- }
-
- func GetCloudbrainByName(jobName string) (*Cloudbrain, error) {
- cb := &Cloudbrain{JobName: jobName}
- return getRepoCloudBrain(cb)
- }
|