|
- import os
- import sys
- import yaml
- import importlib
- import torch
- import numpy as np
- import datetime
- from time import time
- from feeder1 import Feeder
- from tqdm import tqdm
- from torch import nn, optim
- import torch.utils.data
- import pickle
- import torch.onnx
-
# NOTE(review): removed the original `str.encode('utf-8')` statement here —
# it called the unbound method with 'utf-8' as the receiver, produced b'utf-8',
# and discarded it: a pure no-op.

# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# device = torch.device("cuda:0")
'''
dataset:
url="https://s3.openi.org.cn/opendata/attachment/a/4/a4e3e67e-2ef7-4856-a0b5-3dcbb86a932d?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=1fa9e58b6899afd26dd3%2F20220423%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220423T021552Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3D%22xview.zip%22&X-Amz-Signature=05ea95953ae92fe66fbe6974ee2abeb48e8a3d69c438d59eac2c78f840148340"

code :
git clone https://git.openi.org.cn/yands/gpu-server.git
cd gpu-server
source /opt/conda/bin/activate
python -u train.py
'''
-
# Dataset roots and loaders.  Feeder is the project-local skeleton-sequence
# dataset (feeder1.py); mmap=True keeps the large .npy files on disk instead
# of loading them fully into RAM.
data_path = '/model/xview'

traindata = Feeder(data_path + '/train_data.npy',
                   data_path + '/train_label.pkl',
                   num_samples=-1, mmap=True, num_frames=100, seed=0)

testdata = Feeder(data_path + '/val_data.npy',
                  data_path + '/val_label.pkl',
                  num_samples=-1, mmap=True, num_frames=100, seed=0)

# FIX: the train loader previously used shuffle=False, which presents the
# network with the identical sample order every epoch and degrades SGD
# convergence.  Evaluation stays unshuffled so per-batch scores collected in
# the test loop line up with the label file order.
trainloader = torch.utils.data.DataLoader(
    traindata, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
testloader = torch.utils.data.DataLoader(
    testdata, batch_size=64, shuffle=False, num_workers=0, pin_memory=True)
-
# All hyper-parameters come from a YAML file sitting next to the script.
configFile = 'config.yml'
with open(configFile, 'r') as cfg:
    config = yaml.safe_load(cfg)

num_epochs = int(config['num_epochs'])
batch_size = int(config['batch_size'])
learning_rate = float(config['learning_rate'])
weight_decay = float(config['weight_decay'])
best_acc = 0.

data_dir = config['dataset']
val_data_dir = data_dir

# Per-dataset skeleton layout: dataset name -> (joint count, class count).
_DATASET_DIMS = {
    'UCLA': (20, 10),
    'NTU': (25, 60),
}
try:
    num_joints, num_cls = _DATASET_DIMS[config['dataset']]
except KeyError:
    # Unknown dataset name in the config — same failure mode as before.
    raise ValueError from None
-
# Resolve the network class from its dotted path in the config,
# e.g. "models.gcn.Model" -> module "models.gcn", attribute "Model".
module_path, model_name = config['net'].rsplit('.', 1)
model_cls = getattr(importlib.import_module(module_path), model_name)
print('model name', model_name)

net = model_cls(config['in_channels'], num_joints,
                config['data_param']['num_frames'], num_cls, config)

device_ids = config['device_ids']
# net = torch.nn.DataParallel(net, device_ids=device_ids)

# Single-device placement: the first configured device id.
device = device_ids[0]
net = net.to(device)

optimizer = optim.Adam(net.parameters(), lr=learning_rate,
                       weight_decay=weight_decay)
# optimizer = nn.DataParallel(optimizer)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.55,
                                      last_epoch=config['start_epoch'] - 2)
print("optimizer, done")

# Log-file handle closed at the very end of the script; the writes that used
# it are currently commented out in the training loop.  'r+' requires the
# file to already exist.
file = open('test_ntu_xv.txt', 'r+')

# sample_input = torch.randn((1, 3, 100, 25, 2)).cuda()
# torch.onnx.export(net, sample_input, "out2.onnx")
print('start training')
for epoch in range(config['start_epoch'], num_epochs + 1):

    # Re-seed numpy so any numpy-based randomness in the Feeder pipeline
    # differs from epoch to epoch.
    np.random.seed()
    tic = time()

    # ------------------------- training phase -------------------------
    net.train()
    correct = 0
    total = 0
    running_loss = 0.0
    num_iters = 0

    for x, labels in tqdm(trainloader, total=len(trainloader),
                          disable=not config['tqdm'], ascii=True):
        # FIX: the original wrapped an existing tensor with torch.tensor(),
        # which copies and warns; and it used .cuda() while the model lives
        # on `device` (crashes whenever device_ids[0] != 0).
        inputs = x.float().to(device)
        targets = labels.long().to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        # FIX: the original passed `outputs.data` to get_loss.  `.data`
        # detaches the activations from the autograd graph, so backward()
        # produced no gradients for the network parameters and training was
        # effectively a no-op.  The live tensor must be used here.
        loss = net.get_loss(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == targets).sum().item()
        num_iters += 1

    scheduler.step()

    acc_train = correct / total
    loss_train = running_loss / num_iters
    print('Epoch %d: train loss: %.5f, train acc: %.5f,time: %.5f' % (epoch, loss_train, acc_train, time() - tic))
    ## file.write('Epoch: '+str(epoch)+' train loss: '+str(loss_train)+' train acc: '+ str(acc_train)+' time: ' +str(time() - tic)+'\n')

    # ------------------------- evaluation phase -------------------------
    correct = 0
    total = 0
    running_loss = 0.0
    num_iters = 0
    score_frag = []  # per-batch class-score arrays, concatenated below
    net.eval()
    with torch.no_grad():
        test_tic = time()
        for inputs, labels in tqdm(testloader, ascii=True,
                                   disable=not config['tqdm']):
            batch = inputs.float().to(device)
            targets = labels.long().to(device)

            outputs = net(batch)
            # Inside no_grad() there is no graph to detach from, so passing
            # `outputs` directly is equivalent to the original `.data` use.
            loss = net.get_loss(outputs, targets)
            score_frag.append(outputs.cpu().numpy())
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == targets).sum().item()
            num_iters += 1

    score = np.concatenate(score_frag)

    acc_test = correct / total
    loss_test = running_loss / num_iters
    print('test loss: %.5f, test acc: %.5f' % (loss_test, acc_test))

    # Best-score snapshotting, kept from the original where it was also
    # disabled:
    # if acc_test > best_acc:
    #     best_acc = acc_test
    #     with open('../../code/gc-lstm/joints_best_acc_ntu_xs.pkl', 'wb') as f:
    #         pickle.dump(score, f)

    # file.write('test loss: '+str(loss_test)+' test acc: '+ str(acc_test)+' time: ' +str(time() - test_tic)+'\n')

file.close()
|