|
- import time
-
- import mindspore as ms
- import mindspore.context as context
- from mindspore import nn, Model
- from mindspore.train.callback import TimeMonitor,LossMonitor
-
- from dataset import create_dataset
- from MaxwellNet import MaxwellNet
- from utils import adjust_learning_rate
- from loss import MaxwellNetWithLoss
-
- import argparse
- import os
- import json
-
# Command-line interface for the MaxwellNet training script.
parser = argparse.ArgumentParser(description="Train a MaxwellNet")

# Experiment directory containing specs_maxwell.json.
parser.add_argument("--directory", required=False, default='examples/spheric_te')

# Target device name reported during training.
parser.add_argument("--device_target", default="Ascend")

# Cloud-style data / pretrain / output locations.
parser.add_argument('--multi_data_url', required=False, default=None,
                    help='Location of data.')
parser.add_argument('--ckpt_url', required=False, default=None,
                    help='Location of pretrain.')
parser.add_argument('--train_url', required=False, default="/tmp/output",
                    help='Location of training outputs.')

args = parser.parse_args()
-
-
-
def main(directory):
    """Train MaxwellNet on the experiment found in *directory*.

    The directory (resolved relative to this file) must contain a
    ``specs_maxwell.json`` file describing the network, the physics and the
    training hyper-parameters.  The final checkpoint is written to
    ``args.train_url``.

    Raises:
        Exception: if ``specs_maxwell.json`` is missing from the directory.
    """
    current_work_dir = os.path.dirname(__file__)
    directory = os.path.join(current_work_dir, directory)
    specs_filename = os.path.join(directory, 'specs_maxwell.json')

    if not os.path.isfile(specs_filename):
        print(specs_filename)
        raise Exception(
            'The experiment directory does not include specifications file "specs_maxwell.json"'
        )

    # BUGFIX: json.load(open(...)) leaked the file handle; use a context
    # manager so the file is closed deterministically.
    with open(specs_filename) as specs_file:
        specs = json.load(specs_file)

    # BUGFIX: the seed was read but never applied; seed MindSpore so runs
    # are reproducible when "Seed" is present in the specs.
    seed_number = get_spec_with_default(specs, "Seed", None)
    if seed_number is not None:
        ms.set_seed(seed_number)

    print("Experiment description: \n" +
          ' '.join([str(elem) for elem in specs["Description"]]))
    print("Training with " + str(args.device_target))

    net = MaxwellNet(**specs["NetworkSpecs"], **specs["PhysicalSpecs"])

    print("Number of network parameters: {}".format(
        sum(p.size for p in net.get_parameters())))

    batch_size = get_spec_with_default(specs, "BatchSize", 1)
    epochs = get_spec_with_default(specs, "Epochs", 1)
    # NOTE(review): snapshot_freq and the physical-spec fields below are read
    # but never used by this loop -- confirm whether periodic checkpointing /
    # mode-specific handling was intended.
    snapshot_freq = specs["SnapshotFrequency"]
    physical_specs = specs["PhysicalSpecs"]
    symmetry_x = physical_specs['symmetry_x']
    mode = physical_specs['mode']
    high_order = physical_specs['high_order']

    train_ds = create_dataset(directory, "train", batch_size=batch_size)
    dataset_size = train_ds.get_dataset_size()
    train_dataloader = train_ds.create_dict_iterator()

    print("Train Dataset length: {}".format(dataset_size))
    lr_schedule = adjust_learning_rate(
        base_lr=get_spec_with_default(specs, "LearningRate", 0.0001),
        gamma=get_spec_with_default(specs, "LearningRateDecay", 1.0),
        step_size=get_spec_with_default(specs, "LearningRateDecayStep", 10000),
        step_total=dataset_size * epochs)
    optimizer = nn.Adam(params=net.get_parameters(), learning_rate=lr_schedule,
                        weight_decay=0.0)

    net_with_loss = MaxwellNetWithLoss(network=net)

    # Dynamic loss scaling for mixed-precision numerical stability.
    manager = nn.DynamicLossScaleUpdateCell(
        loss_scale_value=2 ** 12, scale_factor=2, scale_window=1000)
    # TrainOneStepWithLossScaleCell performs forward, backward and the
    # optimizer update in a single call and returns
    # (loss, overflow_flag, loss_scale).
    model = nn.TrainOneStepWithLossScaleCell(
        net_with_loss, optimizer=optimizer, scale_sense=manager)
    model.set_train()

    print("Train total epoch: ", epochs)
    print("---------------train start---------------")
    step = 0
    for epoch in range(epochs):
        for i, item in enumerate(train_dataloader):
            step_begin_time = time.time()
            sample, n = item['sample'], item['n']
            # BUGFIX: the previous loop computed gradients with ms.grad()
            # but never passed them to the optimizer, so the network weights
            # were never updated (and the gradient tuple was printed as
            # "loss").  Calling the train cell runs the whole training step.
            # NOTE(review): the original also attempted
            # clip_by_value(grads, clip_value_max=0.1); clipping inside a
            # loss-scale train cell requires a custom cell -- TODO if
            # gradient clipping is actually required.
            loss, overflow, _scaling = model(sample, n)
            step_end_time = time.time()
            print('step:', step, 'epoch:', epoch, 'batch:', i, 'loss:', loss,
                  'overflow:', overflow)
            print('step time is', step_end_time - step_begin_time)
            step += 1

    # Persist the trained weights (ensure the output directory exists).
    os.makedirs(args.train_url, exist_ok=True)
    save_path = os.path.join(args.train_url, "MaxwellNet.ckpt")
    ms.save_checkpoint(net, save_path)
    print("---------------train success---------------")
-
-
def get_spec_with_default(specs, key, default):
    """Return ``specs[key]`` if present, otherwise *default*.

    Equivalent to the original try/except-KeyError lookup, expressed with
    the idiomatic ``dict.get``.
    """
    return specs.get(key, default)
-
-
if __name__ == '__main__':
    # BUGFIX: honor the --device_target CLI flag.  The device was hard-coded
    # to 'GPU', contradicting the parser default of "Ascend" and the
    # "Training with <device_target>" message printed in main().
    context.set_context(mode=context.PYNATIVE_MODE, device_id=0,
                        device_target=args.device_target)
    main(args.directory)
|