# OpenModelZoo / masktextspotter

# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
# Whether training runs on ModelArts.
enable_modelarts: true
# OBS URL of the dataset.
data_url: ""
multi_data_url: ""
# OBS URL for the training output.
train_url: ""
checkpoint_url: ""
# Local dataset path.
data_path: "/cache/data"
# Local training output path.
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: "./ckpt_0/mask_rcnn_2-500_229.ckpt"
# Target device type; the choices document at the end of this file lists
# Ascend, GPU and CPU.
device_target: "Ascend"
# Whether profiling is enabled while training.
enable_profiling: false

# Pretrain file path.
pre_trained: ""
icdar_root: "/home/work/user-job-dir/inputs/data/"
# Active datasets. Commented entries are disabled alternatives.
dataset_type:
  # - "totaltext"
  # - "scut-eng-char"
  - "icdar2013"
  # - "icdar2015"
  # - "SynthText"
dataset_random_fetch_num: 30
# Earlier four-dataset setup, kept for reference:
# dataset_type: ["SynthText_mini", "scut-eng-char", "icdar2013", "icdar2015"]
# use_charann: [true, true, true, true]
# NOTE(review): presumably one flag per dataset_type entry -- confirm against
# the data loader.
use_charann: [true]
# Disabled COCO-related keys, kept for reference:
# coco_root: "/home/xy/data"
# ckpt_path: "./ckpt_maskrcnn/mask_rcnn-12_7393.ckpt"
# ann_file: "./annotations/instances_val2017.json"
# ==============================================================================
# modelarts_dataset_unzip_name: "cocodataset"
need_modelarts_dataset_unzip: true

# Image file path.
img_path: ""
# Result file path.
result_path: ""

# Training options
# Input image size. Each feature_shapes entry in the anchor section equals
# [img_height / stride, img_width / stride] for the matching anchor_strides
# value (e.g. 768 / 4 = 192, 1280 / 4 = 320).
img_width: 1280
img_height: 768
# Commented values below are earlier settings kept for reference.
#keep_ratio: True
keep_ratio: False
#flip_ratio: 0.5
# NOTE(review): presumably 0 turns the flip / expand augmentation off --
# confirm against the dataset pipeline.
flip_ratio: 0
expand_ratio: 0
#expand_ratio: 1.0

# Same value as num_gts in the bbox_assign_sampler section.
max_instance_count: 128
# Twice roi_layer.mask_out_size ([16, 64]) in each dimension.
mask_shape: [32, 128]
#mask_shape: [28,28]
# anchor
# One [height, width] entry per anchor_strides value, equal to
# [img_height / stride, img_width / stride] for the 768 x 1280 input.
feature_shapes:
  # Earlier shapes, disabled:
  # - [120, 160]
  # - [60, 80]
  # - [30, 40]
  # - [15, 20]
  # - [8, 10]
  - [192, 320]
  - [96, 160]
  - [48, 80]
  - [24, 40]
  - [12, 20]
anchor_scales: [8]
anchor_ratios: [0.5, 1.0, 2.0]
anchor_strides: [4, 8, 16, 32, 64]
# Equals len(anchor_scales) * len(anchor_ratios).
num_anchors: 3

# resnet
# Three four-element lists; presumably one entry per backbone stage -- TODO confirm.
resnet_block: [3, 4, 6, 3]
resnet_in_channels: [64, 256, 512, 1024]
resnet_out_channels: [256, 512, 1024, 2048]

# fpn
# Same values as resnet_out_channels.
fpn_in_channels: [256, 512, 1024, 2048]
fpn_out_channels: 256
# Same count as the feature_shapes and anchor_strides lists (5 entries each).
fpn_num_outs: 5

# rpn
# Same value as fpn_out_channels.
rpn_in_channels: 256
rpn_feat_channels: 256
rpn_loss_cls_weight: 1.0
rpn_loss_reg_weight: 1.0
rpn_cls_out_channels: 1
# Four-element mean / std lists.
# NOTE(review): presumably box-delta normalisation constants -- confirm.
rpn_target_means: [0.0, 0.0, 0.0, 0.0]
rpn_target_stds: [1.0, 1.0, 1.0, 1.0]

# bbox_assign_sampler
# NOTE(review): neg_iou_thr (0.7) is larger than pos_iou_thr (0.3). RPN
# assigners are more commonly configured the other way round (neg 0.3 /
# pos 0.7), so these two values may have been swapped -- confirm that this
# is intentional before training.
neg_iou_thr: 0.7
pos_iou_thr: 0.3
min_pos_iou: 0.3
# Equals the sum of height * width over the active feature_shapes entries,
# multiplied by num_anchors: (61440 + 15360 + 3840 + 960 + 240) * 3 = 245520.
num_bboxes: 245520
# 76740 is the same product for the commented-out feature_shapes.
#num_bboxes: 76740
# Same value as max_instance_count.
num_gts: 128
num_expected_neg: 256
num_expected_pos: 128

# proposal
activate_num_classes: 2
use_sigmoid_cls: true

# roi_align
roi_layer:
  type: "RoIAlign"
  out_size: [7, 7]
  # Half of mask_shape ([32, 128]) in each dimension.
  mask_out_size: [16, 64]
  sample_num: 2
# Same value as fpn_out_channels.
roi_align_out_channels: 256
# The first four anchor_strides values.
roi_align_featmap_strides: [4, 8, 16, 32]
roi_align_finest_scale: 56
# Equals num_expected_pos_stage2 + num_expected_neg_stage2 (128 + 512).
roi_sample_num: 640

# bbox_assign_sampler_stage2
# All three stage-2 IoU thresholds share the same value.
neg_iou_thr_stage2: 0.5
pos_iou_thr_stage2: 0.5
min_pos_iou_stage2: 0.5
# Same value as rpn_proposal_max_num in the train proposal section.
num_bboxes_stage2: 2000
# Positive + negative (128 + 512) equals roi_sample_num (640).
num_expected_pos_stage2: 128
num_expected_neg_stage2: 512
num_expected_total_stage2: 512

# rcnn
rcnn_num_layers: 2
# Same value as roi_align_out_channels.
rcnn_in_channels: 256
rcnn_fc_out_channels: 1024
rcnn_mask_out_channels: 256
# Loss weights are written as integers here, unlike the float rpn_loss_*
# weights; the integer type is kept as-is.
rcnn_loss_cls_weight: 1
rcnn_loss_reg_weight: 1
rcnn_loss_mask_fb_weight: 1
# Four-element mean / std lists.
# NOTE(review): presumably box-delta normalisation constants -- confirm.
rcnn_target_means: [0.0, 0.0, 0.0, 0.0]
rcnn_target_stds: [0.1, 0.1, 0.2, 0.2]

# train proposal
rpn_proposal_nms_across_levels: false
rpn_proposal_nms_pre: 2000
rpn_proposal_nms_post: 2000
# Same value as num_bboxes_stage2.
rpn_proposal_max_num: 2000
rpn_proposal_nms_thr: 0.7
rpn_proposal_min_bbox_size: 0

# test proposal
rpn_nms_across_levels: false
rpn_nms_pre: 1000
rpn_nms_post: 1000
rpn_max_num: 1000
# NOTE(review): the train-time counterpart rpn_proposal_nms_thr is 0.7;
# confirm that the lower test-time threshold is intended.
rpn_nms_thr: 0.2
rpn_min_bbox_min_size: 0
test_score_thr: 0.02
test_iou_thr: 0.5
test_max_per_img: 100
test_batch_size: 1

rpn_head_use_sigmoid: true
rpn_head_weight: 1.0
mask_thr_binary: 0.5

# LR
# NOTE(review): this file also sets learning_rate (0.002) and lr (0.01) in
# the "other" section; which of the three rate keys the training script
# reads is not visible from this file -- confirm.
base_lr: 0.01
# Two groups of warmup keys are defined: warmup_iters / warmup_factor here
# and warmup_step / warmup_ratio below, with different step counts (500 vs 100).
warmup_iters: 500
warmup_factor: 0.1
steps: [100000, 160000]
sgd_momentum: 0.9
gamma: 0.1

base_step: 5626
# Same value as epoch_size in the train section.
total_epoch: 100
warmup_step: 100
warmup_ratio: 0.1

# Alternative schedule, disabled and kept for reference.
##############
# base_lr: 0.02
# base_step: 5626
# total_epoch: 300
# warmup_step: 500
# warmup_ratio: 0.333333
# sgd_momentum: 0.9
#############


# train
batch_size: 1
loss_scale: 1
# NOTE(review): differs from sgd_momentum (0.9) in the LR section; which key
# the optimizer reads is not visible from this file -- confirm.
momentum: 0.91
weight_decay: 0.0001  # 1e-4
pretrain_epoch_size: 0
# Same value as total_epoch in the LR section.
epoch_size: 100
save_checkpoint: true
save_checkpoint_epochs: 10
keep_checkpoint_max: 200
save_checkpoint_path: "./"

# NOTE(review): the train and test MindRecord directories are identical --
# confirm that sharing one directory is intended.
mindrecord_train_dir: "MindRecord_MixData_noicdar"
mindrecord_test_dir: "MindRecord_MixData_noicdar"
# [image directory, ground-truth directory] pairs. Commented pairs are
# disabled alternatives.
# NOTE(review): the five slots appear to line up positionally with the five
# dataset_type slots -- confirm against the data loader.
train_data_type:
  # - ["Images/Train", "total_text_labels/train_gts"]
  # - ["train_images", "train_gts"]
  - ["train_images", "train_gts"]
  # - ["train_images", "train_gts"]
  # - ["SynthText", "SynthText_GT_E2E"]
val_data_type:
  - ["test_images", "test_gts"]
instance_set: "train_list.txt"
# Character labels: "background", the digits 0-9 and the letters a-z
# (37 entries, matching char_classes). Every entry is quoted so that none
# of them can be resolved as a number or boolean.
icdar_classes:
  [
    "background",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
    "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
    "u", "v", "w", "x", "y", "z",
  ]
# Detection labels (2 entries, matching num_classes).
det_classes: ["background", "foreground"]
num_classes: 2
char_classes: 37
# only_create_dataset: false
# Run distributed training.
run_distribute: true
# Do train or not.
do_train: true
# Do eval or not.
do_eval: false
# Dataset name.
dataset: "icdar"
# Device id.
device_id: 7
test_device_id: 3
# Number of devices used.
device_num: 8
# Rank id.
rank_id: 0

# maskrcnn export
file_name: "maskrcnn"
# Export file format; the choices document at the end of this file lists
# AIR, ONNX and MINDIR.
file_format: "MINDIR"
# Earlier checkpoint locations, disabled and kept for reference.
#ckpt_file: '/cache/data/cocodataset/ckpt_maskrcnn/mask_rcnn-12_7393.ckpt'
#ckpt_file_local: './maskrcnn/scripts/train_parallel0/ckpt_0/mask_rcnn-12_7393.ckpt'
# The choices document at the end of this file lists float16 and float32.
export_input_type: float16

# other
# NOTE(review): learning_rate (0.002), lr (0.01) and base_lr (0.01, LR
# section) are three separate keys; which one the training script reads is
# not visible from this file -- confirm.
learning_rate: 0.002
buffer_size: 1000
save_checkpoint_steps: 1562
sink_size: -1
dataset_sink_mode: true
lr: 0.01

# Model Description
model_name: "maskrcnn"


---
# Config description for each option
# Each value is the help text for the key of the same name in the first
# document. Where a description quotes a default, it is kept in sync with
# the value that the first document sets for that key.
# NOTE(review): ann_file and only_create_dataset are commented out in the
# first document; their descriptions are left unchanged.
enable_modelarts: 'Whether training on modelarts, default: True'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'
ann_file: 'Ann file, default is val.json.'

device_target: 'Target device type'
enable_profiling: 'Whether enable profiling while training, default: False'
only_create_dataset: 'If set it true, only create Mindrecord, default is false.'
run_distribute: 'Run distribute, default is true.'
do_train: 'Do train or not, default is true.'
do_eval: 'Do eval or not, default is false.'
dataset: 'Dataset, default is icdar.'
pre_trained: 'Pretrain file path.'
device_id: 'Device id, default is 7.'
device_num: 'Use device nums, default is 8.'
rank_id: 'Rank id, default is 0.'
file_format: 'file format'
img_path: "image file path."
result_path: "result file path."

---
# Lists of accepted values for the options named by each key. The values set
# in the first document (Ascend, MINDIR, float16) each appear in their list.
device_target: ['Ascend', 'GPU', 'CPU']
file_format: ["AIR", "ONNX", "MINDIR"]
export_input_type: ["float16", "float32"]