diff --git a/configs/rec/multi_language/rec_syriac_lite_train.yml b/configs/rec/multi_language/rec_syriac_lite_train.yml new file mode 100644 index 0000000000..8d7d22fc30 --- /dev/null +++ b/configs/rec/multi_language/rec_syriac_lite_train.yml @@ -0,0 +1,110 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_syriac_lite + save_epoch_step: 3 + eval_batch_step: + - 0 + - 2000 + cal_metric_during_train: true + pretrained_model: null + checkpoints: null + save_inference_dir: null + use_visualdl: false + infer_img: null + character_dict_path: ppocr/utils/dict/syriac_dict.txt + max_text_length: 25 + infer_mode: false + use_space_char: true +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: L2 + factor: 1.0e-05 +Architecture: + model_type: rec + algorithm: CRNN + Transform: null + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: + - 1 + - 2 + - 2 + - 2 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 1.0e-05 +Loss: + name: CTCLoss +PostProcess: + name: CTCLabelDecode +Metric: + name: RecMetric + main_indicator: acc +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ + label_file_list: + - train_data/syriac_train.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - RecAug: null + - CTCLabelEncode: null + - RecResizeImg: + image_shape: + - 3 + - 32 + - 320 + - KeepKeys: + keep_keys: + - image + - label + - length + loader: + shuffle: true + batch_size_per_card: 256 + drop_last: true + num_workers: 8 +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ + label_file_list: + - train_data/syriac_val.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - CTCLabelEncode: null + - RecResizeImg: + image_shape: + - 3 + - 32 + - 320 + - KeepKeys: + keep_keys: + - image + - label + - length + loader: + shuffle: false + drop_last: false + batch_size_per_card: 256 + num_workers: 8 diff --git a/ppocr/utils/dict/syriac_dict.txt b/ppocr/utils/dict/syriac_dict.txt new file mode 100644 index 0000000000..4417f4ac26 --- /dev/null +++ b/ppocr/utils/dict/syriac_dict.txt @@ -0,0 +1,157 @@ +! +# +$ +% +& +' +( ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +_ +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +É +é +܀ +܁ +܂ +܃ +܄ +܅ +܆ +܇ +܈ +܉ +܊ +܋ +܌ +܍ +܏ +ܐ +ܑ +ܒ +ܓ +ܔ +ܕ +ܖ +ܗ +ܘ +ܙ +ܚ +ܛ +ܜ +ܝ +ܞ +ܟ +ܠ +ܡ +ܢ +ܣ +ܤ +ܥ +ܦ +ܧ +ܨ +ܩ +ܪ +ܫ +ܬ +ܭ +ܮ +ܯ +ܰ +ܱ +ܲ +ܳ +ܴ +ܵ +ܶ +ܷ +ܸ +ܹ +ܺ +ܻ +ܼ +ܽ +ܾ +ܿ +݀ +݁ +݂ +݃ +݄ +݅ +݆ +݇ +݈ +݉ +݊ +ݍ +ݎ +ݏ