{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T09:40:04Z","timestamp":1775554804520,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,1,21]],"date-time":"2024-01-21T00:00:00Z","timestamp":1705795200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,21]],"date-time":"2024-01-21T00:00:00Z","timestamp":1705795200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s11760-023-02970-9","type":"journal-article","created":{"date-parts":[[2024,1,21]],"date-time":"2024-01-21T02:01:46Z","timestamp":1705802506000},"page":"3053-3062","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":26,"title":["A transformer-based approach for Arabic offline handwritten text recognition"],"prefix":"10.1007","volume":"18","author":[{"given":"Saleh","family":"Momeni","sequence":"first","affiliation":[]},{"given":"Bagher","family":"BabaAli","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,21]]},"reference":[{"issue":"3","key":"2970_CR1","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1007\/s10032-019-00339-8","volume":"22","author":"I Ahmad","year":"2019","unstructured":"Ahmad, I., Fink, G.A.: Handwritten arabic text recognition using multi-stage sub-core-shape hmms. Int. J. Doc. Anal. Recognit. (IJDAR) 22(3), 329\u2013349 (2019)","journal-title":"Int. J. Doc. Anal. Recognit. (IJDAR)"},{"key":"2970_CR2","doi-asserted-by":"crossref","unstructured":"Ahmad, R., Naz, S., Afzal, M.Z., Rashid, S.F.,Liwicki, M., Dengel, A.: Khatt: A deep learning benchmark on arabic script. In 2017 14th IAPR international conference on document analysis and recognition (ICDAR), Volume\u00a07, pp. 10\u201314. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.358"},{"issue":"7","key":"2970_CR3","doi-asserted-by":"publisher","first-page":"2249","DOI":"10.1007\/s00521-020-05070-8","volume":"33","author":"N Altwaijry","year":"2021","unstructured":"Altwaijry, N., Al-Turaiki, I.: Arabic handwriting recognition system using convolutional neural network. Neural Comput. Appl. 33(7), 2249\u20132261 (2021)","journal-title":"Neural Comput. Appl."},{"key":"2970_CR4","doi-asserted-by":"crossref","unstructured":"Atienza, R.: Vision transformer for fast and efficient scene text recognition. In International conference on document analysis and recognition, pp. 319\u2013334. Springer (2021)","DOI":"10.1007\/978-3-030-86549-8_21"},{"key":"2970_CR5","doi-asserted-by":"crossref","unstructured":"Baek, J., Kim, G., Lee, J., Park, S., Han, D., Yun, S., Oh, S.J., Lee, H.: What is wrong with scene text recognition model comparisons? dataset and model analysis. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 4715\u20134723 (2019)","DOI":"10.1109\/ICCV.2019.00481"},{"key":"2970_CR6","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"2970_CR7","unstructured":"Bao, H., Dong, L., Wei, F.: Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254 (2021)"},{"key":"2970_CR8","unstructured":"Bleeker, M., de\u00a0Rijke, M.: Bidirectional scene text recognition with a single decoder. arXiv preprint arXiv:1912.03656 (2019)"},{"key":"2970_CR9","unstructured":"Bluche, T.: Joint line segmentation and transcription for end-to-end handwritten paragraph recognition. Advances in neural information processing systems\u00a029 (2016)"},{"key":"2970_CR10","doi-asserted-by":"crossref","unstructured":"Bluche, T., Messina, R.: Gated convolutional recurrent neural networks for multilingual handwriting recognition. In 2017 14th IAPR international conference on document analysis and recognition (ICDAR), Volume\u00a01, pp. 646\u2013651. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.111"},{"key":"2970_CR11","unstructured":"Chowdhury, A., Vig, L.: An efficient end-to-end neural model for handwritten text recognition. arXiv preprint arXiv:1807.07965 (2018)"},{"key":"2970_CR12","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"2970_CR13","unstructured":"Diaz, D.H., Qin, S., Ingle, R., Fujii, Y., Bissacco, A.: Rethinking text line recognition models. arXiv preprint arXiv:2104.07787 (2021)"},{"key":"2970_CR14","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S. et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"2970_CR15","unstructured":"El-Khair, I.A.: 1.5 billion words arabic corpus. arXiv preprint arXiv:1611.04033 (2016)"},{"issue":"1","key":"2970_CR16","first-page":"11","volume":"5","author":"A El-Sawy","year":"2017","unstructured":"El-Sawy, A., Loey, M., El-Bakry, H.: Arabic handwritten characters recognition using convolutional neural network. WSEAS Trans. Comput. Res. 5(1), 11\u201319 (2017)","journal-title":"WSEAS Trans. Comput. Res."},{"key":"2970_CR17","unstructured":"Gao, Y., Chen, Y., Wang, J., Lu, H.: Reading scene text with attention convolutional sequence modeling. arXiv preprint arXiv:1709.04303 (2017)"},{"key":"2970_CR18","doi-asserted-by":"crossref","unstructured":"Graves, A.: Sequence transduction with recurrent neural networks. arXiv preprint arXiv:1211.3711 (2012)","DOI":"10.1007\/978-3-642-24797-2"},{"key":"2970_CR19","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In Proceedings of the 23rd international conference on Machine learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"2970_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108766","volume":"129","author":"L Kang","year":"2022","unstructured":"Kang, L., Riba, P., Rusi\u00f1ol, M., Forn\u00e9s, A., Villegas, M.: Pay attention to what you read: non-recurrent handwritten text-line recognition. Pattern Recogn. 129, 108766 (2022)","journal-title":"Pattern Recogn."},{"key":"2970_CR21","doi-asserted-by":"crossref","unstructured":"Lee, J., Park, S., Baek, J., Oh, S.J., Kim, S., Lee, H.: On recognizing texts of arbitrary shapes with 2d self-attention. In Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition workshops, pp. 546\u2013547 (2020)","DOI":"10.1109\/CVPRW50498.2020.00281"},{"key":"2970_CR22","unstructured":"Li, M., Lv, T., Cui, L., Lu, Y., Florencio, D., Zhang, C., Li, Z., Wei, F.: Trocr: Transformer-based optical character recognition with pre-trained models. arXiv preprint arXiv:2109.10282 (2021)"},{"key":"2970_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"3","key":"2970_CR24","doi-asserted-by":"publisher","first-page":"1096","DOI":"10.1016\/j.patcog.2013.08.009","volume":"47","author":"SA Mahmoud","year":"2014","unstructured":"Mahmoud, S.A., Ahmad, I., Al-Khatib, W.G., Alshayeb, M., Parvez, M.T., M\u00e4rgner, V., Fink, G.A.: Khatt: an open arabic offline handwritten text database. Pattern Recogn. 47(3), 1096\u20131112 (2014)","journal-title":"Pattern Recogn."},{"key":"2970_CR25","doi-asserted-by":"crossref","unstructured":"Mahmoud, S.A., Ahmad, I., Alshayeb, M., Al-Khatib, W.G., Parvez, M.T., Fink, G.A., M\u00e4rgner, V., El\u00a0Abed, H.: Khatt: Arabic offline handwritten text database. In 2012 International conference on frontiers in handwriting recognition, pp. 449\u2013454. IEEE (2012)","DOI":"10.1109\/ICFHR.2012.224"},{"key":"2970_CR26","doi-asserted-by":"crossref","unstructured":"Michael, J., Labahn, R., Gr\u00fcning, T., Z\u00f6llner, J.: Evaluating sequence-to-sequence models for handwritten text recognition. In 2019 international conference on document analysis and recognition (ICDAR), pp. 1286\u20131293. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00208"},{"key":"2970_CR27","doi-asserted-by":"crossref","unstructured":"Mostafa, A., Mohamed, O., Ashraf, A., Elbehery, A., Jamal, S., Khoriba, G., Ghoneim, A.S.: Ocformer: A transformer-based model for arabic handwritten text recognition. In 2021 International mobile, intelligent, and ubiquitous computing conference (MIUCC), pp. 182\u2013186. IEEE (2021)","DOI":"10.1109\/MIUCC52538.2021.9447608"},{"key":"2970_CR28","unstructured":"Paszke, A., Gross, S., Chintala, S., Chanan, G., Yang, E., DeVito, Z., Lin, Z., Desmaison, A., Antiga, L., Lerer, A.: Automatic differentiation in pytorch (2017)"},{"key":"2970_CR29","unstructured":"Pechwitz, M., Maddouri, S.S., M\u00e4rgner, V., Ellouze, N., Amiri, H. et\u00a0al.: Ifn\/enit-database of handwritten arabic words. In Proc. of CIFED, Volume\u00a02, pp. 127\u2013136. Citeseer (2002)"},{"key":"2970_CR30","doi-asserted-by":"crossref","unstructured":"Pham, V., Bluche, T., Kermorvant, C., Louradour, J.: Dropout improves recurrent neural networks for handwriting recognition. In 2014 14th international conference on frontiers in handwriting recognition, pp. 285\u2013290. IEEE (2014)","DOI":"10.1109\/ICFHR.2014.55"},{"key":"2970_CR31","doi-asserted-by":"crossref","unstructured":"Safaya, A., Abdullatif, M., Yuret, D.: Kuisail at semeval-2020 task 12: Bert-cnn for offensive speech identification in social media. In Proceedings of the fourteenth workshop on semantic evaluation, pp. 2054\u20132059 (2020)","DOI":"10.18653\/v1\/2020.semeval-1.271"},{"key":"2970_CR32","doi-asserted-by":"crossref","unstructured":"Sajid, U., Chow, M., Zhang, J., Kim, T., Wang, G.: Parallel scale-wise attention network for effective scene text recognition. In 2021 international joint conference on neural networks (IJCNN), pp. 1\u20138. IEEE (2021)","DOI":"10.1109\/IJCNN52387.2021.9534223"},{"key":"2970_CR33","doi-asserted-by":"crossref","unstructured":"Sheng, F., Chen, Z., Xu, B.: Nrtr: A no-recurrence sequence-to-sequence model for scene text recognition. In 2019 International conference on document analysis and recognition (ICDAR), pp. 781\u2013786. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00130"},{"issue":"11","key":"2970_CR34","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2016","unstructured":"Shi, B., Bai, X., Yao, C.: An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans. Pattern Anal. Mach. Intell. 39(11), 2298\u20132304 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2970_CR35","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In International conference on machine learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"2970_CR36","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems\u00a030 (2017)"},{"key":"2970_CR37","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., Doetsch, P., Ney, H.: Handwriting recognition with large multidimensional long short-term memory recurrent neural networks. In 2016 15th international conference on frontiers in handwriting recognition (ICFHR), pp. 228\u2013233. IEEE (2016)","DOI":"10.1109\/ICFHR.2016.0052"},{"key":"2970_CR38","doi-asserted-by":"crossref","unstructured":"Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M. et\u00a0al.: Transformers: State-of-the-art natural language processing. In Proceedings of the 2020 conference on empirical methods in natural language processing: system demonstrations, pp. 38\u201345 (2020)","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"2970_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Lu, H., Sak, H., Tripathi, A., McDermott, E., Koo, S., Kumar, S.: Transformer transducer: A streamable speech recognition model with transformer encoders and rnn-t loss. In ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp. 7829\u20137833. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053896"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02970-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-023-02970-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02970-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,25]],"date-time":"2024-03-25T16:04:45Z","timestamp":1711382685000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-023-02970-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,21]]},"references-count":39,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["2970"],"URL":"https:\/\/doi.org\/10.1007\/s11760-023-02970-9","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,21]]},"assertion":[{"value":"9 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 December 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}