{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:40:25Z","timestamp":1772822425892,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":36,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819666010","type":"print"},{"value":"9789819665990","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-6599-0_2","type":"book-chapter","created":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T22:15:29Z","timestamp":1751408129000},"page":"17-31","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Mixed Text Recognition with\u00a0Efficient Parameter Fine-Tuning and\u00a0Transformer"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0216-4385","authenticated-orcid":false,"given":"Da","family":"Chang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3734-9744","authenticated-orcid":false,"given":"Yu","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,2]]},"reference":[{"key":"2_CR1","unstructured":"7th International Conference on Document Analysis and Recognition (ICDAR 2003), 2-Volume Set, 3\u20136 August 2003, Edinburgh, Scotland,UK. IEEE Computer Society (2003). https:\/\/ieeexplore.ieee.org\/xpl\/conhome\/8701\/proceeding"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Baek, J., et al.: What is wrong with scene text recognition model comparisons? dataset and model analysis. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4714\u20134722 (2019). https:\/\/api.semanticscholar.org\/CorpusID:102481180","DOI":"10.1109\/ICCV.2019.00481"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Baviskar, D., Ahirrao, S., Potdar, V., Kotecha, K.V.: Efficient automated processing of the unstructured documents using artificial intelligence: a systematic literature review and future directions. IEEE Access 9, 72894\u201372936 (2021). https:\/\/api.semanticscholar.org\/CorpusID:234831463","DOI":"10.1109\/ACCESS.2021.3072900"},{"key":"2_CR4","unstructured":"Chen, L., et al.: Decision transformer: reinforcement learning via sequence modeling. In: Advances in Neural Information Processing Systems, vol. 34, pp. 15084\u201315097 (2021)"},{"key":"2_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. ArXiv arXiv:2010.11929 (2020). https:\/\/api.semanticscholar.org\/CorpusID:225039882"},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Fang, S., Xie, H., Wang, Y., Mao, Z., Zhang, Y.: Read like humans: autonomous, bidirectional and iterative language modeling for scene text recognition. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7094\u20137103 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232185272","DOI":"10.1109\/CVPR46437.2021.00702"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Fujitake, M.: Dtrocr: decoder-only transformer for optical character recognition. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 8025\u20138035 (2024)","DOI":"10.1109\/WACV57701.2024.00784"},{"key":"2_CR8","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1016\/j.patcog.2017.10.013","volume":"77","author":"J Gu","year":"2018","unstructured":"Gu, J., et al.: Recent advances in convolutional neural networks. Pattern Recogn. 77, 354\u2013377 (2018)","journal-title":"Pattern Recogn."},{"key":"2_CR9","unstructured":"Hu, J.E., et al.: Lora: low-rank adaptation of large language models. ArXiv arXiv:2106.09685 (2021). https:\/\/api.semanticscholar.org\/CorpusID:235458009"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: ICDAR 2019 competition on scanned receipt OCR and information extraction. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1516\u20131520 (2019). https:\/\/api.semanticscholar.org\/CorpusID:211026630","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et al.: ICDAR 2015 competition on robust reading. In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR), pp. 1156\u20131160 (2015). https:\/\/api.semanticscholar.org\/CorpusID:13322740","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Karatzas, D., et al.: ICDAR 2013 robust reading competition. In: 2013 12th International Conference on Document Analysis and Recognition, pp. 1484\u20131493 (2013). https:\/\/api.semanticscholar.org\/CorpusID:206777226","DOI":"10.1109\/ICDAR.2013.221"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Kim, G., et al.: OCR-free document understanding transformer. In: European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19815-1_29"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Li, M., et al.: TROCR: transformer-based optical character recognition with pre-trained models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 13094\u201313102 (2023)","DOI":"10.1609\/aaai.v37i11.26538"},{"key":"2_CR15","doi-asserted-by":"publisher","unstructured":"Li, Y., Hu, Y., Chen, J., Wang, B., Liu, W.: ECG classification with dual models: XGBoost voting and deep learning with attention. In: 2023 16th International Conference on Advanced Computer Theory and Engineering (ICACTE), pp. 202\u2013206 (2023). https:\/\/doi.org\/10.1109\/ICACTE59887.2023.10335476","DOI":"10.1109\/ICACTE59887.2023.10335476"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Lin, H., Yang, P., Zhang, F.: Review of scene text detection and recognition. Arch. Comput. Methods Eng. 27, 433\u2013 454 (2019). https:\/\/api.semanticscholar.org\/CorpusID:128295528","DOI":"10.1007\/s11831-019-09315-1"},{"key":"2_CR17","unstructured":"Yang Liu, S., et al.: Dora: weight-decomposed low-rank adaptation. ArXiv arXiv:2402.09353 (2024). https:\/\/api.semanticscholar.org\/CorpusID:267657886"},{"key":"2_CR18","unstructured":"Liu, Y.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"issue":"1","key":"2_CR19","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1007\/s11263-020-01369-0","volume":"129","author":"S Long","year":"2021","unstructured":"Long, S., He, X., Yao, C.: Scene text detection and recognition: the deep learning era. Int. J. Comput. Vision 129(1), 161\u2013184 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Memon, J., Sami, M., Khan, R.A.: Handwritten optical character recognition (OCR): a comprehensive systematic literature review (SLR). IEEE Access 8, 142642\u2013142668 (2020). https:\/\/api.semanticscholar.org\/CorpusID:209531740","DOI":"10.1109\/ACCESS.2020.3012542"},{"key":"2_CR21","unstructured":"Mishra, A., Karteek, A., Jawahar, C.V.: Scene text recognition using higher order language priors. In: British Machine Vision Conference (2009). https:\/\/api.semanticscholar.org\/CorpusID:9695967"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Phan, T.Q., Shivakumara, P., Tian, S., Tan, C.L.: Recognizing text with perspective distortion in natural scenes. In: 2013 IEEE International Conference on Computer Vision, pp. 569\u2013576 (2013). https:\/\/api.semanticscholar.org\/CorpusID:5619635","DOI":"10.1109\/ICCV.2013.76"},{"key":"2_CR23","unstructured":"Pu, G., Jain, A., Yin, J., Kaplan, R.: Empirical analysis of the strengths and weaknesses of PEFT techniques for LLMs. arXiv preprint arXiv:2304.14999 (2023)"},{"key":"2_CR24","unstructured":"Puigcerver, J., Martin-Albo, D., Villegas, M.: Laia: a deep learning toolkit for HTR (2016). https:\/\/github.com\/jpuigcerver\/Laia. gitHub repository"},{"key":"2_CR25","unstructured":"Qin, L., et al.: Multilingual large language model: a survey of resources, taxonomy and frontiers (2024). https:\/\/api.semanticscholar.org\/CorpusID:269005862"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Rabby, A.S.A., Islam, M.M., Hasan, N., Nahar, J., Rahman, F.: A deep learning solution to detect text-types using a convolutional neural network (2021). https:\/\/api.semanticscholar.org\/CorpusID:236644202","DOI":"10.1007\/978-981-33-4087-9_58"},{"key":"2_CR27","unstructured":"Raisi, Z., Naiel, M.A., Fieguth, P.W., Wardell, S., Zelek, J.S.: Text detection and recognition in the wild: a review. ArXiv arXiv:2006.04305 (2020). https:\/\/api.semanticscholar.org\/CorpusID:219531885"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Risnumawan, A., Shivakumara, P., Chan, C.S., Tan, C.L.: A robust arbitrary text detection system for natural scene images. Expert Syst. Appl. 41, 8027\u20138048 (2014). https:\/\/api.semanticscholar.org\/CorpusID:15559857","DOI":"10.1016\/j.eswa.2014.07.008"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Sheng, F., Chen, Z., Xu, B.: NRTR: a no-recurrence sequence-to-sequence model for scene text recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 781\u2013786 (2018). https:\/\/api.semanticscholar.org\/CorpusID:46931567","DOI":"10.1109\/ICDAR.2019.00130"},{"issue":"11","key":"2_CR30","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2016","unstructured":"Shi, B., Bai, X., Yao, C.: An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans. Pattern Anal. Mach. Intell. 39(11), 2298\u20132304 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR31","unstructured":"Stickland, A.C., Murray, I.: BERT and PALs: projected attention layers for efficient adaptation in multi-task learning. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a097, pp. 5986\u20135995. PMLR (2019). https:\/\/proceedings.mlr.press\/v97\/stickland19a.html"},{"key":"2_CR32","unstructured":"Wang, K., Babenko, B., Belongie, S.J.: End-to-end scene text recognition. In: 2011 International Conference on Computer Vision, pp. 1457\u20131464 (2011). https:\/\/api.semanticscholar.org\/CorpusID:14136313"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xie, H., Fang, S., Wang, J., Zhu, S., Zhang, Y.: From two to one: a new scene text recognizer with visual language modeling network. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14174\u201314183 (2021). https:\/\/api.semanticscholar.org\/CorpusID:237267316","DOI":"10.1109\/ICCV48922.2021.01393"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Yang, L., Wang, P., Li, H., Li, Z., Zhang, Y.: A holistic representation guided attention network for scene text recognition. Neurocomputing 414, 67\u201375 (2019). https:\/\/api.semanticscholar.org\/CorpusID:220363911","DOI":"10.1016\/j.neucom.2020.07.010"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Yu, D., Li, X., Zhang, C., Han, J., Liu, J., Ding, E.: Towards accurate scene text recognition with semantic reasoning networks. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12110\u201312119 (2020). https:\/\/api.semanticscholar.org\/CorpusID:214693009","DOI":"10.1109\/CVPR42600.2020.01213"},{"issue":"7","key":"2_CR36","doi-asserted-by":"publisher","first-page":"1235","DOI":"10.1162\/neco_a_01199","volume":"31","author":"Y Yu","year":"2019","unstructured":"Yu, Y., Si, X., Hu, C., Zhang, J.: A review of recurrent neural networks: LSTM cells and network architectures. Neural Comput. 31(7), 1235\u20131270 (2019)","journal-title":"Neural Comput."}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-6599-0_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T22:15:37Z","timestamp":1751408137000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-6599-0_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819666010","9789819665990"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-6599-0_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}