{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T16:28:23Z","timestamp":1758126503504,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705359"},{"type":"electronic","value":"9783031705366"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70536-6_7","type":"book-chapter","created":{"date-parts":[[2024,9,2]],"date-time":"2024-09-02T10:03:02Z","timestamp":1725271382000},"page":"104-117","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Script Identification in\u00a0the\u00a0Wild with\u00a0FFT-Multi-grained Mix Attention Transformer"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9822-3087","authenticated-orcid":false,"given":"Zhi","family":"Pan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6470-201X","authenticated-orcid":false,"given":"Yaowei","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7566-6494","authenticated-orcid":false,"given":"Kurban","family":"Ubul","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5464-0594","authenticated-orcid":false,"given":"Alimjan","family":"Aysa","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,3]]},"reference":[{"key":"7_CR1","unstructured":"Ali, A., et al.: XCiT: cross-covariance image transformers. In: Advances in Neural Information Processing Systems, vol. 34, pp. 20014\u201320027 (2021)"},{"key":"7_CR2","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1016\/j.patcog.2018.07.034","volume":"85","author":"AK Bhunia","year":"2019","unstructured":"Bhunia, A.K., Konwer, A., Bhunia, A.K., Bhowmick, A., Roy, P.P., Pal, U.: Script identification in natural scene image and video frames using an attention based convolutional-LSTM network. Pattern Recogn. 85, 172\u2013184 (2019)","journal-title":"Pattern Recogn."},{"key":"7_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/978-3-030-21074-8_11","volume-title":"Computer Vision \u2013 ACCV 2018 Workshops","author":"M Bu\u0161ta","year":"2019","unstructured":"Bu\u0161ta, M., Patel, Y., Matas, J.: E2E-MLT - an unconstrained end-to-end method for multi-language scene text. In: Carneiro, G., You, S. (eds.) ACCV 2018. LNCS, vol. 11367, pp. 127\u2013143. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-21074-8_11"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Chen, J., Li, B., Xue, X.: Scene text telescope: text-focused scene image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12026\u201312035 (2021)","DOI":"10.1109\/CVPR46437.2021.01185"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Cheng, C., Huang, Q., Bai, X., Feng, B., Liu, W.: Patch aggregator for scene text script identification. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1077\u20131083. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00175"},{"key":"7_CR6","series-title":"CCIS","doi-asserted-by":"publisher","first-page":"458","DOI":"10.1007\/978-3-031-11346-8_40","volume-title":"CVIP 2021","author":"K Dutta","year":"2021","unstructured":"Dutta, K., Dastidar, S.G., Das, N., Kundu, M., Nasipuri, M.: Script identification in natural scene text images by learning local and global features on inception net. In: Raman, B., Murala, S., Chowdhury, A., Dhall, A., Goyal, P. (eds.) CVIP 2021. CCIS, vol. 1567, pp. 458\u2013467. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-031-11346-8_40"},{"issue":"19","key":"7_CR7","doi-asserted-by":"publisher","first-page":"29095","DOI":"10.1007\/s11042-021-11103-8","volume":"80","author":"M Ghosh","year":"2021","unstructured":"Ghosh, M., Mukherjee, H., Obaidullah, S.M., Santosh, K., Das, N., Roy, K.: Lwsinet: a deep learning-based approach towards video script identification. Multimedia Tools Appl. 80(19), 29095\u201329128 (2021)","journal-title":"Multimedia Tools Appl."},{"key":"7_CR8","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.patcog.2017.01.032","volume":"67","author":"L Gomez","year":"2017","unstructured":"Gomez, L., Nicolaou, A., Karatzas, D.: Improving patch-based scene text script identification with ensembles of conjoined networks. Pattern Recogn. 67, 85\u201396 (2017)","journal-title":"Pattern Recogn."},{"issue":"2","key":"7_CR9","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1109\/34.574802","volume":"19","author":"J Hochberg","year":"1997","unstructured":"Hochberg, J., Kelly, P., Thomas, T., Kerns, L.: Automatic script identification from document images using cluster-based templates. IEEE Trans. Pattern Anal. Mach. Intell. 19(2), 176\u2013181 (1997)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"7_CR10","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/s42979-022-01414-4","volume":"4","author":"K Jobin","year":"2022","unstructured":"Jobin, K., Mondal, A., Jawahar, C.: Document image analysis using deep multi-modular features. SN Comput. Sci. 4(1), 5 (2022)","journal-title":"SN Comput. Sci."},{"key":"7_CR11","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1016\/j.neunet.2019.04.014","volume":"116","author":"F Karim","year":"2019","unstructured":"Karim, F., Majumdar, S., Darabi, H., Harford, S.: Multivariate LSTM-FCNs for time series classification. Neural Netw. 116, 237\u2013245 (2019)","journal-title":"Neural Netw."},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"7_CR13","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"issue":"1","key":"7_CR14","doi-asserted-by":"publisher","first-page":"551","DOI":"10.3233\/JIFS-200260","volume":"40","author":"L Lu","year":"2021","unstructured":"Lu, L., Wu, D., Tang, Z., Yi, Y., Huang, F.: Mining discriminative patches for script identification in natural scene images. J. Intell. Fuzzy Syst. 40(1), 551\u2013563 (2021)","journal-title":"J. Intell. Fuzzy Syst."},{"key":"7_CR15","doi-asserted-by":"publisher","first-page":"52669","DOI":"10.1109\/ACCESS.2019.2911964","volume":"7","author":"L Lu","year":"2019","unstructured":"Lu, L., Yi, Y., Huang, F., Wang, K., Wang, Q.: Integrating local CNN and global CNN for script identification in natural scene images. IEEE Access 7, 52669\u201352679 (2019)","journal-title":"IEEE Access"},{"key":"7_CR16","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1016\/j.neucom.2020.09.015","volume":"421","author":"M Ma","year":"2021","unstructured":"Ma, M., Wang, Q.F., Huang, S., Huang, S., Goulermas, Y., Huang, K.: Residual attention-based multi-scale script identification in scene text images. Neurocomputing 421, 222\u2013233 (2021)","journal-title":"Neurocomputing"},{"issue":"4","key":"7_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3506699","volume":"21","author":"S Mahajan","year":"2022","unstructured":"Mahajan, S., Rani, R.: Word level script identification using convolutional neural network enhancement for scenic images. Trans. Asian Low-Resour. Lang. Inf. Process. 21(4), 1\u201329 (2022)","journal-title":"Trans. Asian Low-Resour. Lang. Inf. Process."},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Mallappa, S., Dhandra, B., Mukarambi, G.: Script identification from camera captured Indian document images with CNN model. ICTACT J. Soft Comput. 14(2) (2023)","DOI":"10.4018\/978-1-6684-3690-5.ch040"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Mei, J., Dai, L., Shi, B., Bai, X.: Scene text script identification with convolutional recurrent neural networks. In: 2016 23rd International Conference on Pattern Recognition (ICPR), pp. 4053\u20134058. IEEE (2016)","DOI":"10.1109\/ICPR.2016.7900268"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Nayef, N., et al.: ICDAR2017 robust reading challenge on multi-lingual scene text detection and script identification-RRC-MLT. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a01, pp. 1454\u20131459. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.237"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Peng, F., Ma, H., Liu, L., Lu, Y., Suen, C.Y.: Adaptive feature fusion for scene text script identification. Multimedia Tools Appl. 1\u201323 (2024)","DOI":"10.1007\/s11042-023-17986-z"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Sharma, N., Mandal, R., Sharma, R., Pal, U., Blumenstein, M.: ICDAR2015 competition on video script identification (CVSI 2015). In: 2015 13th International Conference on Document Analysis and Recognition (ICDAR), pp. 1196\u20131200. IEEE (2015)","DOI":"10.1109\/ICDAR.2015.7333950"},{"key":"7_CR23","unstructured":"Shen, Z., Zhang, M., Zhao, H., Yi, S., Li, H.: Efficient attention: attention with linear complexities. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3531\u20133539 (2021)"},{"issue":"11","key":"7_CR24","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2016","unstructured":"Shi, B., Bai, X., Yao, C.: An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans. Pattern Anal. Mach. Intell. 39(11), 2298\u20132304 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR25","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1016\/j.patcog.2015.11.005","volume":"52","author":"B Shi","year":"2016","unstructured":"Shi, B., Bai, X., Yao, C.: Script identification in the wild via discriminative convolutional neural network. Pattern Recogn. 52, 448\u2013458 (2016)","journal-title":"Pattern Recogn."},{"issue":"7","key":"7_CR26","doi-asserted-by":"publisher","first-page":"751","DOI":"10.1109\/34.689305","volume":"20","author":"T Tan","year":"1998","unstructured":"Tan, T.: Rotation invariant texture features and their use in automatic script identification. IEEE Trans. Pattern Anal. Mach. Intell. 20(7), 751\u2013756 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"650","DOI":"10.1007\/978-3-030-58607-2_38","volume-title":"Computer Vision \u2013 ECCV 2020","author":"W Wang","year":"2020","unstructured":"Wang, W., et al.: Scene text image super-resolution in the wild. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12355, pp. 650\u2013666. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_38"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Xu, W., Xu, Y., Chang, T., Tu, Z.: Co-scale conv-attentional image transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9981\u20139990 (2021)","DOI":"10.1109\/ICCV48922.2021.00983"},{"key":"7_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.104916","volume":"113","author":"K Yang","year":"2022","unstructured":"Yang, K., Yi, J., Chen, A., Liu, J., Chen, W., Jin, Z.: Convpatchtrans: a script identification network with global and local semantics deeply integrated. Eng. Appl. Artif. Intell. 113, 104916 (2022)","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"13","key":"7_CR30","doi-asserted-by":"publisher","first-page":"2837","DOI":"10.3390\/electronics12132837","volume":"12","author":"Z Zhang","year":"2023","unstructured":"Zhang, Z., Eli, E., Mamat, H., Aysa, A., Ubul, K.: EA-ConvNeXt: an approach to script identification in natural scenes based on edge flow and coordinate attention. Electronics 12(13), 2837 (2023)","journal-title":"Electronics"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Zhao, C., et al.: Scene text image super-resolution via parallelly contextual attention network. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 2908\u20132917 (2021)","DOI":"10.1145\/3474085.3475469"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70536-6_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T17:10:37Z","timestamp":1732727437000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70536-6_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705359","9783031705366"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70536-6_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"3 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}