{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T07:45:07Z","timestamp":1775807107391,"version":"3.50.1"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T00:00:00Z","timestamp":1749254400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T00:00:00Z","timestamp":1749254400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10032-025-00535-9","type":"journal-article","created":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T14:53:58Z","timestamp":1749308038000},"page":"129-135","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A structured document understanding model based on gate mechanism and cross attention"],"prefix":"10.1007","volume":"29","author":[{"given":"Bin","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,7]]},"reference":[{"key":"535_CR1","unstructured":"Li, C., Guo, R., Zhou, J., An, M., Du, Y., Zhu, L., Liu, Y., Hu, X., Yu, D.: Pp-structurev2: A stronger document analysis system. arXiv preprint arXiv:2210.05391 (2022)"},{"key":"535_CR2","unstructured":"Cui, L., Xu, Y., Lv, T., Wei, F.: Document ai: Benchmarks, models and applications. arXiv preprint arXiv:2111.08609 (2021)"},{"key":"535_CR3","doi-asserted-by":"crossref","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: Layoutlm: Pre-training of text and layout for document image understanding. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 1192\u20131200 (2020)","DOI":"10.1145\/3394486.3403172"},{"key":"535_CR4","doi-asserted-by":"crossref","unstructured":"Li, C., Bi, B., Yan, M., Wang, W., Huang, S., Huang, F., Si, L.: Structurallm: Structural pre-training for form understanding. arXiv preprint arXiv:2105.11210 (2021)","DOI":"10.18653\/v1\/2021.acl-long.493"},{"key":"535_CR5","unstructured":"Hong, T., Kim, D., Ji, M., Hwang, W., Nam, D., Park, S.: Bros: A pre-trained language model for understanding texts in document (2021). In: URL https:\/\/openreview.Net\/forum, p. 48 (2020)"},{"key":"535_CR6","doi-asserted-by":"publisher","first-page":"6302","DOI":"10.1109\/TIP.2020.2990603","volume":"29","author":"J Jiao","year":"2020","unstructured":"Jiao, J., Tu, W.-C., Liu, D., He, S., Lau, R.W.H., Huang, T.S.: Formnet: Formatted learning for image restoration. IEEE Transactions on Image Processing 29, 6302\u20136314 (2020). https:\/\/doi.org\/10.1109\/TIP.2020.2990603","journal-title":"IEEE Transactions on Image Processing"},{"key":"535_CR7","doi-asserted-by":"crossref","unstructured":"Li, P., Gu, J., Kuen, J., Morariu, V.I., Zhao, H., Jain, R., Manjunatha, V., Liu, H.: Selfdoc: Self-supervised document representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5652\u20135660 (2021)","DOI":"10.1109\/CVPR46437.2021.00560"},{"key":"535_CR8","doi-asserted-by":"crossref","unstructured":"Wang, J., Jin, L., Ding, K.: Lilt: A simple yet effective language-independent layout transformer for structured document understanding. arXiv preprint arXiv:2202.13669 (2022)","DOI":"10.18653\/v1\/2022.acl-long.534"},{"issue":"1","key":"535_CR9","first-page":"61","volume":"56","author":"K Tian","year":"2020","unstructured":"Tian, K., Zhou, R., Dong, H., Yin, J.: An abstractive summarization method based on encoder-sharing and gated network. Beijing Da Xue Xue Bao 56(1), 61\u201367 (2020)","journal-title":"Beijing Da Xue Xue Bao"},{"key":"535_CR10","doi-asserted-by":"crossref","unstructured":"Jaume, G., Ekenel, H.K., Thiran, J.-P.: Funsd: A dataset for form understanding in noisy scanned documents. In: 2019 International Conference on Document Analysis and Recognition Workshops (ICDARW), vol. 2, pp. 1\u20136 (2019). IEEE","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"535_CR11","doi-asserted-by":"crossref","unstructured":"Graves, A., Graves, A.: Long short-term memory. Supervised sequence labelling with recurrent neural networks, 37\u201345 (2012)","DOI":"10.1007\/978-3-642-24797-2_4"},{"key":"535_CR12","unstructured":"Gehring, J., Auli, M., Grangier, D., Yarats, D., Dauphin, Y.N.: Convolutional sequence to sequence learning. In: International Conference on Machine Learning, pp. 1243\u20131252 (2017). PMLR"},{"key":"535_CR13","doi-asserted-by":"crossref","unstructured":"Wang, F., Gu, H., Li, D., Lu, T., Zhang, P., Gu, N.: Towards deeper, lighter and interpretable cross network for ctr prediction. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, pp. 2523\u20132533 (2023)","DOI":"10.1145\/3583780.3615089"},{"key":"535_CR14","unstructured":"Joseph, M., Raj, H.: Gandalf: Gated adaptive network for deep automated learning of features. arXiv preprint arXiv:2207.08548 (2022)"},{"key":"535_CR15","unstructured":"Cholakov, R., Kolev, T.: The gatedtabtransformer. an enhanced deep learning architecture for tabular modeling. arXiv preprint arXiv:2201.00199 (2022)"},{"key":"535_CR16","unstructured":"Huang, X., Khetan, A., Cvitkovic, M., Karnin, Z.: Tabtransformer: Tabular data modeling using contextual embeddings. arXiv preprint arXiv:2012.06678 (2020)"},{"key":"535_CR17","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"535_CR18","unstructured":"Subramani, N., Matton, A., Greaves, M., Lam, A.: A survey of deep learning approaches for ocr and document understanding. arXiv preprint arXiv:2011.13534 (2020)"},{"key":"535_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"535_CR20","doi-asserted-by":"crossref","unstructured":"Li, Y., Qian, Y., Yu, Y., Qin, X., Zhang, C., Liu, Y., Yao, K., Han, J., Liu, J., Ding, E.: Structext: Structured text understanding with multi-modal transformers. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 1912\u20131920 (2021)","DOI":"10.1145\/3474085.3475345"},{"key":"535_CR21","doi-asserted-by":"crossref","unstructured":"Gemelli, A., Biswas, S., Civitelli, E., Llad\u00f3s, J., Marinai, S.: Doc2graph: a task agnostic document understanding framework based on graph neural networks. In: European Conference on Computer Vision, pp. 329\u2013344 (2022). Springer","DOI":"10.1007\/978-3-031-25069-9_22"},{"key":"535_CR22","doi-asserted-by":"crossref","unstructured":"Xu, Y., Xu, Y., Lv, T., Cui, L., Wei, F., Wang, G., Lu, Y., Florencio, D., Zhang, C., Che, W., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. arXiv preprint arXiv:2012.14740 (2020)","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"535_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109542","volume":"140","author":"P Cao","year":"2023","unstructured":"Cao, P., Wu, J.: Graphrevisedie: Multimodal information extraction with graph-revised network. Pattern Recognition 140, 109542 (2023). https:\/\/doi.org\/10.1016\/j.patcog.2023.109542","journal-title":"Pattern Recognition"},{"key":"535_CR24","unstructured":"Jiang, Z., Wang, B., Chen, J., Nakashima, Y.: Relayout: Towards real-world document understanding via layout-enhanced pre-training. In: Proceedings of the 31st International Conference on Computational Linguistics, pp. 3778\u20133793 (2025)"},{"key":"535_CR25","doi-asserted-by":"crossref","unstructured":"Mao, Z., Bai, H., Hou, L., Shang, L., Jiang, X., Liu, Q., Wong, K.-F.: Visually guided generative text-layout pre-training for document intelligence. In: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 4713\u20134730 (2024)","DOI":"10.18653\/v1\/2024.naacl-long.264"},{"key":"535_CR26","doi-asserted-by":"crossref","unstructured":"Yoon, C., Lee, W., Jang, S., Choi, K., Jung, M., Choi, D.: Language, ocr, form independent (lofi) pipeline for industrial document information extraction. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track, pp. 1056\u20131067 (2024)","DOI":"10.18653\/v1\/2024.emnlp-industry.79"}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-025-00535-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10032-025-00535-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-025-00535-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T06:50:02Z","timestamp":1775803802000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10032-025-00535-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,7]]},"references-count":26,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["535"],"URL":"https:\/\/doi.org\/10.1007\/s10032-025-00535-9","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"value":"1433-2833","type":"print"},{"value":"1433-2825","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,7]]},"assertion":[{"value":"29 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 May 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 May 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 June 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}