{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T09:29:06Z","timestamp":1769938146438,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819549689","type":"print"},{"value":"9789819549696","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-4969-6_3","type":"book-chapter","created":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T08:48:53Z","timestamp":1763974133000},"page":"28-39","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing RAG System Performance Through Semantic Layout Chunking"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4079-1487","authenticated-orcid":false,"given":"Man","family":"Qin","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4445-0025","authenticated-orcid":false,"given":"Qiang","family":"Sun","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0748-8040","authenticated-orcid":false,"given":"Tim","family":"French","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7409-0948","authenticated-orcid":false,"given":"Wei","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,25]]},"reference":[{"issue":"1","key":"3_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.mcpdig.2024.11.005","volume":"3","author":"DM Anisuzzaman","year":"2025","unstructured":"Anisuzzaman, D.M., Malins, J.G., Friedman, P.A., Attia, Z.I.: Fine-tuning large language models for specialized use cases. Mayo Clinic Proc. Digit. Heal. 3(1), 100184 (2025). https:\/\/doi.org\/10.1016\/j.mcpdig.2024.11.005","journal-title":"Mayo Clinic Proc. Digit. Heal."},{"key":"3_CR2","doi-asserted-by":"publisher","unstructured":"Chen, T., Wang, H., Chen, S., Yu, W., Ma, K., Zhao, X., Zhang, H., Yu, D.: Dense X retrieval: what retrieval granularity should we use? In: Al-Onaizan, Y., Bansal, M., Chen, Y.N. (eds.) Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing. pp. 15159\u201315177. Association for Computational Linguistics, Miami (2024). https:\/\/doi.org\/10.18653\/v1\/2024.emnlp-main.845","DOI":"10.18653\/v1\/2024.emnlp-main.845"},{"key":"3_CR3","doi-asserted-by":"publisher","unstructured":"Da, C., Luo, C., Zheng, Q., Yao, C.: Vision grid transformer for document layout analysis. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 19405\u201319415 (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.01783","DOI":"10.1109\/ICCV51070.2023.01783"},{"key":"3_CR4","doi-asserted-by":"publisher","unstructured":"Finardi, P., et al.: The chronicles of RAG: the retriever, the chunk and the generator. CoRR abs\/2401.07883 (2024). https:\/\/doi.org\/10.48550\/ARXIV.2401.07883","DOI":"10.48550\/ARXIV.2401.07883"},{"key":"3_CR5","unstructured":"Gao, Y., et al.: Retrieval-augmented generation for large language models: a survey (2024). https:\/\/arxiv.org\/abs\/2312.10997"},{"key":"3_CR6","doi-asserted-by":"publisher","first-page":"205520762513371","DOI":"10.1177\/20552076251337177","volume":"11","author":"OK Gargari","year":"2025","unstructured":"Gargari, O.K., Habibi, G.: Enhancing medical AI with retrieval-augmented generation: a mini narrative review. Digit. Heal. 11, 20552076251337176 (2025). https:\/\/doi.org\/10.1177\/20552076251337177","journal-title":"Digit. Heal."},{"key":"3_CR7","unstructured":"G\u00fcnther, M., Mohr, I., Williams, D.J., Wang, B., Xiao, H.: Late chunking: contextual chunk embeddings using long-context embedding models (2024). https:\/\/arxiv.org\/abs\/2409.04701"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: Proceedings of the 30th ACM International Conference on Multimedia (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Hui, Y., Lu, Y., Zhang, H.: UDA: a benchmark suite for retrieval augmented generation in real-world document analysis. In: The Thirty-eight Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2024). https:\/\/openreview.net\/forum?id=MS4oxVfBHn","DOI":"10.52202\/079017-2145"},{"key":"3_CR10","doi-asserted-by":"publisher","unstructured":"Jimeno-Yepes, A., You, Y., Milczek, J., Laverde, S., Li, R.: Financial report chunking for effective retrieval augmented generation. CoRR abs\/2402.05131 (2024). https:\/\/doi.org\/10.48550\/ARXIV.2402.05131","DOI":"10.48550\/ARXIV.2402.05131"},{"key":"3_CR11","unstructured":"Lewis, P., et al.: Retrieval-augmented generation for knowledge-intensive NLP tasks. In: Proceedings of the 34th International Conference on Neural Information Processing Systems. NIPS \u201920, Curran Associates Inc., Red Hook (2020)"},{"key":"3_CR12","unstructured":"Lin, C.Y.: Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, Post-Conference Workshop of ACL 2004. pp. 74\u201381 (2004). https:\/\/aclanthology.org\/W04-1013.pdf"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Lukasik, M., Dadachev, B., Papineni, K., Sim\u00f5es, G.: Text segmentation by cross segment attention. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 4707\u20134716. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.380","DOI":"10.18653\/v1\/2020.emnlp-main.380"},{"key":"3_CR14","unstructured":"L\u00e1la, J., O\u2019Donoghue, O., Shtedritski, A., Cox, S., Rodriques, S.G., White, A.D.: Paperqa: retrieval-augmented generative agent for scientific research (2023). https:\/\/arxiv.org\/abs\/2312.07559"},{"key":"3_CR15","unstructured":"Neeser, A., Latimer, K., Khatri, A., Latimer, C., Ramakrishnan, N.: Quote: question-oriented text embeddings (2025). https:\/\/arxiv.org\/abs\/2502.10976"},{"key":"3_CR16","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL), pp. 311\u2013318 (2002). https:\/\/doi.org\/10.3115\/1073083.1073135","DOI":"10.3115\/1073083.1073135"},{"key":"3_CR17","unstructured":"Pipitone, N., Alami, G.H.: Legalbench-rag: a benchmark for retrieval-augmented generation in the legal domain (2024). https:\/\/arxiv.org\/abs\/2408.10343"},{"key":"3_CR18","unstructured":"Setty, S., Thakkar, H., Lee, A., Chung, E., Vidra, N.: Improving retrieval for rag based question answering models on financial documents (2024). https:\/\/arxiv.org\/abs\/2404.07221"},{"key":"3_CR19","unstructured":"Sharma, S., et al.: Retrieval augmented generation for domain-specific question answering. In: AAAI 2024 Workshop on Scientific Document Understanding. AAAI (2024)"},{"key":"3_CR20","doi-asserted-by":"publisher","unstructured":"Tan, J., Dou, Z., Wang, W., Wang, M., Chen, W., Wen, J.R.: HtmlRAG: HTML is better than plain text for modeling retrieved knowledge in RAG systems. In: Proceedings of the ACM on Web Conference 2025, pp. 1733\u20131746 (2025). https:\/\/doi.org\/10.1145\/3696410.3714546, arXiv:2411.02959 [cs]","DOI":"10.1145\/3696410.3714546"},{"key":"3_CR21","unstructured":"Xu, Z., et al.: When does divide and conquer work for long context LLM? a noise decomposition framework (2025). https:\/\/arxiv.org\/abs\/2506.16411"},{"key":"3_CR22","doi-asserted-by":"publisher","unstructured":"Zhang, Q., Chen, Q., Li, Y., Liu, J., Wang, W.: Sequence model with self-adaptive sliding window for efficient spoken document segmentation. In: 2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 411\u2013418 (2021). https:\/\/doi.org\/10.1109\/ASRU51503.2021.9688078","DOI":"10.1109\/ASRU51503.2021.9688078"},{"key":"3_CR23","unstructured":"Zhao, J., et al.: Meta-chunking: learning efficient text segmentation via logical perception (2024). https:\/\/arxiv.org\/abs\/2410.12788"},{"key":"3_CR24","unstructured":"Zhao, Z., Kang, H., Wang, B., He, C.: Doclayout-yolo: enhancing document layout analysis through diverse synthetic data and global-to-local adaptive perception (2024). https:\/\/arxiv.org\/abs\/2410.12628"}],"container-title":["Lecture Notes in Computer Science","AI 2025: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-4969-6_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T20:57:31Z","timestamp":1769893051000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-4969-6_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,25]]},"ISBN":["9789819549689","9789819549696"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-4969-6_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,25]]},"assertion":[{"value":"25 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australasian Joint Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canberra, ACT","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"38","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ausai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ajcai2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}