{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T08:44:59Z","timestamp":1770972299125,"version":"3.50.1"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T00:00:00Z","timestamp":1756598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,31]]},"DOI":"10.1109\/mlsp62443.2025.11204203","type":"proceedings-article","created":{"date-parts":[[2025,10,24]],"date-time":"2025-10-24T17:15:52Z","timestamp":1761326152000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["Semantic Chunking and Chain-Of-Thought Reasoning for Rag-Based Document Processing"],"prefix":"10.1109","author":[{"given":"Yi\u011fit","family":"Ate\u015f","sequence":"first","affiliation":[{"name":"TAM Finans R&D,Istanbul,T\u00fcrkiye"}]},{"given":"Alperen","family":"Sayar","sequence":"additional","affiliation":[{"name":"TAM Finans R&D,Istanbul,T\u00fcrkiye"}]},{"given":"\u0130brahim Umut","family":"Bozlar","sequence":"additional","affiliation":[{"name":"TAM Finans R&D,Istanbul,T\u00fcrkiye"}]},{"given":"Seyit","family":"Ertu\u011frul","sequence":"additional","affiliation":[{"name":"TAM Finans R&D,Istanbul,T\u00fcrkiye"}]},{"given":"Suayb S.","family":"Arslan","sequence":"additional","affiliation":[{"name":"Bo\u011fazi\u00e7i University,Department of Computer Engineering and Institute for DSAI"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Hallucination is inevitable: An innate limitation of large language models","author":"Xu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref2","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Adv. in Neural Inf. Proc. Systems"},{"key":"ref3","article-title":"Retrieval-augmented generation for large language models: A survey","volume":"2","author":"Gao","year":"2023","journal-title":"arXiv preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.372"},{"key":"ref5","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Adv. in Neural Inf. Proc. Systems"},{"key":"ref6","article-title":"Deep reinforcement learning from human preferences","volume":"30","author":"Christiano","year":"2017","journal-title":"Adv. in Neural Inf. Proc. Systems"},{"key":"ref7","first-page":"730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Adv. in Neural Inf. Proc. Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA61862.2024.00258"},{"key":"ref9","article-title":"Self-rag: Learning to retrieve, generate, and critique through self-reflection","volume-title":"The Twelfth International Conference on Learning Representations (ICLR 2023)","author":"Asai"},{"issue":"2005","key":"ref10","article-title":"A framework for analyzing and improving content-based chunking algorithms","volume-title":"HP Labs Tech. Report TR","volume":"30","author":"Eshghi","year":"2005"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2017.7997030"},{"key":"ref12","article-title":"Is semantic chunking worth the computational cost?","author":"Qu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"ref14","article-title":"Nomic embed: Training a reproducible long context text embedder","author":"Nussbaum","year":"2024","journal-title":"arXiv preprint"},{"key":"ref15","article-title":"A comprehensive survey on vector database: Storage and retrieval technique, challenge","author":"Han","year":"2023","journal-title":"arXiv preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.dialdoc-1.16"},{"key":"ref17","article-title":"Casegpt: a case reasoning framework based on language models and retrieval-augmented generation","author":"Yang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3711542.3711575"}],"event":{"name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","location":"Istanbul, Turkiye","start":{"date-parts":[[2025,8,31]]},"end":{"date-parts":[[2025,9,3]]}},"container-title":["2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11204201\/11204202\/11204203.pdf?arnumber=11204203","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T18:37:29Z","timestamp":1762367849000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11204203\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,31]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/mlsp62443.2025.11204203","relation":{},"subject":[],"published":{"date-parts":[[2025,8,31]]}}}