{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T12:04:31Z","timestamp":1780488271591,"version":"3.54.1"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032212887","type":"print"},{"value":"9783032212894","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-21289-4_38","type":"book-chapter","created":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T01:05:23Z","timestamp":1774314323000},"page":"598-613","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Less LLM, More Documents: Searching for\u00a0Improved RAG"],"prefix":"10.1007","author":[{"given":"Jingjie","family":"Ning","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yibo","family":"Kong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yunfan","family":"Long","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jamie","family":"Callan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,3,25]]},"reference":[{"key":"38_CR1","doi-asserted-by":"crossref","unstructured":"Berant, J., Chou, A., Frostig, R., Liang, P.: Semantic parsing on Freebase from question-answer pairs. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1533\u20131544. Association for Computational Linguistics, Seattle, Washington, USA (2013). https:\/\/www.aclweb.org\/anthology\/D13-1160","DOI":"10.18653\/v1\/D13-1160"},{"key":"38_CR2","unstructured":"Borgeaud, S., et al.: Improving language models by retrieving from trillions of tokens (2022). https:\/\/arxiv.org\/abs\/2112.04426"},{"key":"38_CR3","unstructured":"Brown, T.B., et al.: Language models are few-shot learners (2020). https:\/\/arxiv.org\/abs\/2005.14165"},{"key":"38_CR4","unstructured":"Chowdhery, A., et al.: Palm: scaling language modeling with pathways. J. Mach. Learn. Res. 24(1) (2023)"},{"key":"38_CR5","unstructured":"Coelho, J., et al.: Deepresearchgym: a free, transparent, and reproducible evaluation sandbox for deep research (2025). https:\/\/arxiv.org\/abs\/2505.19253"},{"key":"38_CR6","unstructured":"Gao, Y., et al.: Retrieval-augmented generation for large language models: a survey (2024). https:\/\/arxiv.org\/abs\/2312.10997"},{"key":"38_CR7","unstructured":"Ghorbani, B., et al.: Scaling laws for neural machine translation. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=hR_SMu8cxCV"},{"key":"38_CR8","unstructured":"Gupta, S., Ranjan, R., Singh, S.N.: A comprehensive survey of retrieval-augmented generation (rag): evolution, current landscape and future directions (2024). https:\/\/arxiv.org\/abs\/2410.12837"},{"key":"38_CR9","doi-asserted-by":"publisher","unstructured":"He, Z., Jiang, H., Wang, Z., Yang, Y., Qiu, L.K., Qiu, L.: Position engineering: boosting large language models through positional information manipulation. In: Al-Onaizan, Y., Bansal, M., Chen, Y.N. (eds.) Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pp. 7333\u20137345. Association for Computational Linguistics, Miami, Florida, USA (2024). https:\/\/doi.org\/10.18653\/v1\/2024.emnlp-main.417. https:\/\/aclanthology.org\/2024.emnlp-main.417\/","DOI":"10.18653\/v1\/2024.emnlp-main.417"},{"key":"38_CR10","unstructured":"Hu, S., et al.: MiniCPM: unveiling the potential of small language models with scalable training strategies. In: First Conference on Language Modeling (2024). https:\/\/openreview.net\/forum?id=3X2L2TFr0f"},{"key":"38_CR11","doi-asserted-by":"publisher","unstructured":"Izacard, G., et al.: Unsupervised dense information retrieval with contrastive learning (2021). https:\/\/doi.org\/10.48550\/ARXIV.2112.09118. https:\/\/arxiv.org\/abs\/2112.09118","DOI":"10.48550\/ARXIV.2112.09118"},{"key":"38_CR12","unstructured":"Izacard, G., et al.: Atlas: few-shot learning with retrieval augmented language models. J. Mach. Learn. Res. 24(1) (2023)"},{"key":"38_CR13","doi-asserted-by":"publisher","unstructured":"Joshi, M., Choi, E., Weld, D., Zettlemoyer, L.: TriviaQA: a large scale distantly supervised challenge dataset for reading comprehension. In: Barzilay, R., Kan, M.Y. (eds.) Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1601\u20131611. Association for Computational Linguistics, Vancouver, Canada (2017). https:\/\/doi.org\/10.18653\/v1\/P17-1147. https:\/\/aclanthology.org\/P17-1147\/","DOI":"10.18653\/v1\/P17-1147"},{"key":"38_CR14","unstructured":"Kaplan, J., et al.: Scaling laws for neural language models (2020). https:\/\/arxiv.org\/abs\/2001.08361"},{"key":"38_CR15","doi-asserted-by":"publisher","unstructured":"Karpukhin, V., et al.: Dense passage retrieval for open-domain question answering. In: Webber, B., Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6769\u20136781. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.550. https:\/\/aclanthology.org\/2020.emnlp-main.550\/","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"38_CR16","doi-asserted-by":"publisher","unstructured":"Kwiatkowski, T., et al.: Natural questions: a benchmark for question answering research. Trans. Assoc. Comput. Linguist. 7, 452\u2013466 (2019). https:\/\/doi.org\/10.1162\/tacl_a_00276. https:\/\/aclanthology.org\/Q19-1026\/","DOI":"10.1162\/tacl_a_00276"},{"key":"38_CR17","unstructured":"Lewis, P., et al.: Retrieval-augmented generation for knowledge-intensive NLP tasks. In: Proceedings of the 34th International Conference on Neural Information Processing Systems. NIPS 2020. Curran Associates Inc., Red Hook, NY, USA (2020)"},{"key":"38_CR18","unstructured":"Li, S., Stenzel, L., Eickhoff, C., Bahrainian, S.A.: Enhancing retrieval-augmented generation: a study of best practices. In: Rambow, O., Wanner, L., Apidianaki, M., Al-Khalifa, H., Eugenio, B.D., Schockaert, S. (eds.) Proceedings of the 31st International Conference on Computational Linguistics, pp. 6705\u20136717. Association for Computational Linguistics, Abu Dhabi, UAE (2025). https:\/\/aclanthology.org\/2025.coling-main.449\/"},{"key":"38_CR19","doi-asserted-by":"publisher","unstructured":"Narayanan, D., et al.: Efficient large-scale language model training on GPU clusters using megatron-lm. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. SC 2021. Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3458817.3476209","DOI":"10.1145\/3458817.3476209"},{"key":"38_CR20","doi-asserted-by":"crossref","unstructured":"Overwijk, A., Xiong, C., Liu, X., VandenBerg, C., Callan, J.: Clueweb22: 10 billion web documents with visual and semantic information (2022). https:\/\/arxiv.org\/abs\/2211.15848","DOI":"10.1145\/3477495.3536321"},{"key":"38_CR21","unstructured":"Patterson, D., et al.: Carbon emissions and large neural network training (2021). https:\/\/arxiv.org\/abs\/2104.10350"},{"key":"38_CR22","doi-asserted-by":"publisher","unstructured":"Reynolds, L., McDonell, K.: Prompt programming for large language models: beyond the few-shot paradigm. In: Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems. CHI EA 2021. Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3411763.3451760","DOI":"10.1145\/3411763.3451760"},{"key":"38_CR23","doi-asserted-by":"publisher","unstructured":"Santhanam, K., Khattab, O., Saad-Falcon, J., Potts, C., Zaharia, M.: ColBERTv2: effective and efficient retrieval via lightweight late interaction. In: Carpuat, M., de\u00a0Marneffe, M.C., Meza\u00a0Ruiz, I.V. (eds.) Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 3715\u20133734. Association for Computational Linguistics, Seattle, United States (2022). https:\/\/doi.org\/10.18653\/v1\/2022.naacl-main.272. https:\/\/aclanthology.org\/2022.naacl-main.272\/","DOI":"10.18653\/v1\/2022.naacl-main.272"},{"key":"38_CR24","unstructured":"Shao, R., et al.: Scaling retrieval-based language models with a trillion-token datastore. In: The Thirty-Eighth Annual Conference on Neural Information Processing Systems (2024). https:\/\/openreview.net\/forum?id=iAkhPz7Qt3"},{"key":"38_CR25","doi-asserted-by":"publisher","unstructured":"Shi, W., et al.: REPLUG: retrieval-augmented black-box language models. In: Duh, K., Gomez, H., Bethard, S. (eds.) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 8371\u20138384. Association for Computational Linguistics, Mexico City, Mexico (2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.463. https:\/\/aclanthology.org\/2024.naacl-long.463\/","DOI":"10.18653\/v1\/2024.naacl-long.463"},{"key":"38_CR26","unstructured":"Subramanya, S.J., Devvrit, Kadekodi, R., Krishaswamy, R., Simhadri, H.V.: DiskANN: fast accurate billion-point nearest neighbor search on a single node. Curran Associates Inc., Red Hook, NY, USA (2019)"},{"key":"38_CR27","unstructured":"Thakur, N., Reimers, N., R\u00fcckl\u00e9, A., Srivastava, A., Gurevych, I.: BEIR: a heterogeneous benchmark for zero-shot evaluation of information retrieval models. In: Thirty-Fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2) (2021). https:\/\/openreview.net\/forum?id=wCu6T5xFjeJ"},{"key":"38_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlp.2024.100088","volume":"8","author":"P Upadhyay","year":"2024","unstructured":"Upadhyay, P., Agarwal, R., Dhiman, S., Sarkar, A., Chaturvedi, S.: A comprehensive survey on answer generation methods using NLP. Nat. Lang. Process. J. 8, 100088 (2024). https:\/\/doi.org\/10.1016\/j.nlp.2024.100088","journal-title":"Nat. Lang. Process. J."},{"key":"38_CR29","doi-asserted-by":"publisher","unstructured":"Vladika, J., Matthes, F.: On the influence of context size and model choice in retrieval-augmented generation systems. In: Chiruzzo, L., Ritter, A., Wang, L. (eds.) Findings of the Association for Computational Linguistics: NAACL 2025, pp. 6724\u20136736. Association for Computational Linguistics, Albuquerque, New Mexico (2025). https:\/\/doi.org\/10.18653\/v1\/2025.findings-naacl.375. https:\/\/aclanthology.org\/2025.findings-naacl.375\/","DOI":"10.18653\/v1\/2025.findings-naacl.375"},{"key":"38_CR30","doi-asserted-by":"crossref","unstructured":"Voorhees, E.M., Tice, D.M.: The TREC-8 question answering track. In: Gavrilidou, M., Carayannis, G., Markantonatou, S., Piperidis, S., Stainhauer, G. (eds.) Proceedings of the Second International Conference on Language Resources and Evaluation (LREC 2000). European Language Resources Association (ELRA), Athens, Greece (2000). https:\/\/aclanthology.org\/L00-1018\/","DOI":"10.6028\/NIST.SP.500-246.qa-overview"},{"key":"38_CR31","doi-asserted-by":"publisher","unstructured":"Wang, Y., et al.: Self-instruct: aligning language models with self-generated instructions. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 13484\u201313508. Association for Computational Linguistics, Toronto, Canada (2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.754. https:\/\/aclanthology.org\/2023.acl-long.754\/","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"38_CR32","unstructured":"Xiong, L., et al.: Approximate nearest neighbor negative contrastive learning for dense text retrieval. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=zeFrfgyZln"},{"key":"38_CR33","unstructured":"Yang, A., et al.: Qwen3 technical report (2025). https:\/\/arxiv.org\/abs\/2505.09388"},{"key":"38_CR34","unstructured":"Zhang, S., et al.: Instruction tuning for large language models: a survey. arXiv preprint arXiv:2308.10792 (2023)"},{"key":"38_CR35","unstructured":"Zhang, Y., et al.: Qwen3 embedding: advancing text embedding and reranking through foundation models. arXiv preprint arXiv:2506.05176 (2025)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-21289-4_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T01:05:27Z","timestamp":1774314327000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-21289-4_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032212887","9783032212894"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-21289-4_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"25 March 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Delft","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 March 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 April 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"48","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2026.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}