{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:06:54Z","timestamp":1765498014867,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761023","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T23:55:33Z","timestamp":1762559733000},"page":"3866-3876","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["SG-Filter: Enhancing Similar Text Retrieval via Hierarchical Summarized-Semantic Index and Adaptive Filtering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-6189-3251","authenticated-orcid":false,"given":"Jiancai","family":"Ye","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8280-9072","authenticated-orcid":false,"given":"Jun","family":"Liu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3889-8688","authenticated-orcid":false,"given":"Haoyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8566-5135","authenticated-orcid":false,"given":"Maojia","family":"Sheng","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8195-5744","authenticated-orcid":false,"given":"Tao","family":"Yang","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7000-6537","authenticated-orcid":false,"given":"Jiaming","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4286-6359","authenticated-orcid":false,"given":"Jinhao","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6108-5157","authenticated-orcid":false,"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0849-3252","authenticated-orcid":false,"given":"Guohao","family":"Dai","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China, Infinigence-AI, Shanghai, China, and Shanghai Innovation Institute, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Rq-rag: Learning to refine queries for retrieval augmented generation. arXiv preprint arXiv:2404.00610","author":"Chan Chi-Min","year":"2024","unstructured":"Chi-Min Chan, Chunpu Xu, Ruibin Yuan, Hongyin Luo, Wei Xue, Yike Guo, and Jie Fu. 2024. Rq-rag: Learning to refine queries for retrieval augmented generation. arXiv preprint arXiv:2404.00610 (2024)."},{"key":"e_1_3_2_1_3_1","volume-title":"Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation. arXiv preprint arXiv:2402.03216","author":"Chen Jianlv","year":"2024","unstructured":"Jianlv Chen, Shitao Xiao, Peitian Zhang, Kun Luo, Defu Lian, and Zheng Liu. 2024. Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation. arXiv preprint arXiv:2402.03216 (2024)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99736-6_7"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.365"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210187"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Matthijs Douze Alexandr Guzhva Chengqi Deng Jeff Johnson Gergely Szilvasy Pierre-Emmanuel Mazar\u00e9 Maria Lomeli Lucas Hosseini and Herv\u00e9 J\u00e9gou. 2024. The Faiss library. (2024). arXiv:2401.08281 [cs.LG]","DOI":"10.1109\/TBDATA.2025.3618474"},{"key":"e_1_3_2_1_8_1","volume-title":"From local to global: A graph rag approach to query-focused summarization. arXiv preprint arXiv:2404.16130","author":"Edge Darren","year":"2024","unstructured":"Darren Edge, Ha Trinh, Newman Cheng, Joshua Bradley, Alex Chao, Apurva Mody, Steven Truitt, and Jonathan Larson. 2024. From local to global: A graph rag approach to query-focused summarization. arXiv preprint arXiv:2404.16130 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.517"},{"key":"e_1_3_2_1_10_1","volume-title":"Precise zero-shot dense retrieval without relevance labels. arXiv preprint arXiv:2212.10496","author":"Gao Luyu","year":"2022","unstructured":"Luyu Gao, Xueguang Ma, Jimmy Lin, and Jamie Callan. 2022. Precise zero-shot dense retrieval without relevance labels. arXiv preprint arXiv:2212.10496 (2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"Retrieval-augmented generation for large language models: A survey. arXiv preprint arXiv:2312.10997","author":"Gao Yunfan","year":"2023","unstructured":"Yunfan Gao, Yun Xiong, Xinyu Gao, Kangxiang Jia, Jinliu Pan, Yuxi Bi, Yi Dai, Jiawei Sun, and Haofen Wang. 2023. Retrieval-augmented generation for large language models: A survey. arXiv preprint arXiv:2312.10997 (2023)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21312"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583552"},{"key":"e_1_3_2_1_14_1","volume-title":"LightRAG: Simple and Fast Retrieval-Augmented Generation. arXiv e-prints","author":"Guo Zirui","year":"2024","unstructured":"Zirui Guo, Lianghao Xia, Yanhua Yu, Tu Ao, and Chao Huang. 2024. LightRAG: Simple and Fast Retrieval-Augmented Generation. arXiv e-prints (2024), arXiv--2410."},{"key":"e_1_3_2_1_15_1","volume-title":"A Comprehensive Survey of Retrieval-Augmented Generation (RAG): Evolution, Current Landscape and Future Directions. arXiv preprint arXiv:2410.12837","author":"Gupta Shailja","year":"2024","unstructured":"Shailja Gupta, Rajesh Ranjan, and Surya Narayan Singh. 2024. A Comprehensive Survey of Retrieval-Augmented Generation (RAG): Evolution, Current Landscape and Future Directions. arXiv preprint arXiv:2410.12837 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_18_1","volume-title":"Reinforcement Learning for Optimizing RAG for Domain Chatbots. arXiv preprint arXiv:2401.06800","author":"Kulkarni Mandar","year":"2024","unstructured":"Mandar Kulkarni, Praveen Tangarajan, Kyung Kim, and Anusua Trivedi. 2024. Reinforcement Learning for Optimizing RAG for Domain Chatbots. arXiv preprint arXiv:2401.06800 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.01.091"},{"key":"e_1_3_2_1_20_1","volume-title":"From matching to generation: A survey on generative information retrieval. arXiv preprint arXiv:2404.14851","author":"Li Xiaoxi","year":"2024","unstructured":"Xiaoxi Li, Jiajie Jin, Yujia Zhou, Yuyao Zhang, Peitian Zhang, Yutao Zhu, and Zhicheng Dou. 2024. From matching to generation: A survey on generative information retrieval. arXiv preprint arXiv:2404.14851 (2024)."},{"key":"e_1_3_2_1_21_1","volume-title":"Query rewriting for retrieval-augmented large language models. arXiv preprint arXiv:2305.14283","author":"Ma Xinbei","year":"2023","unstructured":"Xinbei Ma, Yeyun Gong, Pengcheng He, Hai Zhao, and Nan Duan. 2023. Query rewriting for retrieval-augmented large language models. arXiv preprint arXiv:2305.14283 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"Sparse meets dense: A hybrid approach to enhance scientific document retrieval. arXiv preprint arXiv:2401.04055","author":"Mandikal Priyanka","year":"2024","unstructured":"Priyanka Mandikal and Raymond Mooney. 2024. Sparse meets dense: A hybrid approach to enhance scientific document retrieval. arXiv preprint arXiv:2401.04055 (2024)."},{"key":"e_1_3_2_1_23_1","unstructured":"MTEB. 2024. Medical QA Dataset. https:\/\/huggingface.co\/datasets\/mteb\/medical_qa Accessed: 2024--10--14."},{"key":"e_1_3_2_1_24_1","volume-title":"MTEB: Massive text embedding benchmark. arXiv preprint arXiv:2210.07316","author":"Muennighoff Niklas","year":"2022","unstructured":"Niklas Muennighoff, Nouamane Tazi, Lo\u00efc Magne, and Nils Reimers. 2022. MTEB: Massive text embedding benchmark. arXiv preprint arXiv:2210.07316 (2022)."},{"key":"e_1_3_2_1_25_1","unstructured":"OpenAI. 2023. ChatGPT 3.5 Turbo. https:\/\/platform.openai.com\/docs\/models\/"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589335.3648298"},{"key":"e_1_3_2_1_27_1","volume-title":"Synchromesh: Reliable code generation from pre-trained language models. arXiv preprint arXiv:2201.11227","author":"Poesia Gabriel","year":"2022","unstructured":"Gabriel Poesia, Oleksandr Polozov, Vu Le, Ashish Tiwari, Gustavo Soares, Christopher Meek, and Sumit Gulwani. 2022. Synchromesh: Reliable code generation from pre-trained language models. arXiv preprint arXiv:2201.11227 (2022)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2016.12.006"},{"key":"e_1_3_2_1_29_1","unstructured":"Virat Singh. 2024. Financial QA 10K. https:\/\/huggingface.co\/datasets\/virattt\/financial-qa-10K Accessed: 2024--10--14."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2740908.2742839"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.wpi.2024.102282"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.557"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.609"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457550"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.168"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.585"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29889"},{"key":"e_1_3_2_1_38_1","unstructured":"He sicheng Wang Yuxin Sun Qingxuan. 2023. M3E: Moka Massive Mixed Embedding Model."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657878"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.145"},{"volume-title":"Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations.","author":"Xiong Lee","key":"e_1_3_2_1_41_1","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul N Bennett, Junaid Ahmed, and Arnold Overwijk. [n.d.]. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3661370"},{"key":"e_1_3_2_1_43_1","volume-title":"QA-GNN: Reasoning with language models and knowledge graphs for question answering. arXiv preprint arXiv:2104.06378","author":"Yasunaga Michihiro","year":"2021","unstructured":"Michihiro Yasunaga, Hongyu Ren, Antoine Bosselut, Percy Liang, and Jure Leskovec. 2021. QA-GNN: Reasoning with language models and knowledge graphs for question answering. arXiv preprint arXiv:2104.06378 (2021)."},{"key":"e_1_3_2_1_44_1","volume-title":"Efficient and Effective Retrieval of Dense-Sparse Hybrid Vectors using Graph-based Approximate Nearest Neighbor Search. arXiv preprint arXiv:2410.20381","author":"Zhang Haoyu","year":"2024","unstructured":"Haoyu Zhang, Jun Liu, Zhenhua Zhu, Shulin Zeng, Maojia Sheng, Tao Yang, Guohao Dai, and Yu Wang. 2024. Efficient and Effective Retrieval of Dense-Sparse Hybrid Vectors using Graph-based Approximate Nearest Neighbor Search. arXiv preprint arXiv:2410.20381 (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Retrieval-augmented generation for ai-generated content: A survey. arXiv preprint arXiv:2402.19473","author":"Zhao Penghao","year":"2024","unstructured":"Penghao Zhao, Hailin Zhang, Qinhan Yu, Zhengren Wang, Yunteng Geng, Fangcheng Fu, Ling Yang, Wentao Zhang, and Bin Cui. 2024. Retrieval-augmented generation for ai-generated content: A survey. arXiv preprint arXiv:2402.19473 (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637870"},{"key":"e_1_3_2_1_47_1","volume-title":"Quoc V Le, and Denny Zhou.","author":"Zheng Huaixiu Steven","year":"2023","unstructured":"Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, Heng-Tze Cheng, Ed H Chi, Quoc V Le, and Denny Zhou. 2023. Take a step back: Evoking reasoning via abstraction in large language models. arXiv preprint arXiv:2310.06117 (2023)."}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761023","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:03:36Z","timestamp":1765497816000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761023"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":47,"alternative-id":["10.1145\/3746252.3761023","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761023","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}