{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:46Z","timestamp":1750309546062,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714795","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T23:08:29Z","timestamp":1745363309000},"page":"1693-1702","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["<i>PEAR:<\/i>\n            <i>Position-Embedding-Agnostic Attention Re-weighting Enhances Retrieval-Augmented Generation with Zero Inference Overhead<\/i>"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2149-0807","authenticated-orcid":false,"given":"Tao","family":"Tan","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4872-5085","authenticated-orcid":false,"given":"Yining","family":"Qian","sequence":"additional","affiliation":[{"name":"Southeast University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8027-2270","authenticated-orcid":false,"given":"Ang","family":"Lv","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4029-810X","authenticated-orcid":false,"given":"Hongzhan","family":"Lin","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3697-7082","authenticated-orcid":false,"given":"Songhao","family":"Wu","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6635-2842","authenticated-orcid":false,"given":"Yongbo","family":"Wang","sequence":"additional","affiliation":[{"name":"Ant Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0885-9628","authenticated-orcid":false,"given":"Feng","family":"Wang","sequence":"additional","affiliation":[{"name":"Ant Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3677-7755","authenticated-orcid":false,"given":"Jingtong","family":"Wu","sequence":"additional","affiliation":[{"name":"Ant Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9965-1851","authenticated-orcid":false,"given":"Xin","family":"Lu","sequence":"additional","affiliation":[{"name":"Ant Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3356-6823","authenticated-orcid":false,"given":"Rui","family":"Yan","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China and Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.601"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Dao Tri","year":"2024","unstructured":"Tri Dao. 2024. FlashAttention-2: Faster Attention with Better Parallelism and Work Partitioning. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Pradeep Dasigi Kyle Lo Iz Beltagy Arman Cohan Noah A. Smith and Matt Gardner. 2021. A Dataset of Information-Seeking Questions and Answers Anchored in Research Papers. arXiv:2105.03011 [cs.CL] https:\/\/arxiv.org\/abs\/2105.03011","DOI":"10.18653\/v1\/2021.naacl-main.365"},{"key":"e_1_3_2_1_4_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.751"},{"key":"e_1_3_2_1_6_1","unstructured":"Zhuocheng Gong Ang Lv Jian Guan Junxi Yan Wei Wu Huishuai Zhang Minlie Huang Dongyan Zhao and Rui Yan. 2024. Mixture-of-Modules: Reinventing Transformers as Dynamic Assemblies of Modules. arXiv:2407.06677 [cs.CL] https:\/\/arxiv.org\/abs\/2407.06677"},{"key":"e_1_3_2_1_7_1","unstructured":"Dan Hendrycks Collin Burns Steven Basart Andy Zou Mantas Mazeika Dawn Song and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. arXiv:2009.03300 [cs.CY] https:\/\/arxiv.org\/abs\/2009.03300"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.580"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_10_1","volume-title":"Latent retrieval for weakly supervised open domain question answering. arXiv preprint arXiv:1906.00300","author":"Lee Kenton","year":"2019","unstructured":"Kenton Lee, Ming-Wei Chang, and Kristina Toutanova. 2019. Latent retrieval for weakly supervised open domain question answering. arXiv preprint arXiv:1906.00300 (2019)."},{"key":"e_1_3_2_1_11_1","volume-title":"Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela.","author":"Lewis Patrick","year":"2021","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2021. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. arXiv:2005.11401 [cs.CL] https:\/\/arxiv.org\/abs\/ 2005.11401"},{"key":"e_1_3_2_1_12_1","unstructured":"Hongzhan Lin Ang Lv Yuhan Chen Chen Zhu Yang Song Hengshu Zhu and Rui Yan. 2024. Mixture of In-Context Experts Enhance LLMs' Long Context Awareness. arXiv:2406.19598 [cs.CL] https:\/\/arxiv.org\/abs\/2406.19598"},{"key":"e_1_3_2_1_13_1","unstructured":"Nelson F. Liu Kevin Lin John Hewitt Ashwin Paranjape Michele Bevilacqua Fabio Petroni and Percy Liang. 2023. Lost in the Middle: How Language Models Use Long Contexts. arXiv:2307.03172 [cs.CL]"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_15_1","unstructured":"Ang Lv Yuhan Chen Kaiyi Zhang Yulong Wang Lifeng Liu Ji-Rong Wen Jian Xie and Rui Yan. 2024. Interpreting Key Mechanisms of Factual Recall in Transformer-Based Language Models. arXiv:2403.19521 [cs.CL] https:\/\/arxiv. org\/abs\/2403.19521"},{"key":"e_1_3_2_1_16_1","unstructured":"Ang Lv Ruobing Xie Xingwu Sun Zhanhui Kang and Rui Yan. 2024. Language Models ''Grok'' to Copy. arXiv:2409.09281 [cs.CL] https:\/\/arxiv.org\/abs\/2409.09281"},{"key":"e_1_3_2_1_17_1","volume-title":"Copy Suppression: Comprehensively Understanding an Attention Head. arXiv:2310.04625 [cs.LG] https:\/\/arxiv.org\/abs\/2310.04625","author":"McDougall Callum","year":"2023","unstructured":"Callum McDougall, Arthur Conmy, Cody Rushing, Thomas McGrath, and Neel Nanda. 2023. Copy Suppression: Comprehensively Understanding an Attention Head. arXiv:2310.04625 [cs.LG] https:\/\/arxiv.org\/abs\/2310.04625"},{"key":"e_1_3_2_1_18_1","volume-title":"Circuit Component Reuse Across Tasks in Transformer Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=fpoAYV6Wsk","author":"Merullo Jack","year":"2024","unstructured":"Jack Merullo, Carsten Eickhoff, and Ellie Pavlick. 2024. Circuit Component Reuse Across Tasks in Transformer Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=fpoAYV6Wsk"},{"key":"e_1_3_2_1_19_1","unstructured":"Microsoft. 2023. Reinventing search with a new AI-powered Microsoft Bing and Edge your copilot for the web."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00024.001"},{"key":"e_1_3_2_1_21_1","volume-title":"In-context Learning and Induction Heads. Transformer Circuits Thread","author":"Olsson Catherine","year":"2022","unstructured":"Catherine Olsson, Nelson Elhage, Neel Nanda, Nicholas Joseph, Nova DasSarma, Tom Henighan, Ben Mann, Amanda Askell, Yuntao Bai, Anna Chen, Tom Conerly, Dawn Drain, Deep Ganguli, Zac Hatfield-Dodds, Danny Hernandez, Scott Johnston, Andy Jones, Jackson Kernion, Liane Lovitt, Kamal Ndousse, Dario Amodei, Tom Brown, Jack Clark, Jared Kaplan, Sam McCandlish, and Chris Olah. 2022. In-context Learning and Induction Heads. Transformer Circuits Thread (2022). https:\/\/transformer-circuits.pub\/2022\/in-context-learning-and-inductionheads\/index.html."},{"key":"e_1_3_2_1_23_1","unstructured":"Alexander Peysakhovich and Adam Lerer. 2023. Attention Sorting Combats Recency Bias In Long Context Language Models. arXiv:2310.01427 [cs.CL]"},{"volume-title":"Train Short","author":"Press Ofir","key":"e_1_3_2_1_24_1","unstructured":"Ofir Press, Noah A. Smith, and Mike Lewis. 2022. Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation. arXiv:2108.12409 [cs.CL] https:\/\/arxiv.org\/abs\/2108.12409"},{"key":"e_1_3_2_1_25_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners."},{"key":"e_1_3_2_1_26_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. CoRR abs\/1701.06538","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc V. Le, Geoffrey E. Hinton, and Jeff Dean. 2017. Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. CoRR abs\/1701.06538 (2017). arXiv:1701.06538 http:\/\/arxiv.org\/abs\/1701.06538"},{"key":"e_1_3_2_1_27_1","unstructured":"Jianlin Su Yu Lu Shengfeng Pan Ahmed Murtadha Bo Wen and Yunfeng Liu. 2023. RoFormer: Enhanced Transformer with Rotary Position Embedding. arXiv:2104.09864 [cs.CL] https:\/\/arxiv.org\/abs\/2104.09864"},{"key":"e_1_3_2_1_28_1","volume-title":"Toolalpaca: Generalized tool learning for language models with 3000 simulated cases. arXiv preprint arXiv:2306.05301","author":"Tang Qiaoyu","year":"2023","unstructured":"Qiaoyu Tang, Ziliang Deng, Hongyu Lin, Xianpei Han, Qiao Liang, Boxi Cao, and Le Sun. 2023. Toolalpaca: Generalized tool learning for language models with 3000 simulated cases. arXiv preprint arXiv:2306.05301 (2023)."},{"key":"e_1_3_2_1_29_1","unstructured":"Qwen Team. 2024. Qwen2.5: A Party of Foundation Models. https:\/\/qwenlm.github.io\/blog\/qwen2.5\/"},{"key":"e_1_3_2_1_30_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL] https:\/\/arxiv.org\/abs\/2307.09288"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00475"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531830"},{"key":"e_1_3_2_1_33_1","unstructured":"Boshi Wang Xiang Yue Yu Su and Huan Sun. 2024. Grokked Transformers are Implicit Reasoners: A Mechanistic Journey to the Edge of Generalization. arXiv:2405.15071 [cs.CL] https:\/\/arxiv.org\/abs\/2405.15071"},{"key":"e_1_3_2_1_34_1","volume-title":"Interpretability in the wild: a circuit for indirect object identification in gpt-2 small. arXiv preprint arXiv:2211.00593","author":"Wang Kevin","year":"2022","unstructured":"Kevin Wang, Alexandre Variengien, Arthur Conmy, Buck Shlegeris, and Jacob Steinhardt. 2022. Interpretability in the wild: a circuit for indirect object identification in gpt-2 small. arXiv preprint arXiv:2211.00593 (2022)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.615"},{"key":"e_1_3_2_1_36_1","volume-title":"Towards Best Practices of Activation Patching in Language Models: Metrics and Methods. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Hf17y6u9BC","author":"Zhang Fred","year":"2024","unstructured":"Fred Zhang and Neel Nanda. 2024. Towards Best Practices of Activation Patching in Language Models: Metrics and Methods. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Hf17y6u9BC"},{"key":"e_1_3_2_1_37_1","volume-title":"Todor Mihaylov, Myle Ott, Sam Shleifer, Kurt Shuster, Daniel Simig, Punit Singh Koura, Anjali Sridhar, Tianlu Wang, and Luke Zettlemoyer.","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona Diab, Xian Li, Xi Victoria Lin, Todor Mihaylov, Myle Ott, Sam Shleifer, Kurt Shuster, Daniel Simig, Punit Singh Koura, Anjali Sridhar, Tianlu Wang, and Luke Zettlemoyer. 2022. OPT: Open Pre-trained Transformer Language Models. arXiv:2205.01068 [cs.CL] https:\/\/arxiv.org\/abs\/2205.01068"},{"key":"e_1_3_2_1_38_1","unstructured":"Zhenyu Zhang Runjin Chen Shiwei Liu Zhewei Yao Olatunji Ruwase Beidi Chen Xiaoxia Wu and Zhangyang Wang. 2024. Found in the Middle: How Language Models Use Long Contexts Better via Plug-and-Play Positional Encoding. arXiv:2403.04797 [cs.CL] https:\/\/arxiv.org\/abs\/2403.04797"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714795","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714795","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:42Z","timestamp":1750295922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714795"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":37,"alternative-id":["10.1145\/3696410.3714795","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714795","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}