{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:19:09Z","timestamp":1759969149430,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715263","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:12:56Z","timestamp":1748016776000},"page":"384-393","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Distributionally Balanced End-to-End Generative Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9684-6416","authenticated-orcid":false,"given":"Yuxuan","family":"Liu","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1215-8676","authenticated-orcid":false,"given":"Tianchi","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8394-5783","authenticated-orcid":false,"given":"Zihan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0818-8913","authenticated-orcid":false,"given":"Minghui","family":"Song","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7145-2500","authenticated-orcid":false,"given":"Haizhen","family":"Huang","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4793-9715","authenticated-orcid":false,"given":"Weiwei","family":"Deng","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0834-0562","authenticated-orcid":false,"given":"Feng","family":"Sun","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7438-7248","authenticated-orcid":false,"given":"Qi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft AI, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Deep Variational Information Bottleneck. In International Conference on Learning Representations.","author":"Alemi Alexander A","year":"2016","unstructured":"Alexander A Alemi, Ian Fischer, Joshua V Dillon, and Kevin Murphy. 2016. Deep Variational Information Bottleneck. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_2_1","first-page":"31668","article-title":"Autoregressive search engines: Generating substrings as document identifiers","volume":"35","author":"Bevilacqua Michele","year":"2022","unstructured":"Michele Bevilacqua, Giuseppe Ottaviano, Patrick Lewis, Scott Yih, Sebastian Riedel, and Fabio Petroni. 2022. Autoregressive search engines: Generating substrings as document identifiers. Advances in Neural Information Processing Systems, Vol. 35 (2022), 31668--31683.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591631"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557271"},{"key":"e_1_3_2_2_5_1","volume-title":"International Conference on Machine Learning. PMLR, 1617--1626","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Lala Li, and Yizhou Sun. 2020. Differentiable product quantization for end-to-end embedding compression. In International Conference on Machine Learning. PMLR, 1617--1626."},{"key":"e_1_3_2_2_6_1","volume-title":"International conference on machine learning. PMLR, 1779--1788","author":"Cheng Pengyu","year":"2020","unstructured":"Pengyu Cheng, Weituo Hao, Shuyang Dai, Jiachang Liu, Zhe Gan, and Lawrence Carin. 2020. Club: A contrastive log-ratio upper bound of mutual information. In International conference on machine learning. PMLR, 1779--1788."},{"volume-title":"Elements of information theory","author":"Cover Thomas M","key":"e_1_3_2_2_7_1","unstructured":"Thomas M Cover. 1999. Elements of information theory. John Wiley & Sons."},{"key":"e_1_3_2_2_8_1","volume-title":"Sinkhorn distances: Lightspeed computation of optimal transport. Advances in neural information processing systems","author":"Cuturi Marco","year":"2013","unstructured":"Marco Cuturi. 2013. Sinkhorn distances: Lightspeed computation of optimal transport. Advances in neural information processing systems, Vol. 26 (2013)."},{"key":"e_1_3_2_2_9_1","volume-title":"Autoregressive Entity Retrieval. In International Conference on Learning Representations.","author":"Cao Nicola De","year":"2020","unstructured":"Nicola De Cao, Gautier Izacard, Sebastian Riedel, and Fabio Petroni. 2020. Autoregressive Entity Retrieval. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of NAACL-HLT. 4171--4186","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of NAACL-HLT. 4171--4186."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1137\/060659624"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01123"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1276"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.366"},{"key":"e_1_3_2_2_18_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations.","author":"Loshchilov Ilya","year":"2018","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled Weight Decay Regularization. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_19_1","unstructured":"Tri Nguyen Mir Rosenberg Xia Song Jianfeng Gao Saurabh Tiwary Rangan Majumder and Li Deng. 2016. Ms marco: A human-generated machine reading comprehension dataset. (2016)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.146"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.669"},{"key":"e_1_3_2_2_22_1","unstructured":"Jianmo Ni Chen Qu Jing Lu Zhuyun Dai Gustavo Hern\u00e1ndez \u00c1brego Ji Ma Vincent Y Zhao Yi Luan Keith B Hall Ming-Wei Chang et al. 2021. Large dual encoders are generalizable retrievers. arXiv preprint arXiv:2112.07899 (2021)."},{"key":"e_1_3_2_2_23_1","volume-title":"Residual Vector Product Quantization for approximate nearest neighbor search. Expert Systems with Applications","author":"Niu Lushuai","year":"2023","unstructured":"Lushuai Niu, Zhi Xu, Longyang Zhao, Daojing He, Jianqiu Ji, Xiaoli Yuan, and Mian Xue. 2023. Residual Vector Product Quantization for approximate nearest neighbor search. Expert Systems with Applications (2023), 120832."},{"key":"e_1_3_2_2_24_1","volume-title":"From doc2query to docTTTTTquery. Online preprint","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Jimmy Lin, and AI Epistemic. 2019. From doc2query to docTTTTTquery. Online preprint, Vol. 6, 2 (2019)."},{"key":"e_1_3_2_2_25_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_2_26_1","volume-title":"Towards robust detection of adversarial examples. Advances in neural information processing systems","author":"Pang Tianyu","year":"2018","unstructured":"Tianyu Pang, Chao Du, Yinpeng Dong, and Jun Zhu. 2018. Towards robust detection of adversarial examples. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.466"},{"key":"e_1_3_2_2_28_1","volume-title":"Trung Vu, Lukasz Heldt, Lichan Hong, Yi Tay, Vinh Tran, Jonah Samost, et al.","author":"Rajput Shashank","year":"2024","unstructured":"Shashank Rajput, Nikhil Mehta, Anima Singh, Raghunandan Hulikal Keshavan, Trung Vu, Lukasz Heldt, Lichan Hong, Yi Tay, Vinh Tran, Jonah Samost, et al. 2024. Recommender systems with generative retrieval. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_2_30_1","volume-title":"Developments in automatic text retrieval. science","author":"Salton Gerard","year":"1991","unstructured":"Gerard Salton. 1991. Developments in automatic text retrieval. science, Vol. 253, 5023 (1991), 974--980."},{"key":"e_1_3_2_2_31_1","volume-title":"Monte Carlo sampling methods. Handbooks in operations research and management science","author":"Shapiro Alexander","year":"2003","unstructured":"Alexander Shapiro. 2003. Monte Carlo sampling methods. Handbooks in operations research and management science, Vol. 10 (2003), 353--425."},{"key":"e_1_3_2_2_32_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Sun Weiwei","year":"2024","unstructured":"Weiwei Sun, Lingyong Yan, Zheng Chen, Shuaiqiang Wang, Haichao Zhu, Pengjie Ren, Zhumin Chen, Dawei Yin, Maarten Rijke, and Zhaochun Ren. 2024. Learning to tokenize for generative retrieval. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599903"},{"key":"e_1_3_2_2_34_1","first-page":"21831","article-title":"Transformer memory as a differentiable search index","volume":"35","author":"Tay Yi","year":"2022","unstructured":"Yi Tay, Vinh Tran, Mostafa Dehghani, Jianmo Ni, Dara Bahri, Harsh Mehta, Zhen Qin, Kai Hui, Zhe Zhao, Jai Gupta, et al. 2022. Transformer memory as a differentiable search index. Advances in Neural Information Processing Systems, Vol. 35 (2022), 21831--21843.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_35_1","volume-title":"The information bottleneck method. arXiv preprint physics\/0004057","author":"Tishby Naftali","year":"2000","unstructured":"Naftali Tishby, Fernando C Pereira, and William Bialek. 2000. The information bottleneck method. arXiv preprint physics\/0004057 (2000)."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/MP.2006.1664069"},{"key":"e_1_3_2_2_37_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_38_1","first-page":"25600","article-title":"A neural corpus indexer for document retrieval","volume":"35","author":"Wang Yujing","year":"2022","unstructured":"Yujing Wang, Yingyan Hou, Haonan Wang, Ziming Miao, Shibin Wu, Qi Chen, Yuqing Xia, Chengmin Chi, Guoshuai Zhao, Zheng Liu, et al. 2022. A neural corpus indexer for document retrieval. Advances in Neural Information Processing Systems, Vol. 35 (2022), 25600--25614.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614993"},{"key":"e_1_3_2_2_40_1","volume-title":"Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808","author":"Xiong Lee","year":"2020","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul Bennett, Junaid Ahmed, and Arnold Overwijk. 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808 (2020)."},{"key":"e_1_3_2_2_41_1","volume-title":"Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations.","author":"Xiong Lee","year":"2021","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul N Bennett, Junaid Ahmed, and Arnold Overwijk. 2021. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.464"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3129994"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498443"},{"key":"e_1_3_2_2_45_1","volume-title":"Repbert: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498","author":"Zhan Jingtao","year":"2020","unstructured":"Jingtao Zhan, Jiaxin Mao, Yiqun Liu, Min Zhang, and Shaoping Ma. 2020. Repbert: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498 (2020)."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.284"},{"key":"e_1_3_2_2_47_1","volume-title":"Dynamicretriever: A pre-training model-based IR system with neither sparse nor dense index. arXiv preprint arXiv:2203.00537","author":"Zhou Yujia","year":"2022","unstructured":"Yujia Zhou, Jing Yao, Zhicheng Dou, Ledell Wu, and Ji-Rong Wen. 2022a. Dynamicretriever: A pre-training model-based IR system with neither sparse nor dense index. arXiv preprint arXiv:2203.00537 (2022)."},{"key":"e_1_3_2_2_48_1","volume-title":"Ultron: An ultimate retriever on corpus with a model-based indexer. arXiv preprint arXiv:2208.09257","author":"Zhou Yujia","year":"2022","unstructured":"Yujia Zhou, Jing Yao, Zhicheng Dou, Ledell Wu, Peitian Zhang, and Ji-Rong Wen. 2022b. Ultron: An ultimate retriever on corpus with a model-based indexer. arXiv preprint arXiv:2208.09257 (2022)."},{"key":"e_1_3_2_2_49_1","volume-title":"Bridging the gap between indexing and retrieval for differentiable search index with query generation. arXiv preprint arXiv:2206.10128","author":"Zhuang Shengyao","year":"2022","unstructured":"Shengyao Zhuang, Houxing Ren, Linjun Shou, Jian Pei, Ming Gong, Guido Zuccon, and Daxin Jiang. 2022. Bridging the gap between indexing and retrieval for differentiable search index with query generation. arXiv preprint arXiv:2206.10128 (2022)."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715263","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715263","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T03:04:38Z","timestamp":1759892678000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715263"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":49,"alternative-id":["10.1145\/3701716.3715263","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715263","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}