{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T10:40:01Z","timestamp":1755859201924,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","funder":[{"name":"Funda\u00e7\u00e3o para a Ci\u00eancia e Tecnologia","award":["UIDB\/50021\/2020,UIDP\/04516\/2020"],"award-info":[{"award-number":["UIDB\/50021\/2020,UIDP\/04516\/2020"]}]},{"DOI":"10.13039\/501100006374","name":"Carnegie Mellon Portugal","doi-asserted-by":"publisher","award":["PRT\/BD\/153683\/2021"],"award-info":[{"award-number":["PRT\/BD\/153683\/2021"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Center For Responsible AI","award":["C645008882- 00000055"],"award-info":[{"award-number":["C645008882- 00000055"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730162","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:21:38Z","timestamp":1752456098000},"page":"2982-2986","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Aligning Web Query Generation with Ranking Objectives via Direct Preference Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6207-1934","authenticated-orcid":false,"given":"Jo\u00e3o","family":"Coelho","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3856-2936","authenticated-orcid":false,"given":"Bruno","family":"Martins","sequence":"additional","affiliation":[{"name":"Instituto Superior T\u00e9cnico and INESC-ID, Lisbon, Portugal"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6290-5719","authenticated-orcid":false,"given":"Jo\u00e3o","family":"Magalh\u00e3es","sequence":"additional","affiliation":[{"name":"NOVA School of Science and Technology, Lisbon, Portugal"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0392-4183","authenticated-orcid":false,"given":"Chenyan","family":"Xiong","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders. ArXiv","author":"BehnamGhader Parishad","year":"2024","unstructured":"Parishad BehnamGhader, Vaibhav Adlakha, Marius Mosbach, Dzmitry Bahdanau, Nicolas Chapados, and Siva Reddy. 2024. LLM2Vec: Large Language Models Are Secretly Powerful Text Encoders. ArXiv, Vol. abs\/2404.05961 (2024)."},{"key":"e_1_3_2_1_2_1","volume-title":"InPars: Data Augmentation for Information Retrieval using Large Language Models. ArXiv","author":"Bonifacio Luiz Henrique","year":"2022","unstructured":"Luiz Henrique Bonifacio, Hugo Abonizio, Marzieh Fadaee, and Rodrigo Frassetto Nogueira. 2022. InPars: Data Augmentation for Information Retrieval using Large Language Models. ArXiv, Vol. abs\/2202.05144 (2022)."},{"volume-title":"Encyclopedia of Database Systems.","author":"Craswell Nick","key":"e_1_3_2_1_3_1","unstructured":"Nick Craswell. 2009. Mean Reciprocal Rank. In Encyclopedia of Database Systems."},{"key":"e_1_3_2_1_4_1","volume-title":"Overview of the TREC 2020 Deep Learning Track. In Text REtrieval Conference (TREC).","author":"Craswell Nick","year":"2021","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, and Daniel Campos. 2021. Overview of the TREC 2020 Deep Learning Track. In Text REtrieval Conference (TREC)."},{"volume-title":"Overview of the TREC 2019 Deep Learning Track. In Text REtrieval Conference (TREC).","author":"Craswell Nick","key":"e_1_3_2_1_5_1","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M. Voorhees. 2020. Overview of the TREC 2019 Deep Learning Track. In Text REtrieval Conference (TREC)."},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Learning Representations (ICLR","author":"Dai Zhuyun","year":"2023","unstructured":"Zhuyun Dai, Vincent Y. Zhao, Ji Ma, Yi Luan, Jianmo Ni, Jing Lu, Anton Bakalov, Kelvin Guu, Keith B. Hall, and Ming-Wei Chang. 2023. Promptagator: Few-shot Dense Retrieval From 8 Examples. In International Conference on Learning Representations (ICLR 2023)."},{"key":"e_1_3_2_1_7_1","unstructured":"Abhimanyu Dubey et al. 2024. The Llama 3 Herd of Models. ArXiv Vol. abs\/2407.21783 (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"Scaling Laws For Dense Retrieval. In International Conference on Research and Development in Information Retrieval (SIGIR","author":"Fang Yan","year":"2024","unstructured":"Yan Fang, Jingtao Zhan, Qingyao Ai, Jiaxin Mao, Weihang Su, Jia Chen, and Yiqun Liu. 2024. Scaling Laws For Dense Retrieval. In International Conference on Research and Development in Information Retrieval (SIGIR 2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Unsupervised Corpus Aware Language Model Pre-training for Dense Passage Retrieval. In Annual Meeting of the Association for Computational Linguistics (ACL","author":"Gao Luyu","year":"2022","unstructured":"Luyu Gao and Jamie Callan. 2022. Unsupervised Corpus Aware Language Model Pre-training for Dense Passage Retrieval. In Annual Meeting of the Association for Computational Linguistics (ACL 2022)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28238-6_31"},{"key":"e_1_3_2_1_11_1","volume-title":"Enhancing Dense Retrievers' Robustness with Group-level Reweighting. ArXiv","author":"Han Peixuan","year":"2024","unstructured":"Peixuan Han, Zhenghao Liu, Zhiyuan Liu, and Chenyan Xiong. 2024. Enhancing Dense Retrievers' Robustness with Group-level Reweighting. ArXiv, Vol. abs\/2310.16605 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations (ICLR","author":"Hu Edward J.","year":"2022","unstructured":"Edward J. Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations (ICLR 2022), ."},{"key":"e_1_3_2_1_13_1","volume-title":"Unsupervised Dense Information Retrieval with Contrastive Learning. Transactions on Machine Learning Research","author":"Izacard Gautier","year":"2022","unstructured":"Gautier Izacard, Mathilde Caron, Lucas Hosseini, Sebastian Riedel, Piotr Bojanowski, Armand Joulin, and Edouard Grave. 2022. Unsupervised Dense Information Retrieval with Contrastive Learning. Transactions on Machine Learning Research (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems","author":"J\u00e4rvelin Kalervo","year":"2002","unstructured":"Kalervo J\u00e4rvelin and Jaana Kek\u00e4l\u00e4inen. 2002. Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems (2002)."},{"key":"e_1_3_2_1_15_1","volume-title":"Hugo Abonizio, Marzieh Fadaee, Roberto de Alencar Lotufo, Jakub Zavrel, and Rodrigo Frassetto Nogueira.","author":"Jeronymo Vitor","year":"2023","unstructured":"Vitor Jeronymo, Luiz Henrique Bonifacio, Hugo Abonizio, Marzieh Fadaee, Roberto de Alencar Lotufo, Jakub Zavrel, and Rodrigo Frassetto Nogueira. 2023. InPars-v2: Large Language Models as Efficient Dataset Generators for Information Retrieval. ArXiv, Vol. abs\/2301.01820 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Dense Passage Retrieval for Open-Domain Question Answering. In Conference on Empirical Methods in Natural Language Processing (EMNLP","author":"Karpukhin Vladimir","year":"2020","unstructured":"Vladimir Karpukhin, Barlas Oguz, Sewon Min, Patrick S. H. Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020. Dense Passage Retrieval for Open-Domain Question Answering. In Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)."},{"key":"e_1_3_2_1_17_1","volume-title":"NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models. ArXiv","author":"Lee Chankyu","year":"2024","unstructured":"Chankyu Lee, Rajarshi Roy, Mengyao Xu, Jonathan Raiman, Mohammad Shoeybi, Bryan Catanzaro, and Wei Ping. 2024b. NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models. ArXiv, Vol. abs\/2405.17428 (2024)."},{"key":"e_1_3_2_1_18_1","volume-title":"Gustavo Hern\u00e1ndez \u00c1brego, Weiqiang Shi, Nithi Gupta, Aditya Kusupati, Prateek Jain, Siddhartha Reddy Jonnalagadda, Ming-Wei Chang, and Iftekhar Naim.","author":"Lee Jinhyuk","year":"2024","unstructured":"Jinhyuk Lee, Zhuyun Dai, Xiaoqi Ren, Blair Chen, Daniel Cer, Jeremy R. Cole, Kai Hui, Michael Boratko, Rajvi Kapadia, Wen Ding, Yi Luan, Sai Meher Karthik Duddu, Gustavo Hern\u00e1ndez \u00c1brego, Weiqiang Shi, Nithi Gupta, Aditya Kusupati, Prateek Jain, Siddhartha Reddy Jonnalagadda, Ming-Wei Chang, and Iftekhar Naim. 2024a. Gecko: Versatile Text Embeddings Distilled from Large Language Models. ArXiv, Vol. abs\/2403.20327 (2024)."},{"key":"e_1_3_2_1_19_1","volume-title":"Latent Retrieval for Weakly Supervised Open Domain Question Answering. In Annual Meeting of the Association for Computational Linguistics (ACL","author":"Lee Kenton","year":"2019","unstructured":"Kenton Lee, Ming-Wei Chang, and Kristina Toutanova. 2019. Latent Retrieval for Weakly Supervised Open Domain Question Answering. In Annual Meeting of the Association for Computational Linguistics (ACL 2019)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.220"},{"key":"e_1_3_2_1_21_1","volume-title":"Drop your Decoder: Pre-training with Bag-of-Word Prediction for Dense Passage Retrieval. ArXiv","author":"Ma Guangyuan","year":"2024","unstructured":"Guangyuan Ma, Xing Wu, Zijia Lin, and Songlin Hu. 2024. Drop your Decoder: Pre-training with Bag-of-Word Prediction for Dense Passage Retrieval. ArXiv, Vol. abs\/2401.11248 (2024)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531772"},{"key":"e_1_3_2_1_23_1","volume-title":"MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In Workshop on Cognitive Computation: Integrating Neural and Symbolic Approaches.","author":"Nguyen Tri","year":"2016","unstructured":"Tri Nguyen, Mir Rosenberg, Xia Song, Jianfeng Gao, Saurabh Tiwary, Rangan Majumder, and Li Deng. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In Workshop on Cognitive Computation: Integrating Neural and Symbolic Approaches."},{"key":"e_1_3_2_1_24_1","unstructured":"Rodrigo Nogueira and Jimmy Lin. 2019. From doc2query to docTTTTTquery. Technical Report. University of Waterloo."},{"key":"e_1_3_2_1_25_1","volume-title":"Document Expansion by Query Prediction. ArXiv","author":"Nogueira Rodrigo Frassetto","year":"2019","unstructured":"Rodrigo Frassetto Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document Expansion by Query Prediction. ArXiv, Vol. abs\/1904.08375 (2019)."},{"key":"e_1_3_2_1_26_1","volume-title":"Token-level Proximal Policy Optimization for Query Generation. ArXiv","author":"Ouyang Yichen","year":"2024","unstructured":"Yichen Ouyang, Lu Wang, Fangkai Yang, Pu Zhao, Chenghua Huang, Jianfeng Liu, Bochen Pang, Yaming Yang, Yuefeng Zhan, Hao Sun, Qingwei Lin, Saravan Rajmohan, Weiwei Deng, Dongmei Zhang, Feng Sun, and Qi Zhang. 2024. Token-level Proximal Policy Optimization for Query Generation. ArXiv, Vol. abs\/2411.00722 (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"ClueWeb22: 10 Billion Web Documents with Rich Information. In International Conference on Research and Development in Information Retrieval (SIGIR","author":"Overwijk Arnold","year":"2022","unstructured":"Arnold Overwijk, Chenyan Xiong, and Jamie Callan. 2022. ClueWeb22: 10 Billion Web Documents with Rich Information. In International Conference on Research and Development in Information Retrieval (SIGIR 2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS","author":"Rafailov Rafael","year":"2023","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D. Manning, Stefano Ermon, and Chelsea Finn. 2023. Direct Preference Optimization: Your Language Model is Secretly a Reward Model. In Conference on Neural Information Processing Systems (NeurIPS 2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Robertson and Hugo Zaragoza","author":"Stephen","year":"2009","unstructured":"Stephen E. Robertson and Hugo Zaragoza. 2009. The Probabilistic Relevance Framework: BM25 and Beyond. Found. Trends Inf. Retr. (2009)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proximal Policy Optimization Algorithms. ArXiv","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. ArXiv, Vol. abs\/1707.06347 (2017)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.923"},{"key":"e_1_3_2_1_32_1","volume-title":"Representation Learning with Contrastive Predictive Coding. ArXiv","author":"van den Oord A\u00e4ron","year":"2018","unstructured":"A\u00e4ron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation Learning with Contrastive Predictive Coding. ArXiv, Vol. abs\/1807.03748 (2018)."},{"key":"e_1_3_2_1_33_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Conference on Neural Information Processing Systems (NeurIPS 2017)."},{"key":"e_1_3_2_1_34_1","volume-title":"Text Embeddings by Weakly-Supervised Contrastive Pre-training. ArXiv","author":"Wang Liang","year":"2022","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, and Furu Wei. 2022. Text Embeddings by Weakly-Supervised Contrastive Pre-training. ArXiv, Vol. abs\/2212.03533 (2022)."},{"key":"e_1_3_2_1_35_1","volume-title":"Improving Text Embeddings with Large Language Models. In Annual Meeting of the Association for Computational Linguistics (ACL","author":"Wang Liang","year":"2024","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Linjun Yang, Rangan Majumder, and Furu Wei. 2024. Improving Text Embeddings with Large Language Models. In Annual Meeting of the Association for Computational Linguistics (ACL 2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"RetroMAE: Pre-Training Retrieval-oriented Language Models Via Masked Auto-Encoder. In Conference on Empirical Methods in Natural Language Processing (EMNLP","author":"Xiao Shitao","year":"2022","unstructured":"Shitao Xiao, Zheng Liu, Yingxia Shao, and Zhao Cao. 2022. RetroMAE: Pre-Training Retrieval-oriented Language Models Via Masked Auto-Encoder. In Conference on Empirical Methods in Natural Language Processing (EMNLP 2022)."},{"key":"e_1_3_2_1_37_1","volume-title":"CMT in TREC-COVID Round 2: Mitigating the Generalization Gaps from Web to Special Domain Search. ArXiv","author":"Xiong Chenyan","year":"2020","unstructured":"Chenyan Xiong, Zhenghao Liu, Si Sun, Zhuyun Dai, Kaitao Zhang, Shi Yu, Zhiyuan Liu, Hoifung Poon, Jianfeng Gao, and Paul Bennett. 2020. CMT in TREC-COVID Round 2: Mitigating the Generalization Gaps from Web to Special Domain Search. ArXiv, Vol. abs\/2011.01580 (2020)."},{"key":"e_1_3_2_1_38_1","volume-title":"Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations (ICLR","author":"Xiong Lee","year":"2021","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul N. Bennett, Junaid Ahmed, and Arnold Overwijk. 2021. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations (ICLR 2021)."},{"key":"e_1_3_2_1_39_1","unstructured":"An Yang et al. 2024. Qwen2.5 Technical Report. ArXiv Vol. abs\/2412.15115 (2024)."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730162","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T10:02:55Z","timestamp":1755856975000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730162"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":39,"alternative-id":["10.1145\/3726302.3730162","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730162","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}