{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:40:01Z","timestamp":1755866401288,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,7,13]],"date-time":"2026-07-13T00:00:00Z","timestamp":1783900800000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Huawei Innovation Research Program"},{"name":"CCF-Alimama Tech Kangaroo Fund","award":["No. 2024002"],"award-info":[{"award-number":["No. 2024002"]}]},{"name":"Collaborative Research Fund","award":["No.C1043-24GF"],"award-info":[{"award-number":["No.C1043-24GF"]}]},{"name":"Tencent Rhino-Bird Focused Research Program"},{"name":"U.S. national science foundation","award":["IIS-2336768"],"award-info":[{"award-number":["IIS-2336768"]}]},{"name":"CCF-Tencent Open Fund"},{"name":"Research Impact Fund","award":["No.R1015-23"],"award-info":[{"award-number":["No.R1015-23"]}]},{"name":"Huawei Fellowship"},{"name":"CCF-Ant Research Fund"},{"name":"Kuaishou"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730365","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:25:28Z","timestamp":1752456328000},"page":"4180-4183","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AgentIR: 2nd Workshop on Agent-based Information Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4712-3676","authenticated-orcid":false,"given":"Pengyue","family":"Jia","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6451-9299","authenticated-orcid":false,"given":"Qingpeng","family":"Cai","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4413-281X","authenticated-orcid":false,"given":"Ling","family":"Pan","sequence":"additional","affiliation":[{"name":"HKUST, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6116-9115","authenticated-orcid":false,"given":"Xin","family":"Xin","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9273-9037","authenticated-orcid":false,"given":"Jin","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Amsterdam, Amsterdam, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0127-2425","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5095-3377","authenticated-orcid":false,"given":"Li","family":"Zhao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0684-6205","authenticated-orcid":false,"given":"Dawei","family":"Yin","sequence":"additional","affiliation":[{"name":"Baidu, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6095-8358","authenticated-orcid":false,"given":"Grace Hui","family":"Yang","sequence":"additional","affiliation":[{"name":"Georgetown University, Washington, DC, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Ricardo Baeza-Yates Berthier Ribeiro-Neto et al. 1999. Modern information retrieval. ACM press New York."},{"key":"e_1_3_2_1_2_1","volume-title":"Proc. of NeurIPS","author":"Bengio Emmanuel","year":"2021","unstructured":"Emmanuel Bengio, Moksh Jain, Maksym Korablyov, Doina Precup, and Yoshua Bengio. 2021. Flow network based generative models for non-iterative diverse candidate generation. Proc. of NeurIPS (2021)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210004"},{"key":"e_1_3_2_1_4_1","volume-title":"Reinforcing User Retention in a Billion Scale Short Video Recommender System. arXiv preprint arXiv:2302.01724","author":"Cai Qingpeng","year":"2023","unstructured":"Qingpeng Cai, Shuchang Liu, Xueliang Wang, Tianyou Zuo, Wentao Xie, Bin Yang, Dong Zheng, Peng Jiang, and Kun Gai. 2023a. Reinforcing User Retention in a Billion Scale Short Video Recommender System. arXiv preprint arXiv:2302.01724 (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583259"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657989"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Jia Chen Jiaxin Mao Yiqun Liu Min Zhang and Shaoping Ma. 2020. A context-aware click model for web search. In WSDM.","DOI":"10.1145\/3336191.3371819"},{"key":"e_1_3_2_1_9_1","volume-title":"Attacking Black-box Recommendations via Copying Cross-domain User Profiles. arXiv preprint arXiv:2005.08147","author":"Fan Wenqi","year":"2020","unstructured":"Wenqi Fan, Tyler Derr, Xiangyu Zhao, Yao Ma, Hui Liu, Jianping Wang, Jiliang Tang, and Qing Li. 2020. Attacking Black-box Recommendations via Copying Cross-domain User Profiles. arXiv preprint arXiv:2005.08147 (2020)."},{"key":"e_1_3_2_1_10_1","unstructured":"Yingqiang Ge Shuchang Liu Ruoyuan Gao Yikun Xian Yunqi Li Xiangyu Zhao Changhua Pei Fei Sun Junfeng Ge Wenwu Ou et al. 2021. Towards Long-term Fairness in Recommendation. arXiv preprint arXiv:2101.03584 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Optimizing Sponsored Search Ranking Strategy by Deep Reinforcement Learning. arXiv preprint arXiv:1803.07347","author":"He Li","year":"2018","unstructured":"Li He, Liang Wang, Kaipeng Liu, Bo Wu, and Weinan Zhang. 2018. Optimizing Sponsored Search Ranking Strategy by Deep Reinforcement Learning. arXiv preprint arXiv:1803.07347 (2018)."},{"key":"e_1_3_2_1_12_1","volume-title":"Reinforcement Learning to Rank in E-Commerce Search Engine: Formalization, Analysis, and Application. arXiv preprint arXiv:1803.00710","author":"Hu Yujing","year":"2018","unstructured":"Yujing Hu, Qing Da, Anxiang Zeng, Yang Yu, and Yinghui Xu. 2018. Reinforcement Learning to Rank in E-Commerce Search Engine: Formalization, Analysis, and Application. arXiv preprint arXiv:1803.00710 (2018)."},{"key":"e_1_3_2_1_13_1","volume-title":"Reinforcement Learning for Optimizing RAG for Domain Chatbots. arXiv preprint arXiv:2401.06800","author":"Kulkarni Mandar","year":"2024","unstructured":"Mandar Kulkarni, Praveen Tangarajan, Kyung Kim, and Anusua Trivedi. 2024. Reinforcement Learning for Optimizing RAG for Domain Chatbots. arXiv preprint arXiv:2401.06800 (2024)."},{"key":"e_1_3_2_1_14_1","unstructured":"Johnny Flame Lee. 2020. Evaluation of a Deep Reinforcement Learning Method For Query Reformulation. Ph.D. Dissertation."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583244"},{"key":"e_1_3_2_1_16_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401099"},{"key":"e_1_3_2_1_18_1","volume-title":"Proc. of NeurIPS","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al., 2022. Training language models to follow instructions with human feedback. Proc. of NeurIPS (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011393"},{"key":"e_1_3_2_1_20_1","volume-title":"Proc. of NeurIPS","author":"Pan Ling","year":"2020","unstructured":"Ling Pan, Qingpeng Cai, and Longbo Huang. 2020. Softmax deep double deterministic policy gradients. Proc. of NeurIPS (2020)."},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. of ICML.","author":"Pan Ling","year":"2022","unstructured":"Ling Pan, Longbo Huang, Tengyu Ma, and Huazhe Xu. 2022a. Plan better amid conservatism: Offline multi-agent reinforcement learning with actor rectification. In Proc. of ICML."},{"key":"e_1_3_2_1_22_1","volume-title":"Proc. of ICLR.","author":"Pan Ling","year":"2024","unstructured":"Ling Pan, Moksh Jain, Kanika Madan, and Yoshua Bengio. 2024. Pre-Training and Fine-Tuning Generative Flow Networks. In Proc. of ICLR."},{"key":"e_1_3_2_1_23_1","volume-title":"Proc. of ICML.","author":"Pan Ling","year":"2023","unstructured":"Ling Pan, Nikolay Malkin, Dinghuai Zhang, and Yoshua Bengio. 2023a. Better training of gflownets with local credit and incomplete trajectories. In Proc. of ICML."},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. of NeurIPS","author":"Pan Ling","year":"2021","unstructured":"Ling Pan, Tabish Rashid, Bei Peng, Longbo Huang, and Shimon Whiteson. 2021. Regularized softmax deep multi-agent q-learning. Proc. of NeurIPS (2021)."},{"key":"e_1_3_2_1_25_1","volume-title":"Proc. of ICLR.","author":"Pan Ling","year":"2022","unstructured":"Ling Pan, Dinghuai Zhang, Aaron Courville, Longbo Huang, and Yoshua Bengio. 2022b. Generative Augmented Flow Networks. In Proc. of ICLR."},{"key":"e_1_3_2_1_26_1","unstructured":"Ling Pan Dinghuai Zhang Moksh Jain Longbo Huang and Yoshua Bengio. 2023b. Stochastic generative flow networks. In Uncertainty in Artificial Intelligence."},{"key":"e_1_3_2_1_27_1","unstructured":"Razieh Rahimi and Grace Hui Yang. [n.d.]. Modeling Exploration of Intrinsically Diverse Search Tasks as Markov Decision Processes. ( [n. d.])."},{"key":"e_1_3_2_1_28_1","volume-title":"Jieer Ouyang, Yongjun Xu, and Wei Shi.","author":"Wang Zheng","year":"2024","unstructured":"Zheng Wang, Shu Xian Teo, Jieer Ouyang, Yongjun Xu, and Wei Shi. 2024. M-RAG: Reinforcing Large Language Model Performance through Retrieval-Augmented Generation with Multiple Partitions. arXiv preprint arXiv:2405.16420 (2024)."},{"key":"e_1_3_2_1_29_1","unstructured":"Likang Wu Zhi Zheng Zhaopeng Qiu Hao Wang Hongchao Gu Tingjia Shen Chuan Qin Chen Zhu Hengshu Zhu Qi Liu et al. 2023. A Survey on Large Language Models for Recommendation. arXiv preprint arXiv:2305.19860 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615303"},{"key":"e_1_3_2_1_31_1","volume-title":"PrefRec: Recommender Systems with Human Preferences for Reinforcing Long-term User Engagement. arXiv preprint arXiv:2212.02779","author":"Xue Wanqi","year":"2022","unstructured":"Wanqi Xue, Qingpeng Cai, Zhenghai Xue, Shuo Sun, Shuchang Liu, Dong Zheng, Peng Jiang, Kun Gai, and Bo An. 2022. PrefRec: Recommender Systems with Human Preferences for Reinforcing Long-term User Engagement. arXiv preprint arXiv:2212.02779 (2022)."},{"key":"e_1_3_2_1_32_1","unstructured":"Wanqi Xue Qingpeng Cai Ruohan Zhan Dong Zheng Peng Jiang and Bo An. 2023. ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor (ICLR'23). (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Dynamic information retrieval modeling. Synthesis lectures on information concepts, retrieval, and services","author":"Yang Grace Hui","year":"2016","unstructured":"Grace Hui Yang, Marc Sloan, and Jun Wang. 2016. Dynamic information retrieval modeling. Synthesis lectures on information concepts, retrieval, and services (2016)."},{"key":"e_1_3_2_1_34_1","volume-title":"Proc. of NeurIPS","author":"Zhang Dinghuai","year":"2024","unstructured":"Dinghuai Zhang, Hanjun Dai, Nikolay Malkin, Aaron C Courville, Yoshua Bengio, and Ling Pan. 2024. Let the flows tell: Solving graph combinatorial problems with GFlowNets. Proc. of NeurIPS (2024)."},{"key":"e_1_3_2_1_35_1","volume-title":"Aaron Courville, and Yoshua Bengio.","author":"Zhang Dinghuai","year":"2023","unstructured":"Dinghuai Zhang, Ling Pan, Ricky TQ Chen, Aaron Courville, and Yoshua Bengio. 2023. Distributional gflownets with quantile flows. arXiv preprint arXiv:2302.05793 (2023)."},{"volume-title":"DRL4IR: 2nd Workshop on Deep Reinforcement Learning for Information Retrieval","author":"Zhang Weinan","key":"e_1_3_2_1_36_1","unstructured":"Weinan Zhang, Xiangyu Zhao, Li Zhao, Dawei Yin, and Grace Hui Yang. 2021. DRL4IR: 2nd Workshop on Deep Reinforcement Learning for Information Retrieval."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401467"},{"key":"e_1_3_2_1_38_1","volume-title":"KuaiSim: A comprehensive simulator for recommender systems. arXiv preprint arXiv:2309.12645","author":"Zhao Kesen","year":"2023","unstructured":"Kesen Zhao, Shuchang Liu, Qingpeng Cai, Xiangyu Zhao, Ziru Liu, Dong Zheng, Peng Jiang, and Kun Gai. 2023. KuaiSim: A comprehensive simulator for recommender systems. arXiv preprint arXiv:2309.12645 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531703"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403384"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730365","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730365","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:19:41Z","timestamp":1755865181000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730365"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":43,"alternative-id":["10.1145\/3726302.3730365","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730365","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}