{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:08:32Z","timestamp":1775815712239,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00521-024-10649-6","type":"journal-article","created":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T07:38:06Z","timestamp":1731915486000},"page":"499-511","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Real-time bidding with multi-agent reinforcement learning in multi-channel display advertising"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8783-3798","authenticated-orcid":false,"given":"Chen","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Baoyu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Siyao","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1243-0123","authenticated-orcid":false,"given":"Keming","family":"Mao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiyu","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingyu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,18]]},"reference":[{"key":"10649_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel P, Andrew Y Ng (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first international conference on Machine learning, page\u00a01,","DOI":"10.1145\/1015330.1015430"},{"key":"10649_CR2","doi-asserted-by":"crossref","unstructured":"Abji N, Garcia AL (2010) An auction-based approach to spectrum allocation using multi-agent reinforcement learning. In: 21st Annual IEEE International Symposium on Personal, Indoor and Mobile Radio Communications, pages 2233\u20132238. IEEE,","DOI":"10.1109\/PIMRC.2010.5671682"},{"key":"10649_CR3","doi-asserted-by":"crossref","unstructured":"Aggarwal G, Badanidiyuru A, Mehta A (2019) Autobidding with constraints. In: International Conference on Web and Internet Economics, pages 17\u201330. Springer,","DOI":"10.1007\/978-3-030-35389-6_2"},{"key":"10649_CR4","doi-asserted-by":"crossref","unstructured":"Akrour R, Schoenauer M, Sebag M (2011) Preference-based policy learning. In Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2011, Athens, Greece, September 5-9, 2011. In: Proceedings, Part I 11, pages 12\u201327. Springer,","DOI":"10.1007\/978-3-642-23780-5_11"},{"key":"10649_CR5","doi-asserted-by":"crossref","unstructured":"Akrour R, Schoenauer M, Sebag M (2012) April: Active preference learning-based reinforcement learning. In Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2012, Bristol, UK, September 24-28, 2012. Proceedings, Part II 23, pages 116\u2013131. Springer,","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"10649_CR6","unstructured":"Altman E (1999) Constrained Markov decision processes: stochastic modeling. Routledge,"},{"key":"10649_CR7","doi-asserted-by":"crossref","unstructured":"Cai H, Ren K, Zhang W, Malialis K, Wang J, Yu Y, Guo D (2017) Real-time bidding by reinforcement learning in display advertising. In: Proceedings of the Tenth ACM International Conference on Web Search and Data Mining, pages 661\u2013670,","DOI":"10.1145\/3018661.3018702"},{"key":"10649_CR8","unstructured":"Christiano PF, Leike J, Brown T, Martic M, Legg S, Amodei D.(2017) Deep reinforcement learning from human preferences. Advances in neural information processing systems, 30,"},{"key":"10649_CR9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2014.X.031","author":"C Daniel","year":"2014","unstructured":"Daniel C, Viering M, Metz J, Kroemer O, Peters J (2014) Active reward learning. Robot Sci syst. https:\/\/doi.org\/10.15607\/RSS.2014.X.031","journal-title":"Robot Sci syst"},{"key":"10649_CR10","doi-asserted-by":"crossref","unstructured":"Du M, Sassioui R, Varisteas G, State R, Brorsson M, Cherkaoui O et al (2017) Improving real-time bidding using a constrained markov decision process. In: International conference on advanced data mining and applications, pages 711\u2013726. Springer,","DOI":"10.1007\/978-3-319-69179-4_50"},{"issue":"3","key":"10649_CR11","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1257\/jep.23.3.37","volume":"23","author":"David S Evans","year":"2009","unstructured":"Evans David S (2009) The online advertising industry: economics, evolution, and privacy. J econ perspect 23(3):37\u201360","journal-title":"J econ perspect"},{"key":"10649_CR12","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1605.06676","author":"Jakob Foerster","year":"2016","unstructured":"Foerster Jakob, Assael Ioannis Alexandros, De Freitas Nando, Whiteson Shimon (2016) Learning to communicate with deep multi-agent reinforcement learning. Adv neural inform process syst. https:\/\/doi.org\/10.48550\/arXiv.1605.06676","journal-title":"Adv neural inform process syst"},{"key":"10649_CR13","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International conference on machine learning, pages 1587\u20131596. PMLR,"},{"key":"10649_CR14","unstructured":"Fujimoto S, Meger D, Precup D (2019) Off-policy deep reinforcement learning without exploration. In: International conference on machine learning, pages 2052\u20132062. PMLR,"},{"issue":"3","key":"10649_CR15","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1287\/mksc.1100.0583","volume":"30","author":"Avi Goldfarb","year":"2011","unstructured":"Goldfarb Avi, Tucker Catherine (2011) Online display advertising: targeting and obtrusiveness. Market Sci 30(3):389\u2013404","journal-title":"Market Sci"},{"key":"10649_CR16","doi-asserted-by":"crossref","unstructured":"Jayesh K Gupta, Egorov M, Kochenderfer M (2017) Cooperative multi-agent control using deep reinforcement learning. In: International conference on autonomous agents and multiagent systems, pages 66\u201383. Springer,","DOI":"10.1007\/978-3-319-71682-4_5"},{"issue":"5364","key":"10649_CR17","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1126\/science.280.5364.682","volume":"280","author":"Garrett Hardin","year":"1998","unstructured":"Hardin Garrett (1998) Extensions of\" the tragedy of the commons\". Science 280(5364):682\u2013683","journal-title":"Science"},{"key":"10649_CR18","doi-asserted-by":"crossref","unstructured":"He Y, Chen X, Wu D, Pan J, Tan Q, Yu C, Xu J, Zhu X (2021) A unified solution to constrained bidding in online display advertising. In: Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining, pages 2993\u20133001,","DOI":"10.1145\/3447548.3467199"},{"key":"10649_CR19","first-page":"242","volume":"98","author":"J Hu","year":"1998","unstructured":"Hu J, Wellman MP (1998) Multiagent reinforcement learning: theoretical framework and an algorithm. InICML 98:242\u2013250","journal-title":"InICML"},{"key":"10649_CR20","first-page":"6952","volume":"36","author":"Lu Wen Huang","year":"2022","unstructured":"Wen Huang Lu, Zhang Xintao Wu (2022) Achieving counterfactual fairness for causal bandit. In Proceed AAAI Confer Artific Intell 36:6952\u20136959","journal-title":"In Proceed AAAI Confer Artific Intell"},{"key":"10649_CR21","doi-asserted-by":"crossref","unstructured":"Jin J, Song C, Li H, Gai K, Wang J, Zhang W (2018) Real-time bidding with multi-agent reinforcement learning in display advertising. In: Proceedings of the 27th ACM international conference on information and knowledge management, pages 2193\u20132201,","DOI":"10.1145\/3269206.3272021"},{"key":"10649_CR22","doi-asserted-by":"crossref","unstructured":"Knox WB, Stone P. (2009) Interactively shaping agents via human reinforcement: The tamer framework. In: Proceedings of the fifth international conference on Knowledge capture, pages 9\u201316,","DOI":"10.1145\/1597735.1597738"},{"key":"10649_CR23","unstructured":"Kostrikov I, Nair A, Levine S (2021) Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169,"},{"key":"10649_CR24","unstructured":"Lee K, Smith L, Abbeel P (2021) Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. arXiv preprint arXiv:2106.05091,"},{"key":"10649_CR25","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1706.02275","author":"Ryan Lowe","year":"2017","unstructured":"Lowe Ryan, Wu Yi I, Tamar Aviv, Harb Jean, Abbeel OpenAI Pieter, Mordatch Igor (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Adv neural inform process syst. https:\/\/doi.org\/10.48550\/arXiv.1706.02275","journal-title":"Adv neural inform process syst"},{"key":"10649_CR26","doi-asserted-by":"crossref","unstructured":"Lu J, Yang C, Gao X, Wang L, Li C, Chen G (2019) Reinforcement learning with sequential information clustering in real-time bidding. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management, pages 1633\u20131641,","DOI":"10.1145\/3357384.3358027"},{"key":"10649_CR27","doi-asserted-by":"crossref","unstructured":"Mordatch I, Abbeel P (2018) Emergence of grounded compositional language in multi-agent populations. In: Proceedings of the AAAI Conference on Artificial Intelligence, volume\u00a032,","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"10649_CR28","unstructured":"Nair S, Mitchell E, Chen K, Savarese S, Finn C et al (2022) Learning language-conditioned robot behavior from offline data and crowd-sourced annotation. In: Conference on Robot Learning, pages 1303\u20131315. PMLR,"},{"key":"10649_CR29","first-page":"2","volume":"1","author":"Andrew Y Ng","year":"2000","unstructured":"Ng Andrew Y (2000) Algorithms for inverse reinforcement learning. Incml 1:2","journal-title":"Incml"},{"key":"10649_CR30","doi-asserted-by":"crossref","unstructured":"Perlich C, Dalessandro B, Hook R, Stitelman O, Raeder T, Provost F (2012) Bid optimizing and inventory scoring in targeted online advertising. In: Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining, pages 804\u2013812,","DOI":"10.1145\/2339530.2339655"},{"key":"10649_CR31","doi-asserted-by":"crossref","unstructured":"Sadigh D, Dragan A, Sastry S, Seshia S. (2017) Active preference-based learning of reward functions","DOI":"10.15607\/RSS.2017.XIII.053"},{"key":"10649_CR32","first-page":"4902","volume":"33","author":"Jing-Cheng Shi","year":"2019","unstructured":"Shi Jing-Cheng, Yang Yu, Da Qing, Chen Shi Yong, Zeng An Xiang (2019) Virtual-taobao: virtualizing real-world online retail environment for reinforcement learning. In Proceed AAAI Confer Artific Intell 33:4902\u20134909","journal-title":"In Proceed AAAI Confer Artific Intell"},{"key":"10649_CR33","first-page":"3008","volume":"33","author":"Nisan Stiennon","year":"2020","unstructured":"Stiennon Nisan, Ouyang Long, Jeffrey Wu, Ziegler Daniel, Lowe Ryan, Voss Chelsea, Radford Alec, Amodei Dario, Christiano Paul F et al (2020) Learning to summarize with human feedback. Adv Neural Inform Process Syst 33:3008\u20133021","journal-title":"Adv Neural Inform Process Syst"},{"key":"10649_CR34","unstructured":"Sutton RS, McAllester D, Singh S, Mansour Y (1999) Policy gradient methods for reinforcement learning with function approximation. Advances in neural information processing systems, 12,"},{"key":"10649_CR35","doi-asserted-by":"crossref","unstructured":"Tan M (1993) Multi-agent reinforcement learning: Independent vs. cooperative agents. In: Proceedings of the tenth international conference on machine learning, pages 330\u2013337","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"10649_CR36","doi-asserted-by":"crossref","unstructured":"Wang J, Yuan S (2015) Real-time bidding: A new frontier of computational advertising research. In: Proceedings of the Eighth ACM International Conference on Web Search and Data Mining, pages 415\u2013416,","DOI":"10.1145\/2684822.2697041"},{"issue":"4\u20135","key":"10649_CR37","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1561\/1500000049","volume":"11","author":"Wang Jun","year":"2017","unstructured":"Jun Wang, Weinan Zhang, Shuai Yuan (2017) Display advertising with real-time bidding (rtb) and behavioural targeting. Found Trends\u00ae in Inform Retri 11(4\u20135):297\u2013435","journal-title":"Found Trends\u00ae in Inform Retri"},{"key":"10649_CR38","unstructured":"Wang Y, Liu J, Liu Y, Hao J, He Y, Hu J, Yan WP, Li M. (2017) Ladder: A human-level bidding agent for large-scale real-time online auctions. arXiv preprint arXiv:1708.05565,"},{"key":"10649_CR39","first-page":"1442","volume":"25","author":"Aaron Wilson","year":"2012","unstructured":"Wilson Aaron, Fern Alan, Tadepalli Prasad (2012) A bayesian approach for policy learning from trajectory preference queries. Adv neural inform process syst 25:1442\u20131450","journal-title":"Adv neural inform process syst"},{"key":"10649_CR40","doi-asserted-by":"crossref","unstructured":"Wu D, Chen X, Yang X, Wang H, Tan Q, Zhang X, Xu J, Gai K. (2018) Budget constrained bidding by model-free reinforcement learning in display advertising. In: Proceedings of the 27th ACM International Conference on Information and Knowledge Management, pages 1443\u20131451,","DOI":"10.1145\/3269206.3271748"},{"key":"10649_CR41","doi-asserted-by":"crossref","unstructured":"Yang X, Sun D, Zhu R, Deng T, Guo Z, Ding Z, Qin S, Zhu Y (2019) Aiads: Automated and intelligent advertising system for sponsored search. In: Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pages 1881\u20131890,","DOI":"10.1145\/3292500.3330782"},{"key":"10649_CR42","first-page":"10299","volume":"34","author":"Yiqin Yang","year":"2021","unstructured":"Yang Yiqin, Ma Xiaoteng, Li Chenghao, Zheng Zewu, Zhang Qiyuan, Huang Gao, Yang Jun, Zhao Qianchuan (2021) Believe what you see: implicit constraint approach for offline multi-agent reinforcement learning. Adv Neural Inform Process Syst 34:10299\u201310312","journal-title":"Adv Neural Inform Process Syst"},{"key":"10649_CR43","doi-asserted-by":"crossref","unstructured":"Yuan S, Wang J, Zhao X (2013) Real-time bidding for online advertising: measurement and analysis. In: Proceedings of the seventh international workshop on data mining for online advertising, pages 1\u20138,","DOI":"10.1145\/2501040.2501980"},{"key":"10649_CR44","doi-asserted-by":"crossref","unstructured":"Zhang W, Wang J (2015) Statistical arbitrage mining for display advertising. In: Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pages 1465\u20131474,","DOI":"10.1145\/2783258.2783269"},{"key":"10649_CR45","doi-asserted-by":"crossref","unstructured":"Zhang W, Yuan S, Wang J (2014) Optimal real-time bidding for display advertising. In: Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining, pages 1077\u20131086,","DOI":"10.1145\/2623330.2623633"},{"key":"10649_CR46","first-page":"20410","volume":"34","author":"Yang Zhang","year":"2021","unstructured":"Zhang Yang, Tang Bo, Yang Qingyu, An Dou, Tang Hongyin, Xi Chenyang, Li Xueying, Xiong Feiyu (2021) Bcorle (\u03bb): an offline reinforcement learning and evaluation framework for coupons allocation in e-commerce market. Adv Neural Inform Process Syst 34:20410\u201320422","journal-title":"Adv Neural Inform Process Syst"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10649-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-10649-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-10649-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T02:18:13Z","timestamp":1737598693000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-10649-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,18]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["10649"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-10649-6","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,18]]},"assertion":[{"value":"8 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Written informed consent for publication of the data used in this paper is obtained from the Software College, Northeastern University and all authors, and does not involve any ethical conflicts.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and Informed Consent for Data Used"}}]}}