{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:20:48Z","timestamp":1742912448699,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031264115"},{"type":"electronic","value":"9783031264122"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-26412-2_13","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T10:03:54Z","timestamp":1678961034000},"page":"200-215","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Reinforcement Learning for\u00a0Multi-Agent Stochastic Resource Collection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8083-7323","authenticated-orcid":false,"given":"Niklas","family":"Strauss","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8829-0863","authenticated-orcid":false,"given":"David","family":"Winkel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9724-4009","authenticated-orcid":false,"given":"Max","family":"Berrendorf","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6566-6343","authenticated-orcid":false,"given":"Matthias","family":"Schubert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,3,17]]},"reference":[{"issue":"12","key":"13_CR1","doi-asserted-by":"publisher","first-page":"7804","DOI":"10.1109\/TITS.2020.3009289","volume":"22","author":"G Bono","year":"2020","unstructured":"Bono, G., Dibangoye, J.S., Simonin, O., Matignon, L., Pereyron, F.: Solving multi-agent routing problems using deep attention mechanisms. IEEE Trans. Intell. Transp. Syst. 22(12), 7804\u20137813 (2020)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"13_CR2","unstructured":"Chakravorty, J., et al.: Option-critic in cooperative multi-agent systems. arXiv preprint arXiv:1911.12825 (2019)"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: AAAI, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"issue":"1","key":"13_CR4","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/0377-2217(95)00050-X","volume":"88","author":"M Gendreau","year":"1996","unstructured":"Gendreau, M., Laporte, G., S\u00e9guin, R.: Stochastic vehicle routing. Eur. J. Oper. Res. 88(1), 3\u201312 (1996)","journal-title":"Eur. J. Oper. Res."},{"key":"13_CR5","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1007\/978-3-030-29911-8_7","volume-title":"PRICAI 2019: Trends in Artificial Intelligence","author":"D Han","year":"2019","unstructured":"Han, D., B\u00f6hmer, W., Wooldridge, M., Rogers, A.: Multi-agent hierarchical reinforcement learning with dynamic termination. In: Nayak, A.C., Sharma, A. (eds.) PRICAI 2019. LNCS (LNAI), vol. 11671, pp. 80\u201392. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-29911-8_7"},{"issue":"6","key":"13_CR6","doi-asserted-by":"publisher","first-page":"750","DOI":"10.1007\/s10458-019-09421-1","volume":"33","author":"P Hernandez-Leal","year":"2019","unstructured":"Hernandez-Leal, P., Kartal, B., Taylor, M.E.: A survey and critique of multiagent deep reinforcement learning. Auton. Agents Multi-Agent Syst. 33(6), 750\u2013797 (2019). https:\/\/doi.org\/10.1007\/s10458-019-09421-1","journal-title":"Auton. Agents Multi-Agent Syst."},{"key":"13_CR7","unstructured":"Hu, J., Wellman, M.P., et al.: Multiagent reinforcement learning: theoretical framework and an algorithm. In: ICML, vol. 98, pp. 242\u2013250. Citeseer (1998)"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Kim, J., Kim, K.: Optimizing large-scale fleet management on a road network using multi-agent deep reinforcement learning with graph neural network. In: ITSC, pp. 990\u2013995. IEEE (2021)","DOI":"10.1109\/ITSC48978.2021.9565029"},{"key":"13_CR9","unstructured":"Kool, W., Van Hoof, H., Welling, M.: Attention, learn to solve routing problems! arXiv preprint arXiv:1803.08475 (2018)"},{"key":"13_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1007\/3-540-49116-3_38","volume-title":"STACS 99","author":"E Koutsoupias","year":"1999","unstructured":"Koutsoupias, E., Papadimitriou, C.: Worst-case equilibria. In: Meinel, C., Tison, S. (eds.) STACS 1999. LNCS, vol. 1563, pp. 404\u2013413. Springer, Heidelberg (1999). https:\/\/doi.org\/10.1007\/3-540-49116-3_38"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Kumar, S.N., Panneerselvam, R.: A survey on the vehicle routing problem and its variants (2012)","DOI":"10.4236\/iim.2012.43010"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Li, M., et al.: Efficient ridesharing order dispatching with mean field multi-agent reinforcement learning. In: The world wide web conference, pp. 983\u2013994 (2019)","DOI":"10.1145\/3308558.3313433"},{"key":"13_CR13","first-page":"1","volume":"99","author":"Z Liu","year":"2020","unstructured":"Liu, Z., Li, J., Wu, K.: Context-aware taxi dispatching at city-scale using deep reinforcement learning. IEEE Trans. Intell. Transp. Syst. 99, 1\u201314 (2020)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Makar, R., Mahadevan, S., Ghavamzadeh, M.: Hierarchical multi-agent reinforcement learning. In: Proceedings of the fifth International Conference on Autonomous agents, pp. 246\u2013253 (2001)","DOI":"10.1145\/375735.376302"},{"key":"13_CR15","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"13_CR16","unstructured":"Nazari, M., Oroojlooy, A., Snyder, L., Tak\u00e1c, M.: Playing Atari with deep reinforcement learning. In: Advance Neural Information Processing System, vol. 31 (2018)"},{"key":"13_CR17","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1007\/978-981-15-5577-0_51","volume-title":"Artificial Intelligence Algorithms and Applications","author":"B Peng","year":"2020","unstructured":"Peng, B., Wang, J., Zhang, Z.: A deep reinforcement learning algorithm using dynamic attention model for vehicle routing problems. In: Li, K., Li, W., Wang, H., Liu, Y. (eds.) ISICA 2019. CCIS, vol. 1205, pp. 636\u2013650. Springer, Singapore (2020). https:\/\/doi.org\/10.1007\/978-981-15-5577-0_51"},{"issue":"16","key":"13_CR18","doi-asserted-by":"publisher","first-page":"12033","DOI":"10.1007\/s00521-019-04237-2","volume":"32","author":"KK Qin","year":"2020","unstructured":"Qin, K.K., Shao, W., Ren, Y., Chan, J., Salim, F.D.: Solving multiple travelling officers problem with population-based optimization algorithms. Neural Comput. Appl. 32(16), 12033\u201312059 (2020)","journal-title":"Neural Comput. Appl."},{"key":"13_CR19","unstructured":"Rashid, T., Samvelyan, M., Schroeder, C., Farquhar, G., Foerster, J., Whiteson, S.: QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning. In: ICML, pp. 4295\u20134304. PMLR (2018)"},{"key":"13_CR20","unstructured":"Rohanimanesh, K., Mahadevan, S.: Learning to take concurrent actions. In: Advance Neural Information Processing System, vol. 15 (2002)"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Schmoll, S., Schubert, M.: Semi-markov reinforcement learning for stochastic resource collection. In: IJCAI, pp. 3349\u20133355 (2021)","DOI":"10.24963\/ijcai.2020\/463"},{"issue":"2","key":"13_CR22","doi-asserted-by":"publisher","first-page":"802","DOI":"10.1109\/JIOT.2017.2759218","volume":"5","author":"W Shao","year":"2017","unstructured":"Shao, W., Salim, F.D., Gu, T., Dinh, N.T., Chan, J.: Traveling officer problem: managing car parking violations efficiently using sensor data. IEEE Internet Things J. 5(2), 802\u2013810 (2017)","journal-title":"IEEE Internet Things J."},{"key":"13_CR23","unstructured":"Sukhbaatar, S., Fergus, R., et al.: Learning multiagent communication with backpropagation. In: Advance Neural Information Processing System, vol. 29 (2016)"},{"key":"13_CR24","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)"},{"issue":"4","key":"13_CR25","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395","volume":"12","author":"A Tampuu","year":"2017","unstructured":"Tampuu, A., Matiisen, T., Kodelja, D., Kuzovkin, I., Korjus, K., Aru, J., Aru, J., Vicente, R.: Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4), e0172395 (2017)","journal-title":"PLoS ONE"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multi-agent reinforcement learning: Independent vs. cooperative agents. In: ICML, pp. 330\u2013337 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"13_CR27","unstructured":"Tang, H., et al.: Hierarchical deep multiagent reinforcement learning with temporal abstraction. arXiv preprint arXiv:1809.09332 (2018)"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Tang, X., et al.: A deep value-network based approach for multi-driver order dispatching. In: Proceedings of the 25th ACM SIGKDD, pp. 1780\u20131790 (2019)","DOI":"10.1145\/3292500.3330724"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: AAAI, vol. 30 (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"13_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advance Neural Information Processing System, vol. 30 (2017)"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Zheng, L., et al.: Magent: a many-agent reinforcement learning platform for artificial collective intelligence. In: AAAI, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11371"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Zhou, M., et al.: Multi-agent reinforcement learning for order-dispatching via order-vehicle distribution matching. In: Proceedings of the 28th ACM Int\u2019l Conf on Information and Knowledge Management, pp. 2645\u20132653 (2019)","DOI":"10.1145\/3357384.3357799"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-26412-2_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T10:06:45Z","timestamp":1678961205000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-26412-2_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031264115","9783031264122"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-26412-2_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grenoble","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1060","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"236","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17 demo track papers have been accepted from 28 submissions","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}