{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:14:59Z","timestamp":1743099299850,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030675394"},{"type":"electronic","value":"9783030675400"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-67540-0_20","type":"book-chapter","created":{"date-parts":[[2021,1,21]],"date-time":"2021-01-21T13:12:57Z","timestamp":1611234777000},"page":"337-354","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["BiC-DDPG: Bidirectionally-Coordinated Nets for Deep Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Gongju","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dianxi","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yajie","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,1,22]]},"reference":[{"key":"20_CR1","unstructured":"Brown, N., Sandholm, T.: Safe and nested endgame solving for imperfect-information games. In: Workshops at the Thirty-First AAAI Conference on Artificial Intelligence (2017)"},{"key":"20_CR2","unstructured":"Bubeck, S., Cesa-Bianchi, N.: Regret analysis of stochastic and nonstochastic multi-armed bandit problems. Found. Trends Mach. Learn. 5(1), QT06, 1\u20137, 9\u201321, 23\u201343, 45\u201365, 67\u2013105, 107\u2013115, 117\u2013127 (2012)"},{"key":"20_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-540-75538-8_7","volume-title":"Computers and Games","author":"R Coulom","year":"2007","unstructured":"Coulom, R.: Efficient selectivity and backup operators in Monte-Carlo tree search. In: van den Herik, H.J., Ciancarini, P., Donkers, H.H.L.M.J. (eds.) CG 2006. LNCS, vol. 4630, pp. 72\u201383. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-75538-8_7"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Dudani, S.A.: The distance-weighted k-nearest-neighbor rule. IEEE Trans. Syst. Man Cybern. SMC-6, 325\u2013327 (1976)","DOI":"10.1109\/TSMC.1976.5408784"},{"key":"20_CR5","unstructured":"Dulac-Arnold, G., et al.: Deep reinforcement learning in large discrete action spaces. http:\/\/arxiv.org\/abs\/ArtificialIntelligence (2015)"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients (2017)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"20_CR7","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1016\/j.isatra.2019.03.012","volume":"93","author":"J Liu","year":"2019","unstructured":"Liu, J., Li, P., Chen, W., Qin, K., Qi, L.: Distributed formation control of fractional-order multi-agent systems with relative damping and nonuniform time-delays. ISA Trans. 93, 189\u2013198 (2019)","journal-title":"ISA Trans."},{"key":"20_CR8","unstructured":"Kolokoltsov, V.N., Malafeyev, O.A.: Multi-agent interaction and nonlinear Markov games (2019)"},{"issue":"6","key":"20_CR9","first-page":"A187","volume":"8","author":"TP Lillicrap","year":"2015","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. Comput. Sci. 8(6), A187 (2015)","journal-title":"Comput. Sci."},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning. In: Machine Learning Proceedings 1994, pp. 157\u2013163. Elsevier (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"20_CR11","first-page":"322","volume":"1","author":"ML Littman","year":"2001","unstructured":"Littman, M.L.: Friend-or-foe Q-learning in general-sum games. ICML 1, 322\u2013328 (2001)","journal-title":"ICML"},{"key":"20_CR12","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/S1389-0417(01)00015-8","volume":"2","author":"ML Littman","year":"2001","unstructured":"Littman, M.L.: Value-function reinforcement learning in Markov games. Cogn. Syst. Res. 2, 55\u201366 (2001)","journal-title":"Cogn. Syst. Res."},{"key":"20_CR13","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. ArXiv abs\/1706.02275 (2017)"},{"key":"20_CR14","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. ArXiv abs\/1312.5602 (2013)"},{"key":"20_CR15","unstructured":"Peng, P., et al.: Multiagent bidirectionally-coordinated nets: emergence of human-level coordination in learning to play StarCraft combat games (2017)"},{"key":"20_CR16","unstructured":"Rashid, T., Samvelyan, M., de Witt, C.S., Farquhar, G., Foerster, J.N., Whiteson, S.: QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning. In: ICML (2018)"},{"key":"20_CR17","unstructured":"Samvelyan, M., et al.: The StarCraft multi-agent challenge. In: AAMAS (2019)"},{"issue":"11","key":"20_CR18","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.: Bidirectional recurrent neural networks. IEEE Trans. Sig. Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Sig. Process."},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Seabold, S., Perktold, J.: Statsmodels: econometric and statistical modeling with Python (2010)","DOI":"10.25080\/Majora-92bf1922-011"},{"issue":"7587","key":"20_CR20","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"20_CR21","unstructured":"Sukhbaatar, S., Szlam, A., Fergus, R.: Learning multiagent communication with backpropagation. ArXiv abs\/1605.07736 (2016)"},{"key":"20_CR22","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning. In: AAMAS (2018)"},{"key":"20_CR23","first-page":"285","volume":"16","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: an introduction. IEEE Trans. Neural Netw. 16, 285\u2013286 (1998)","journal-title":"IEEE Trans. Neural Netw."},{"key":"20_CR24","unstructured":"Tavares, A.R., Azpurua, H., Santos, A., Chaimowicz, L.: Rock, paper, StarCraft: strategy selection in real-time strategy games. In: AIIDE (2016)"},{"issue":"7782","key":"20_CR25","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"key":"20_CR26","unstructured":"Vinyals, O., et al.: StarCraft II: a new challenge for reinforcement learning (2017)"},{"key":"20_CR27","unstructured":"Watkins, C.J.C.H.: Learning from delayed reward. Ph.D. thesis, Kings College University of Cambridge (1989)"}],"container-title":["Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering","Collaborative Computing: Networking, Applications and Worksharing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-67540-0_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T17:05:48Z","timestamp":1670864748000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-67540-0_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030675394","9783030675400"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-67540-0_20","relation":{},"ISSN":["1867-8211","1867-822X"],"issn-type":[{"type":"print","value":"1867-8211"},{"type":"electronic","value":"1867-822X"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"22 January 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CollaborateCom","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Collaborative Computing: Networking, Applications and Worksharing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"colcom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/collaboratecom.eai-conferences.org\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Confy+","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"211","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"61","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}