{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T20:52:08Z","timestamp":1765486328565},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030302405"},{"type":"electronic","value":"9783030302412"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-30241-2_5","type":"book-chapter","created":{"date-parts":[[2019,8,31]],"date-time":"2019-08-31T05:56:10Z","timestamp":1567230970000},"page":"49-60","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Reinforcement Learning in Multi-agent Games: Open AI Gym Diplomacy Environment"],"prefix":"10.1007","author":[{"given":"Diogo","family":"Cruz","sequence":"first","affiliation":[]},{"given":"Jos\u00e9 Aleixo","family":"Cruz","sequence":"additional","affiliation":[]},{"given":"Henrique","family":"Lopes Cardoso","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,8,30]]},"reference":[{"key":"5_CR1","unstructured":"Abadi, M., Agarwal, A., Barham, P., et al.: TensorFlow: large-scale machine learning on heterogeneous systems (2015). \n                      https:\/\/www.tensorflow.org\/"},{"key":"5_CR2","unstructured":"Brockman, G., et al.: OpenAI gym. arXiv preprint. \n                      arXiv:1606.01540\n                      \n                     (2016)"},{"key":"5_CR3","volume-title":"The Rules of Diplomacy","author":"AB Calhamer","year":"2000","unstructured":"Calhamer, A.B.: The Rules of Diplomacy, 4th edn. Avalon Hill, Baltimore (2000)","edition":"4"},{"key":"5_CR4","unstructured":"Dhariwal, P., et al.: OpenAI baselines (2017)"},{"key":"5_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/BFb0027053","volume-title":"From Reaction to Cognition","author":"A Drogoul","year":"1995","unstructured":"Drogoul, A.: When ants play chess (or can strategies emerge from tactical behaviours?). In: Castelfranchi, C., M\u00fcller, J.-P. (eds.) MAAMAW 1993. LNCS, vol. 957, pp. 11\u201327. Springer, Heidelberg (1995). \n                      https:\/\/doi.org\/10.1007\/BFb0027053"},{"issue":"7","key":"5_CR6","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1016\/j.engappai.2011.06.005","volume":"24","author":"A Fabregues","year":"2011","unstructured":"Fabregues, A., Sierra, C.: DipGame: a challenging negotiation testbed. Eng. Appl. Artif. Intell. 24(7), 1137\u20131146 (2011)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"5_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1007\/978-3-319-27543-7_9","volume-title":"Transactions on Computational Collective Intelligence XX","author":"A Ferreira","year":"2015","unstructured":"Ferreira, A., Lopes Cardoso, H., Reis, L.P.: Strategic negotiation and trust in diplomacy \u2013 the DipBlue approach. In: Nguyen, N.T., Kowalczyk, R., Duval, B., van den Herik, J., Loiseau, S., Filipe, J. (eds.) Transactions on Computational Collective Intelligence XX. LNCS, vol. 9420, pp. 179\u2013200. Springer, Cham (2015). \n                      https:\/\/doi.org\/10.1007\/978-3-319-27543-7_9"},{"key":"5_CR8","unstructured":"Hill, A., et al.: Stable baselines (2018). \n                      https:\/\/github.com\/hill-a\/stable-baselines"},{"key":"5_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1007\/978-3-030-17294-7_8","volume-title":"Agreement Technologies","author":"D Jonge de","year":"2019","unstructured":"de Jonge, D., Baarslag, T., Aydo\u011fan, R., Jonker, C., Fujita, K., Ito, T.: The challenge of negotiation in the game of diplomacy. In: Lujak, M. (ed.) AT 2018. LNCS (LNAI), vol. 11327, pp. 100\u2013114. Springer, Cham (2019). \n                      https:\/\/doi.org\/10.1007\/978-3-030-17294-7_8"},{"issue":"1","key":"5_CR10","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/s10489-017-0919-y","volume":"47","author":"D Jonge de","year":"2017","unstructured":"de Jonge, D., Sierra, C.: D-Brane: a diplomacy playing agent for automated negotiations research. Appl. Intell. 47(1), 158\u2013177 (2017)","journal-title":"Appl. Intell."},{"key":"5_CR11","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint. \n                      arXiv:1312.5602\n                      \n                     (2013)"},{"key":"5_CR12","unstructured":"OpenAI: OpenAI five. \n                      https:\/\/blog.openai.com\/openai-five\/"},{"key":"5_CR13","unstructured":"Paszke, A., et al.: Automatic differentiation in PyTorch. In: NIPS-W (2017)"},{"key":"5_CR14","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms (2017)"},{"issue":"7587","key":"5_CR15","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","journal-title":"Nature"},{"key":"5_CR16","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. The MIT Press, Cambridge (2018)","edition":"2"},{"issue":"2","key":"5_CR17","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro, G.: TD-Gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput. 6(2), 215\u2013219 (1994)","journal-title":"Neural Comput."},{"key":"5_CR18","unstructured":"Wu, Y., Mansimov, E., Liao, S., Grosse, R.B., Ba, J.: Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation. CoRR abs\/1708.05144 (2017)"}],"container-title":["Lecture Notes in Computer Science","Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-30241-2_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,31]],"date-time":"2019-08-31T06:24:52Z","timestamp":1567232692000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-30241-2_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030302405","9783030302412"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-30241-2_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"30 August 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EPIA Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vila Real","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epia2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/epia2019.utad.pt\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"252","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"119","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.32","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.86","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}