{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T16:45:52Z","timestamp":1742921152667,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031402913"},{"type":"electronic","value":"9783031402920"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40292-0_7","type":"book-chapter","created":{"date-parts":[[2023,8,8]],"date-time":"2023-08-08T23:02:48Z","timestamp":1691535768000},"page":"69-82","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["PRACM: Predictive Rewards for\u00a0Actor-Critic with\u00a0Mixing Function in\u00a0Multi-Agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Sheng","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuhong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,9]]},"reference":[{"key":"7_CR1","unstructured":"B\u00f6hmer, W., Kurin, V., Whiteson, S.: Deep coordination graphs. In: International Conference on Machine Learning. pp. 980\u2013991. PMLR (2020)"},{"key":"7_CR2","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Hao, J., et al.: Exploration in deep reinforcement learning: from single-agent to multiagent domain. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3236361"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"He, W., Chen, T.: Scalable online disease diagnosis via multi-model-fused actor-critic reinforcement learning. arXiv preprint arXiv:2206.03659 (2022)","DOI":"10.1145\/3534678.3542672"},{"key":"7_CR5","unstructured":"Jang, E., Gu, S., Poole, B.: Categorical reparameterization with gumbel-softmax. arXiv preprint arXiv:1611.01144 (2016)"},{"key":"7_CR6","unstructured":"Kim, D., et al.: Learning to schedule communication in multi-agent reinforcement learning. arXiv preprint arXiv:1902.01554 (2019)"},{"key":"7_CR7","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.neucom.2016.01.031","volume":"190","author":"L Kraemer","year":"2016","unstructured":"Kraemer, L., Banerjee, B.: Multi-agent reinforcement learning as a rehearsal for decentralized planning. Neurocomputing 190, 82\u201394 (2016)","journal-title":"Neurocomputing"},{"key":"7_CR8","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1016\/j.ins.2022.07.052","volume":"611","author":"J Li","year":"2022","unstructured":"Li, J., Wu, F., Shi, H., Hwang, K.S.: A collaboration of multi-agent model using an interactive interface. Inf. Sci. 611, 349\u2013363 (2022)","journal-title":"Inf. Sci."},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Li, W., Liu, W., Shao, S., Huang, S., Song, A.: Attention-based intrinsic reward mixing network for credit assignment in multi-agent reinforcement learning. IEEE Trans. Games (2023)","DOI":"10.1109\/TG.2023.3263013"},{"key":"7_CR10","unstructured":"Lowe, R., Wu, Y.I., Tamar, A., Harb, J., Pieter Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. Adv. Inf. Process. Syst. 30 (2017)"},{"key":"7_CR11","first-page":"8304","volume":"35","author":"Z Ma","year":"2022","unstructured":"Ma, Z., Wang, R., Li, F.F., Bernstein, M., Krishna, R.: Elign: Expectation alignment as a multi-agent intrinsic reward. Adv. Neural Inf. Process. Syst. 35, 8304\u20138317 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"7_CR12","unstructured":"Ndousse, K.K., Eck, D., Levine, S., Jaques, N.: Emergent social learning via multi-agent reinforcement learning. In: International Conference on Machine Learning. pp. 7991\u20138004. PMLR (2021)"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Omidshafiei, S., et al.: Learning to teach in cooperative multiagent reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol. 33, pp. 6128\u20136136 (2019)","DOI":"10.1609\/aaai.v33i01.33016128"},{"key":"7_CR14","unstructured":"Peng, B., et al.: Facmac: Factored multi-agent centralised policy gradients. Adv. Neural Inf. Process. Syst. 34, 12208\u201312221 (2021)"},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Pina, R., De Silva, V., Hook, J., Kondoz, A.: Residual q-networks for value function factorizing in multiagent reinforcement learning. IEEE Trans. Neural Netw. Learn. Syst. (2022)","DOI":"10.1109\/TNNLS.2022.3183865"},{"key":"7_CR16","first-page":"14998","volume":"35","author":"M Prajapat","year":"2022","unstructured":"Prajapat, M., Turchetta, M., Zeilinger, M., Krause, A.: Near-optimal multi-agent learning for safe coverage control. Adv. Neural Inf. Process. Syst. 35, 14998\u201315012 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"1","key":"7_CR17","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s00521-021-06569-4","volume":"34","author":"A Puente-Castro","year":"2021","unstructured":"Puente-Castro, A., Rivero, D., Pazos, A., Fernandez-Blanco, E.: A review of artificial intelligence applied to path planning in UAV swarms. Neural Comput. Appl. 34(1), 153\u2013170 (2021). https:\/\/doi.org\/10.1007\/s00521-021-06569-4","journal-title":"Neural Comput. Appl."},{"issue":"1","key":"7_CR18","first-page":"7234","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid, T., Samvelyan, M., De Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: Monotonic value function factorisation for deep multi-agent reinforcement learning. J. Mach. Learn. Res. 21(1), 7234\u20137284 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Ryu, H., Shin, H., Park, J.: Multi-agent actor-critic with hierarchical graph attention network. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol. 34, pp. 7236\u20137243 (2020)","DOI":"10.1609\/aaai.v34i05.6214"},{"key":"7_CR20","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1016\/j.jmsy.2022.06.008","volume":"64","author":"S Sahoo","year":"2022","unstructured":"Sahoo, S., Lo, C.Y.: Smart manufacturing powered by recent technological advancements: a review. J. Manuf. Syst. 64, 236\u2013250 (2022)","journal-title":"J. Manuf. Syst."},{"key":"7_CR21","unstructured":"Samvelyan, M., et al.: The starcraft multi-agent challenge. arXiv preprint arXiv:1902.04043 (2019)"},{"issue":"24","key":"7_CR22","doi-asserted-by":"publisher","first-page":"7527","DOI":"10.1080\/00207543.2022.2029611","volume":"60","author":"R Sharma","year":"2022","unstructured":"Sharma, R., Shishodia, A., Gunasekaran, A., Min, H., Munim, Z.H.: The role of artificial intelligence in supply chain management: mapping the territory. Int. J. Prod. Res. 60(24), 7527\u20137550 (2022)","journal-title":"Int. J. Prod. Res."},{"key":"7_CR23","unstructured":"Son, K., Kim, D., Kang, W.J., Hostallero, D.E., Yi, Y.: Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International conference on machine learning. pp. 5887\u20135896. PMLR (2019)"},{"key":"7_CR24","unstructured":"Wang, J., Ren, Z., Liu, T., Yu, Y., Zhang, C.: QPLEX: Duplex dueling multi-agent q-learning. arXiv preprint arXiv:2008.01062 (2020)"},{"key":"7_CR25","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.jmsy.2022.04.004","volume":"63","author":"J Wang","year":"2022","unstructured":"Wang, J., Li, Y., Gao, R.X., Zhang, F.: Hybrid physics-based and data-driven models for smart manufacturing: Modelling, simulation, and explainability. J. Manuf. Syst. 63, 381\u2013391 (2022)","journal-title":"J. Manuf. Syst."},{"key":"7_CR26","unstructured":"Wang, Y., Han, B., Wang, T., Dong, H., Zhang, C.: Off-policy multi-agent decomposed policy gradients. arXiv preprint arXiv:2007.12322 (2020)"},{"issue":"1","key":"7_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2021.102759","volume":"59","author":"Z Wang","year":"2022","unstructured":"Wang, Z., Li, M., Lu, J., Cheng, X.: Business innovation based on artificial intelligence and blockchain technology. Inf. Process. Manag. 59(1), 102759 (2022)","journal-title":"Inf. Process. Manag."},{"key":"7_CR28","unstructured":"Wang, Z., Schaul, T., Hessel, M., Hasselt, H., Lanctot, M., Freitas, N.: Dueling network architectures for deep reinforcement learning. In: International conference on machine learning. pp. 1995\u20132003. PMLR (2016)"},{"key":"7_CR29","first-page":"16509","volume":"35","author":"M Wen","year":"2022","unstructured":"Wen, M., et al.: Multi-agent reinforcement learning is a sequence modeling problem. Adv. Neural Inf. Process. Syst. 35, 16509\u201316521 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"12","key":"7_CR30","doi-asserted-by":"publisher","first-page":"4316","DOI":"10.3390\/s22124316","volume":"22","author":"J Yang","year":"2022","unstructured":"Yang, J., Ni, J., Li, Y., Wen, J., Chen, D.: The intelligent path planning system of agricultural robot via reinforcement learning. Sensors 22(12), 4316 (2022)","journal-title":"Sensors"},{"key":"7_CR31","unstructured":"Yang, T., et al.: Exploration in deep reinforcement learning: a comprehensive survey. arXiv preprint arXiv:2109.06668 (2021)"},{"key":"7_CR32","doi-asserted-by":"publisher","unstructured":"Ye, Z., Chen, Y., Jiang, X., Song, G., Yang, B., Fan, S.: Improving sample efficiency in Multi-Agent Actor-Critic methods. Appl. Intell. 1\u201314 (2021). https:\/\/doi.org\/10.1007\/s10489-021-02554-5","DOI":"10.1007\/s10489-021-02554-5"},{"key":"7_CR33","unstructured":"Zhang, T., Li, Y., Wang, C., Xie, G., Lu, Z.: Fop: Factorizing optimal joint policy of maximum-entropy multi-agent reinforcement learning. In: International Conference on Machine Learning. pp. 12491\u201312500. PMLR (2021)"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, T., Liu, Z., Wu, S., Pu, Z., Yi, J.: Intrinsic reward with peer incentives for cooperative multi-agent reinforcement learning. In: 2022 International Joint Conference on Neural Networks (IJCNN). pp. 1\u20137. IEEE (2022)","DOI":"10.1109\/IJCNN55064.2022.9892092"},{"key":"7_CR35","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1016\/j.neucom.2021.07.014","volume":"459","author":"X Zhang","year":"2021","unstructured":"Zhang, X., Liu, Y., Xu, X., Huang, Q., Mao, H., Carie, A.: Structural relational inference actor-critic for multi-agent reinforcement learning. Neurocomputing 459, 383\u2013394 (2021)","journal-title":"Neurocomputing"},{"key":"7_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2023.104033","volume":"148","author":"D Zhou","year":"2023","unstructured":"Zhou, D., Gayah, V.V.: Scalable multi-region perimeter metering control for urban networks: a multi-agent deep reinforcement learning approach. Transp. Res. Part C Emerg. Technol. 148, 104033 (2023)","journal-title":"Transp. Res. Part C Emerg. Technol."},{"key":"7_CR37","first-page":"11853","volume":"33","author":"M Zhou","year":"2020","unstructured":"Zhou, M., Liu, Z., Sui, P., Li, Y., Chung, Y.Y.: Learning implicit credit assignment for cooperative multi-agent reinforcement learning. Adv. Neural Inf. Process. Syst. 33, 11853\u201311864 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40292-0_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,8]],"date-time":"2023-08-08T23:05:34Z","timestamp":1691535934000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40292-0_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031402913","9783031402920"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40292-0_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"9 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ksem2023.conferences.academy\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"395","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"114","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2,5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}