{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T01:21:16Z","timestamp":1773451276906,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Science and Technology Project in Shaanxi Province of China","award":["2021KW-16"],"award-info":[{"award-number":["2021KW-16"]}]},{"name":"Science and Technology Project in Xi\u2019an","award":["22GXFW0123"],"award-info":[{"award-number":["22GXFW0123"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s13042-024-02374-2","type":"journal-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T10:01:51Z","timestamp":1727690511000},"page":"2009-2022","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A multi-agent collaborative algorithm for task-oriented dialogue systems"],"prefix":"10.1007","volume":"16","author":[{"given":"Jingtao","family":"Sun","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7758-2964","authenticated-orcid":false,"given":"Jiayin","family":"Kou","sequence":"additional","affiliation":[]},{"given":"Weipeng","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Wenyan","family":"Hou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"2374_CR1","doi-asserted-by":"crossref","unstructured":"Li Q, Li P, Ren Z, et al (2022) Knowledge bridging for empathetic dialogue generation","DOI":"10.1609\/aaai.v36i10.21347"},{"issue":"1","key":"2374_CR2","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1162\/coli_a_00368","volume":"46","author":"L Zhou","year":"2020","unstructured":"Zhou L, Gao J, Li D et al (2020) The design and implementation of xiaoice, an empathetic social chatbot. Comput Linguist 46(1):53\u201393","journal-title":"Comput Linguist"},{"key":"2374_CR3","doi-asserted-by":"crossref","unstructured":"Sakata W, Shibata T, Tanaka R, et al (2019) FAQ retrieval using query-question similarity and BERT-based query-answer relevance. In: Proceedings of the 42nd International ACM SIGIR Conference on research and development in information retrieval, 2019, pp 1113\u20131116","DOI":"10.1145\/3331184.3331326"},{"key":"2374_CR4","doi-asserted-by":"crossref","unstructured":"Budzianowski P, Vuli\u0107 I (2019) Hello, It\u2019s GPT-2 - How Can I Help You? Towards the Use of Pretrained Language Models for Task-Oriented Dialogue Systems. In Proceedings of the 3rd Workshop on Neural Generation and Translation, pages 15\u201322, Hong Kong. Association for Computational Linguistics.","DOI":"10.18653\/v1\/D19-5602"},{"key":"2374_CR5","doi-asserted-by":"crossref","unstructured":"Su S-Y, Lo K-L, Yeh Y-T, et al (2018) Natural language generation by hierarchical decoding with linguistic patterns. In: Proceedings of the 2018 Conference of the North American Chapter of the association for computational linguistics: human language technologies, 2018, pp 61\u201366","DOI":"10.18653\/v1\/N18-2010"},{"key":"2374_CR6","doi-asserted-by":"crossref","unstructured":"Zhong V, Xiong C, Socher R (2018). Global-locally self-attentive dialogue state tracker. 2018 arXiv preprint arXiv:1805.09655","DOI":"10.18653\/v1\/P18-1135"},{"key":"2374_CR7","doi-asserted-by":"crossref","unstructured":"Gupta S, Shah R, Mohit M, et al. Semantic parsing for task oriented dialog using hierarchical representations. In: Proceedings of the 2018 Conference on empirical methods in natural language processing, 2018, pp 2787\u20132792","DOI":"10.18653\/v1\/D18-1300"},{"key":"2374_CR8","doi-asserted-by":"crossref","unstructured":"Mrk\u0161i\u0107 N, S\u00e9aghdha DO, Wen TH, et al (2016) Neural belief tracker: Data-driven dialogue state tracking. arXiv preprint arXiv:1606.03777","DOI":"10.18653\/v1\/P17-1163"},{"key":"2374_CR9","doi-asserted-by":"crossref","unstructured":"Dai Z, Yang Z, Yang Y, et al (2019) Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860","DOI":"10.18653\/v1\/P19-1285"},{"key":"2374_CR10","unstructured":"Shalyminov I (2020) Data-efficient methods for dialogue systems. arXiv preprint arXiv:2012.02929"},{"key":"2374_CR11","doi-asserted-by":"crossref","unstructured":"Hu Y, Lee C H, Xie T, et al (2022) In-context learning for few-shot dialogue state tracking. arXiv preprint arXiv:2203.08568","DOI":"10.18653\/v1\/2022.findings-emnlp.193"},{"key":"2374_CR12","doi-asserted-by":"crossref","unstructured":"Lipton Z, Li X, Gao J, et al (2018) Bbq-networks: Efficient exploration in deep reinforcement learning for task-oriented dialogue systems. In: Proceedings of the AAAI Conference on artificial intelligence 2018, 32(1)","DOI":"10.1609\/aaai.v32i1.11946"},{"key":"2374_CR13","doi-asserted-by":"crossref","unstructured":"Li J, Monroe W, Ritter A, et al (2016) Deep reinforcement learning for dialogue generation. arXiv preprint arXiv:1606.01541","DOI":"10.18653\/v1\/D16-1127"},{"key":"2374_CR14","doi-asserted-by":"crossref","unstructured":"Budzianowski P, Wen T-H, Tseng B-H, Casanueva I, Ultes S, Ramadan O, Ga\u02c7 si\u00b4 c M (2018) Multiwoz: a large scale multi-domain wizard-of-oz dataset for task-oriented dialogue modelling. In: 2018 Conference on empirical methods in natural language processing, 2018, pp 5016\u20135026","DOI":"10.18653\/v1\/D18-1547"},{"key":"2374_CR15","doi-asserted-by":"crossref","unstructured":"Ham D, Lee J G, Jang Y, et al (2020) End-to-end neural pipeline for goal-oriented dialogue systems using GPT-2. In: Proceedings of the 58th Annual Meeting of the association for computational linguistics, 2020, pp 583\u2013592","DOI":"10.18653\/v1\/2020.acl-main.54"},{"key":"2374_CR16","doi-asserted-by":"crossref","unstructured":"G\u00fcr I, Hakkani-T\u00fcr D, T\u00fcr G, et al (2018) User modeling for task oriented dialogues. In: 2018 IEEE Spoken Language Technology Workshop (SLT), 2018, pp 900\u2013906","DOI":"10.1109\/SLT.2018.8639652"},{"issue":"10","key":"2374_CR17","first-page":"11765","volume":"36","author":"M Zhong","year":"2022","unstructured":"Zhong M, Liu Y, Xu Y et al (2022) Dialoglm: Pre-trained model for long dialogue understanding and summarization. Proc AAAI Conf Artif Intell 36(10):11765\u201311773","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2374_CR18","doi-asserted-by":"crossref","unstructured":"Wang Q, Ding L, Cao Y, et al (2023) Divide, conquer, and combine: mixture of semantic-independent experts for zero-shot dialogue state tracking. arXiv preprint arXiv:2306.00434","DOI":"10.18653\/v1\/2023.acl-long.114"},{"key":"2374_CR19","doi-asserted-by":"crossref","unstructured":"Jia Q, Liu Y, Ren S, et al (2020) Multi-turn response selection using dialogue dependency relations. arXiv preprint arXiv:2010.01502","DOI":"10.18653\/v1\/2020.emnlp-main.150"},{"key":"2374_CR20","doi-asserted-by":"crossref","unstructured":"Lin X, Jian W, He J, et al (2020) Generating informative conversational response using recurrent knowledge-interaction and knowledge-copy. In: Proceedings of the 58th Annual Meeting of the Association for computational linguistics, 2020, pp 41\u201352","DOI":"10.18653\/v1\/2020.acl-main.6"},{"key":"2374_CR21","doi-asserted-by":"crossref","unstructured":"Du W, Black AW (2019) Boosting dialog response generation. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp 38\u201343, Florence, Italy. Association for Computational Linguistics","DOI":"10.18653\/v1\/P19-1005"},{"key":"2374_CR22","unstructured":"Wang Q, Ding L, Cao Y, et al (2023) Recursively summarizing enables long-term dialogue memory in large language models. arXiv preprint arXiv:2308.15022"},{"key":"2374_CR23","unstructured":"Li X, Chen Y N, Li L, et al (2017) End-to-end task-completion neural dialogue systems. arXiv preprint arXiv:1703.01008"},{"key":"2374_CR24","doi-asserted-by":"crossref","unstructured":"Li Z, Niu C, Meng F, et al (2019) Incremental transformer with deliberation decoder for document grounded conversations. arXiv preprint arXiv:1907.08854","DOI":"10.18653\/v1\/P19-1002"},{"key":"2374_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2023.107325","author":"A Abo-eleneen","year":"2023","unstructured":"Abo-eleneen A, Palliyali A, Catal C (2023) The role of Reinforcement Learning in software testing. Inform Softw Technol. https:\/\/doi.org\/10.1016\/j.infsof.2023.107325","journal-title":"Inform Softw Technol"},{"issue":"12","key":"2374_CR26","first-page":"14801","volume":"37","author":"W Du","year":"2023","unstructured":"Du W, Ye J, Gu J et al (2023) SafeLight: a reinforcement learning method toward collision-free traffic signal control. Proc AAAI Conf Arti Intell 37(12):14801\u201314810","journal-title":"Proc AAAI Conf Arti Intell"},{"issue":"6","key":"2374_CR27","doi-asserted-by":"publisher","first-page":"4493","DOI":"10.1109\/TSC.2023.3326197","volume":"16","author":"H Aboutorab","year":"2023","unstructured":"Aboutorab H, Hussain OK, Saberi M et al (2023) Reinforcement learning-based news recommendation system. In IEEE Trans Serv Comput 16(6):4493\u20134502","journal-title":"In IEEE Trans Serv Comput"},{"key":"2374_CR28","doi-asserted-by":"crossref","unstructured":"Jaques N, Shen J H, Ghandeharioun A, et al (2020) Human-centric dialog training via offline reinforcement learning. arXiv preprint arXiv:2010.05848","DOI":"10.18653\/v1\/2020.emnlp-main.327"},{"key":"2374_CR29","doi-asserted-by":"crossref","unstructured":"Takanobu R, Zhu H, Huang M (2019) Guided dialog policy learning: reward estimation for multi-domain task-oriented dialog. arXiv preprint arXiv:1908.10719","DOI":"10.18653\/v1\/D19-1010"},{"issue":"01","key":"2374_CR30","first-page":"6722","volume":"33","author":"Z Li","year":"2019","unstructured":"Li Z, Kiseleva J, De Rijke M (2019) Dialogue generation: From imitation learning to inverse reinforcement learning. Proc AAAI Conf Artif Intell 33(01):6722\u20136729","journal-title":"Proc AAAI Conf Artif Intell"},{"issue":"2","key":"2374_CR31","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1109\/MMUL.2021.3065678","volume":"28","author":"X Huang","year":"2021","unstructured":"Huang X, Ren M, Han Q et al (2021) Emotion detection for conversations based on reinforcement learning framework. IEEE Multimed 28(2):76\u201385","journal-title":"IEEE Multimed"},{"key":"2374_CR32","doi-asserted-by":"crossref","unstructured":"Le AC (2021) A deep reinforcement learning model using long contexts for Chatbots. In: 2021 International Conference on system science and engineering (ICSSE). IEEE, 2021, pp 83\u201387","DOI":"10.1109\/ICSSE52999.2021.9538427"},{"key":"2374_CR33","doi-asserted-by":"crossref","unstructured":"Papangelis A, Wang Y C, Molino P, et al (2019) Collaborative multi-agent dialogue model training via reinforcement learning. arXiv preprint arXiv:1907.05507","DOI":"10.18653\/v1\/W19-5912"},{"key":"2374_CR34","doi-asserted-by":"crossref","unstructured":"Das A, Kottur S, Moura JMF, et al (2017) Learning cooperative visual dialog agents with deep reinforcement learning. In: Proceedings of the IEEE International Conference on computer vision, 2017, pp 2951\u20132960","DOI":"10.1109\/ICCV.2017.321"},{"key":"2374_CR35","doi-asserted-by":"crossref","unstructured":"Zhang Z, Liao L, Zhu X, et al (2020) Learning goal-oriented dialogue policy with opposite agent awareness. arXiv preprint arXiv:2004.09731","DOI":"10.18653\/v1\/2020.aacl-main.16"},{"key":"2374_CR36","doi-asserted-by":"crossref","unstructured":"Wang H, Wong KF (2021). A collaborative multi-agent reinforcement learning framework for dialog action decomposition. In: Proceedings of the 2021 Conference on empirical methods in natural language processing, 2021, pp 7882\u20137889","DOI":"10.18653\/v1\/2021.emnlp-main.621"},{"key":"2374_CR37","doi-asserted-by":"crossref","unstructured":"Kim H, Kim B, Kim G (2020) Will I sound like me? improving persona consistency in dialogues through pragmatic self-consciousness. arXiv preprint arXiv:2004.05816","DOI":"10.18653\/v1\/2020.emnlp-main.65"},{"key":"2374_CR38","unstructured":"Wang Z, Yu Y, Zheng W, et al (2024) Multi-Agent collaboration framework for recommender systems. arXiv preprint arXiv:2402.15235"},{"key":"2374_CR39","unstructured":"Gyevnar B, Wang C, Lucas C G, et al (2023) Causal social explanations for stochastic sequential multi-agent decision-making. arXiv preprint arXiv:2302.10809"},{"key":"2374_CR40","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/j.cogsys.2023.01.002","volume":"79","author":"Z Nagoev","year":"2023","unstructured":"Nagoev Z, Nagoeva O, Anchokov M et al (2023) The symbol grounding problem in the system of general artificial intelligence based on multi-agent neurocognitive architecture. Cogn Syst Res 79:71\u201384","journal-title":"Cogn Syst Res"},{"issue":"6","key":"2374_CR41","doi-asserted-by":"publisher","first-page":"2239","DOI":"10.1007\/s13042-022-01759-5","volume":"14","author":"D Fan","year":"2023","unstructured":"Fan D, Shen H, Dong L (2023) Twin attentive deep reinforcement learning for multi-agent defensive convoy. Int J Mach Learn Cybern 14(6):2239\u20132250","journal-title":"Int J Mach Learn Cybern"},{"key":"2374_CR42","doi-asserted-by":"crossref","unstructured":"Littman ML (1994) Markov games as a framework for multi-agent reinforcement learning. In: Machine Learning Proceedings 1994. Morgan Kaufmann, 1994, pp 157\u2013163","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"2374_CR43","unstructured":"Papoudakis G, Christianos F, Rahman A, Albrecht SV (2019) Dealing with non-stationarity in multi-agent deep reinforcement learning. CoRR, abs\/1906.04737"},{"issue":"7540","key":"2374_CR44","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529","journal-title":"Nature"},{"key":"2374_CR45","unstructured":"Bahdanau D, Brakel P, Xu K, et al (2016) An actor-critic algorithm for sequence prediction. arXiv preprint arXiv:1607.07086"},{"key":"2374_CR46","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Proceedings of the 31st international conference on neural information processing systems, pp 6382\u20136393"},{"issue":"4","key":"2374_CR47","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1287\/moor.27.4.819.297","volume":"27","author":"DS Bernstein","year":"2002","unstructured":"Bernstein DS, Givan R, Immerman N et al (2002) The complexity of decentralized control of Markov decision processes. Math Oper Res 27(4):819\u2013840","journal-title":"Math Oper Res"},{"key":"2374_CR48","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8:229\u2013256","journal-title":"Mach Learn"},{"key":"2374_CR49","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O 2017 Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347."},{"key":"2374_CR50","doi-asserted-by":"crossref","unstructured":"Liu B, Lane I (2017) Iterative policy learning in end-to-end trainable task-oriented neural dialog models. In: 2017 IEEE Automatic Speech Recognition and Understanding Workshop, 2017, pp 482\u2013489","DOI":"10.1109\/ASRU.2017.8268975"},{"key":"2374_CR51","unstructured":"Rashid T, Samvelyan M, Schroeder C, et al (2018) Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning. In: International Conference on machine learning. PMLR, 2018, pp 4295\u20134304"},{"key":"2374_CR52","doi-asserted-by":"crossref","unstructured":"Wang H, Wong KF (2021) A collaborative multi-agent reinforcement learning framework for dialog action decomposition. In: Proceedings of the 2021 Conference on empirical methods in natural language processing, 2021, pp 7882\u20137889","DOI":"10.18653\/v1\/2021.emnlp-main.621"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02374-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02374-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02374-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,20]],"date-time":"2025-02-20T09:46:53Z","timestamp":1740044813000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02374-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"references-count":52,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["2374"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02374-2","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"4 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 September 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflicts of interest regarding the publication of this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}