{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T00:17:37Z","timestamp":1769559457636,"version":"3.49.0"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031243820","type":"print"},{"value":"9783031243837","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-24383-7_22","type":"book-chapter","created":{"date-parts":[[2023,1,24]],"date-time":"2023-01-24T16:18:49Z","timestamp":1674577129000},"page":"396-414","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning Dialogue Policy Efficiently Through Dyna Proximal Policy Optimization"],"prefix":"10.1007","author":[{"given":"Chenping","family":"Huang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,25]]},"reference":[{"key":"22_CR1","doi-asserted-by":"crossref","unstructured":"Asri, L.E., He, J., Suleman, K.: A sequence-to-sequence model for user simulation in spoken dialogue systems. arXiv preprint arXiv:1607.00070 (2016)","DOI":"10.21437\/Interspeech.2016-1175"},{"issue":"2","key":"22_CR2","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1145\/3166054.3166058","volume":"19","author":"H Chen","year":"2017","unstructured":"Chen, H., Liu, X., Yin, D., Tang, J.: A survey on dialogue systems: recent advances and new frontiers. ACM SIGKDD Explor. Newsl. 19(2), 25\u201335 (2017)","journal-title":"ACM SIGKDD Explor. Newsl."},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Feng, Y., Wang, Y., Li, H.: A sequence-to-sequence approach to dialogue state tracking. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 1714\u20131725 (2021)","DOI":"10.18653\/v1\/2021.acl-long.135"},{"issue":"3","key":"22_CR4","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1007\/s12559-020-09718-4","volume":"13","author":"M Firdaus","year":"2021","unstructured":"Firdaus, M., Golchha, H., Ekbal, A., Bhattacharyya, P.: A deep multi-task model for dialogue act classification, intent detection and slot filling. Cogn. Comput. 13(3), 626\u2013645 (2021)","journal-title":"Cogn. Comput."},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"Gordon-Hall, G., Gorinski, P., Cohen, S.B.: Learning dialog policies from weak demonstrations. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 1394\u20131405 (2020)","DOI":"10.18653\/v1\/2020.acl-main.129"},{"issue":"2","key":"22_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3054912","volume":"50","author":"A Hussein","year":"2017","unstructured":"Hussein, A., Gaber, M.M., Elyan, E., Jayne, C.: Imitation learning: a survey of learning methods. ACM Comput. Surv. (CSUR) 50(2), 1\u201335 (2017)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"22_CR7","unstructured":"Keizer, S., et al.: Parameter estimation for agenda-based user simulation. In: Proceedings of the SIGDIAL 2010 Conference, pp. 116\u2013123 (2010)"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Kreyssig, F., Casanueva, I., Budzianowski, P., Gasic, M.: Neural user simulation for corpus-based policy optimisation of spoken dialogue systems. In: Proceedings of the 19th Annual SIGdial Meeting on Discourse and Dialogue, pp. 60\u201369 (2018)","DOI":"10.18653\/v1\/W18-5007"},{"key":"22_CR9","doi-asserted-by":"crossref","unstructured":"Lee, C.H., Cheng, H., Ostendorf, M.: Dialogue state tracking with a language model using schema-driven prompting. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 4937\u20134949 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.404"},{"key":"22_CR10","unstructured":"Li, X., Lipton, Z.C., Dhingra, B., Li, L., Gao, J., Chen, Y.N.: A user simulator for task-completion dialogues. arXiv preprint arXiv:1612.05688 (2016)"},{"key":"22_CR11","doi-asserted-by":"crossref","unstructured":"Liu, B., Lane, I.: Iterative policy learning in end-to-end trainable task-oriented neural dialog models. In: 2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 482\u2013489. IEEE (2017)","DOI":"10.1109\/ASRU.2017.8268975"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Lu, K., Zhang, S., Chen, X.: Goal-oriented dialogue policy learning from failures. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 2596\u20132603 (2019)","DOI":"10.1609\/aaai.v33i01.33012596"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","DOI":"10.1038\/nature14236"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Papangelis, A., Wang, Y.C., Molino, P., Tur, G., Uber, A.: Collaborative multi-agent dialogue model training via reinforcement learning. In: 20th Annual Meeting of the Special Interest Group on Discourse and Dialogue, p. 92 (2019)","DOI":"10.18653\/v1\/W19-5912"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Peng, B., Li, X., Gao, J., Liu, J., Wong, K.F.: Deep Dyna-Q: integrating planning for task-completion dialogue policy learning. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2182\u20132192 (2018)","DOI":"10.18653\/v1\/P18-1203"},{"key":"22_CR16","doi-asserted-by":"crossref","unstructured":"Peng, B., et al.: Few-shot natural language generation for task-oriented dialog. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 172\u2013182 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.17"},{"key":"#cr-split#-22_CR17.1","doi-asserted-by":"crossref","unstructured":"Schatzmann, J., Thomson, B., Weilhammer, K., Ye, H., Young, S.: Agenda-based user simulation for bootstrapping a POMDP dialogue system. In: Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics","DOI":"10.3115\/1614108.1614146"},{"key":"#cr-split#-22_CR17.2","unstructured":"Companion Volume, Short Papers, pp. 149-152 (2007)"},{"key":"22_CR18","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. arXiv e-prints arXiv:1506.02438, June 2015"},{"key":"22_CR19","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Shi, W., Qian, K., Wang, X., Yu, Z.: How to build user simulators to train RL-based dialog systems. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 1990\u20132000 (2019)","DOI":"10.18653\/v1\/D19-1206"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Su, S.Y., Li, X., Gao, J., Liu, J., Chen, Y.N.: Discriminative deep Dyna-Q: robust planning for dialogue policy learning. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 3813\u20133823 (2018)","DOI":"10.18653\/v1\/D18-1416"},{"key":"22_CR22","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In: Machine Learning Proceedings 1990, pp. 216\u2013224. Elsevier (1990)","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Takanobu, R., Liang, R., Huang, M.: Multi-agent task-oriented dialog policy learning with role-aware reward decomposition. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 625\u2013638 (2020)","DOI":"10.18653\/v1\/2020.acl-main.59"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Teng, D., Qin, L., Che, W., Zhao, S., Liu, T.: Injecting word information with multi-level word adapter for Chinese spoken language understanding. In: ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8188\u20138192. IEEE (2021)","DOI":"10.1109\/ICASSP39728.2021.9413657"},{"key":"22_CR25","doi-asserted-by":"crossref","unstructured":"Tseng, B.H., Dai, Y., Kreyssig, F., Byrne, B.: Transferable dialogue systems and user simulators. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 152\u2013166 (2021)","DOI":"10.18653\/v1\/2021.acl-long.13"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"Wang, H., Peng, B., Wong, K.F.: Learning efficient dialogue policy from demonstrations through shaping. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 6355\u20136365 (2020)","DOI":"10.18653\/v1\/2020.acl-main.566"},{"key":"22_CR27","doi-asserted-by":"crossref","unstructured":"Wang, H., Wong, K.F.: A collaborative multi-agent reinforcement learning framework for dialog action decomposition. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 7882\u20137889 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.621"},{"key":"22_CR28","doi-asserted-by":"crossref","unstructured":"Wen, T.H., Gasic, M., Mrksic, N., Su, P.H., Vandyke, D., Young, S.: Semantically conditioned lstm-based natural language generation for spoken dialogue systems. arXiv preprint arXiv:1508.01745 (2015)","DOI":"10.18653\/v1\/D15-1199"},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Wu, Y., Li, X., Liu, J., Gao, J., Yang, Y.: Switch-based active deep Dyna-Q: efficient adaptive planning for task-completion dialogue policy learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 7289\u20137296 (2019)","DOI":"10.1609\/aaai.v33i01.33017289"},{"key":"22_CR30","unstructured":"Zahavy, T., Haroush, M., Merlis, N., Mankowitz, D.J., Mannor, S.: Learn what not to learn: Action elimination with deep reinforcement learning. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 31. Curran Associates, Inc. (2018)"},{"key":"22_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Li, X., Gao, J., Chen, E.: Budgeted policy learning for task-oriented dialogue systems. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 3742\u20133751 (2019)","DOI":"10.18653\/v1\/P19-1364"},{"key":"22_CR32","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Wang, Z., Huang, Z.: Automatic curriculum learning with over repetition penalty for dialogue policy learning, vol. 35(16), pp. 14540\u201314548 (2021)","DOI":"10.1609\/aaai.v35i16.17709"},{"key":"22_CR33","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Wang, Z., Zhu, C., Wang, S.: Efficient dialogue complementary policy learning via deep Q-network policy and episodic memory policy. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 4311\u20134323 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.354"},{"key":"22_CR34","doi-asserted-by":"crossref","unstructured":"Zhu, Q., et al.: ConvLab-2: an open-source toolkit for building, evaluating, and diagnosing dialogue systems. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pp. 142\u2013149 (2020)","DOI":"10.18653\/v1\/2020.acl-demos.19"}],"container-title":["Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering","Collaborative Computing: Networking, Applications and Worksharing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-24383-7_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,24]],"date-time":"2023-01-24T16:26:49Z","timestamp":1674577609000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-24383-7_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031243820","9783031243837"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-24383-7_22","relation":{},"ISSN":["1867-8211","1867-822X"],"issn-type":[{"value":"1867-8211","type":"print"},{"value":"1867-822X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"25 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CollaborateCom","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Collaborative Computing: Networking, Applications and Worksharing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"colcom2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/collaboratecom.eai-conferences.org\/2022","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Confyplus.eai.eu","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"171","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"57","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}