{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,10]],"date-time":"2025-04-10T05:12:24Z","timestamp":1744261944805,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319937007"},{"type":"electronic","value":"9783319937014"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-93701-4_21","type":"book-chapter","created":{"date-parts":[[2018,6,11]],"date-time":"2018-06-11T11:49:52Z","timestamp":1528717792000},"page":"273-286","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Improving Search Through A3C Reinforcement Learning Based Conversational Agent"],"prefix":"10.1007","author":[{"given":"Milan","family":"Aggarwal","sequence":"first","affiliation":[]},{"given":"Aarushi","family":"Arora","sequence":"additional","affiliation":[]},{"given":"Shagun","family":"Sodhani","sequence":"additional","affiliation":[]},{"given":"Balaji","family":"Krishnamurthy","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,6,12]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"El Asri, L., He, J., Suleman, K.: A sequence-to-sequence model for user simulation in spoken dialogue systems. arXiv preprint arXiv:1607.00070 (2016)","DOI":"10.21437\/Interspeech.2016-1175"},{"key":"21_CR2","unstructured":"Bachman, P., Sordoni, A., Trischler, A.: Towards information-seeking agents. arXiv preprint arXiv:1612.02605 (2016)"},{"key":"21_CR3","series-title":"NATO ASI Series","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/978-3-642-76153-9_28","volume-title":"Neurocomputing","author":"JS Bridle","year":"1990","unstructured":"Bridle, J.S.: Probabilistic interpretation of feedforward classification network outputs, with relationships to statistical pattern recognition. In: Souli\u00e9, F.F., H\u00e9rault, J. (eds.) Neurocomputing. NATO ASI Series, vol. 68, pp. 227\u2013236. Springer, Heidelberg (1990). https:\/\/doi.org\/10.1007\/978-3-642-76153-9_28"},{"key":"21_CR4","series-title":"Lecture Notes in Electrical Engineering","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1007\/978-981-10-2585-3_8","volume-title":"Dialogues with Social Robots","author":"H Cuay\u00e1huitl","year":"2017","unstructured":"Cuay\u00e1huitl, H.: SimpleDS: a simple deep reinforcement learning dialogue system. In: Jokinen, K., Wilcock, G. (eds.) Dialogues with Social Robots. LNEE, vol. 999, pp. 109\u2013118. Springer, Singapore (2017). https:\/\/doi.org\/10.1007\/978-981-10-2585-3_8"},{"issue":"3","key":"21_CR5","first-page":"5","volume":"7","author":"H Cuayhuitl","year":"2011","unstructured":"Cuayhuitl, H., Dethlefs, N.: Spatially-aware dialogue control using hierarchical reinforcement learning. ACM Trans. Speech Lang. Process. (TSLP) 7(3), 5 (2011)","journal-title":"ACM Trans. Speech Lang. Process. (TSLP)"},{"key":"21_CR6","doi-asserted-by":"publisher","first-page":"627","DOI":"10.1037\/0033-2909.125.6.627","volume":"125","author":"EL Deci","year":"1999","unstructured":"Deci, E.L., Koestner, R., Ryan, R.M.: A meta-analytic review of experiments examining the effects of extrinsic rewards on intrinsic motivation. Psychol. Bull. 125, 627 (1999)","journal-title":"Psychol. Bull."},{"key":"21_CR7","unstructured":"Dodge, J., Gane, A., Zhang, X., Bordes, A., Chopra, S., Miller, A., Szlam, A., Weston, J.: Evaluating prerequisite qualities for learning end-to-end dialog systems. arXiv preprint arXiv:1511.06931 (2015)"},{"issue":"8","key":"21_CR8","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"21_CR9","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"21_CR10","unstructured":"Levin, E., Pieraccini, R., Eckert, W.: Learning dialogue strategies within the Markov decision process framework. In: Proceedings of the 1997 IEEE Workshop on Automatic Speech Recognition and Understanding, pp. 72\u201379. IEEE (1997)"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Li, J., Galley, M., Brockett, C., Spithourakis, G.P., Gao, J., Dolan, B.: A persona-based neural conversation model. arXiv preprint arXiv:1603.06155 (2016)","DOI":"10.18653\/v1\/P16-1094"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Li, J., Monroe, W., Ritter, A., Galley, M., Gao, J., Jurafsky, D.: Deep reinforcement learning for dialogue generation. arXiv preprint arXiv:1606.01541 (2016)","DOI":"10.18653\/v1\/D16-1127"},{"key":"21_CR13","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Narasimhan, K., Yala, A., Barzilay, R.: Improving information extraction by acquiring external evidence with reinforcement learning. arXiv preprint arXiv:1603.07954 (2016)","DOI":"10.18653\/v1\/D16-1261"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Nogueira, R., Cho, K.: Task-oriented query reformulation with reinforcement learning. arXiv preprint arXiv:1704.04572 (2017)","DOI":"10.18653\/v1\/D17-1061"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Peng, B., Li, X., Li, L., Gao, J., Celikyilmaz, A., Lee, S., Wong, K.-F.: Composite task-completion dialogue policy learning via hierarchical deep reinforcement learning. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 2221\u20132230 (2017)","DOI":"10.18653\/v1\/D17-1237"},{"issue":"Sep","key":"21_CR17","first-page":"1265","volume":"6","author":"G Shani","year":"2005","unstructured":"Shani, G., Heckerman, D., Brafman, R.I.: An MDP-based recommender system. J. Mach. Learn. Res. 6(Sep), 1265\u20131295 (2005)","journal-title":"J. Mach. Learn. Res."},{"key":"21_CR18","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, vol. 1. MIT Press, Cambridge (1998)"},{"key":"21_CR19","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in Neural Information Processing Systems, pp. 1057\u20131063 (2000)"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Ultes, S., Budzianowski, P., Casanueva, I., Mrkic, N., Barahona, L.R., Pei-Hao, S., Wen, T.-H., Gaic, M., Young, S.: Domain-independent user satisfaction reward estimation for dialogue policy learning. In: Proceedings of Interspeech 2017, pp. 1721\u20131725 (2017)","DOI":"10.21437\/Interspeech.2017-1032"},{"key":"21_CR21","unstructured":"Vinyals, O., Le, Q.: A neural conversational model. arXiv preprint arXiv:1506.05869 (2015)"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Walker, M.A., Litman, D.J., Kamm, C.A., Abella, A.: PARADISE: a framework for evaluating spoken dialogue agents. In: Proceedings of the Eighth Conference on European Chapter of the Association for Computational Linguistics, pp. 271\u2013280. Association for Computational Linguistics (1997)","DOI":"10.3115\/979617.979652"},{"key":"21_CR23","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Ph.D. dissertation. Kings College, Cambridge (1989)"},{"key":"21_CR24","unstructured":"Weston, J., Chopra, S., Bordes, A.: Memory networks. arXiv preprint arXiv:1410.3916 (2014)"},{"key":"21_CR25","unstructured":"Wunder, M., Littman, M.L., Babes, M.: Classes of multiagent Q-learning dynamics with epsilon-greedy exploration. In: Proceedings of the 27th International Conference on Machine Learning, ICML 2010, pp. 1167\u20131174 (2010)"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Zhao, T., Eskenazi, M.: Towards end-to-end learning for dialog state tracking and management using deep reinforcement learning. arXiv preprint arXiv:1606.02560 (2016)","DOI":"10.18653\/v1\/W16-3601"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Mottaghi, R., Kolve, E., Lim, J.J., Gupta, A., Fei-Fei, L., Farhadi, A.: Target-driven visual navigation in indoor scenes using deep reinforcement learning. In: 2017 IEEE International Conference on Robotics and Automation, ICRA, pp. 3357\u20133364. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"21_CR28","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.eswa.2016.09.040","volume":"69","author":"J Wei","year":"2017","unstructured":"Wei, J., He, J., Chen, K., Zhou, Y., Tang, Z.: Collaborative filtering and deep learning based recommendation system for cold start items. Expert Syst. Appl. 69, 29\u201339 (2017)","journal-title":"Expert Syst. Appl."}],"container-title":["Lecture Notes in Computer Science","Computational Science \u2013 ICCS 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-93701-4_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,11]],"date-time":"2022-06-11T00:11:43Z","timestamp":1654906303000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-93701-4_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319937007","9783319937014"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-93701-4_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"12 June 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Science","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuxi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 June 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccs-computsci2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iccs-meeting.org\/iccs2018\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"406","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"148","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"60","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}