{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:15:42Z","timestamp":1750220142247,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3542635","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:41Z","timestamp":1660331201000},"page":"4826-4827","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["concept2code: Deep Reinforcement Learning for Conversational AI"],"prefix":"10.1145","author":[{"given":"Omprakash","family":"Sonie","sequence":"first","affiliation":[{"name":"DeepThinking.AI, Bangalore, India"}]},{"given":"Abir","family":"Chakraborty","sequence":"additional","affiliation":[{"name":"Microsoft, Bangalore, India"}]},{"given":"Ankan","family":"Mullick","sequence":"additional","affiliation":[{"name":"IIT Kharagpur, Kharagpur, India"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Reinforcement Learning an Introduction","author":"Richard Sutton","unstructured":"Richard Sutton et. al., Reinforcement Learning an Introduction 2nd Edition","edition":"2"},{"key":"e_1_3_2_1_2_1","unstructured":"David Silver Introduction to Reinforcement Learning (Course)"},{"key":"e_1_3_2_1_3_1","unstructured":"Sergey Levine et. al. Deep Reinforcement Learning (Course)"},{"key":"e_1_3_2_1_4_1","unstructured":"Jiwei Li. et. al. Deep Reinforcement Learning for Dialogue Generation"},{"key":"e_1_3_2_1_5_1","unstructured":"Jason D. Williams et. al. Hybrid Code Networks: practical and efficient end-to-end dialog control with supervised and reinforcement learning"},{"key":"e_1_3_2_1_6_1","unstructured":"Iulian V. Serban et. al. A Deep Reinforcement Learning Chatbot"},{"key":"e_1_3_2_1_7_1","unstructured":"Nnamdi Iregbulem et. al. Deep Reinforcement Learning for Task-Oriented Dialogue"},{"key":"e_1_3_2_1_8_1","unstructured":"Yaser K. et. al. Deep Reinforcement Learning for Sequence-to-Sequence Models"},{"key":"e_1_3_2_1_9_1","unstructured":"Mahipal Jadeja et. al. Deep Reinforcement Learning for Conversational AI"},{"key":"e_1_3_2_1_10_1","unstructured":"Chinnadhurai Sankar et. al. Deep Reinforcement Learning For Modeling Chit-Chat DialogWith Discrete Attributes"},{"key":"e_1_3_2_1_11_1","unstructured":"[11] Vidhushini Srinivasan et. al. Natural Language Generation Using Reinforcement Learning with External Rewards"},{"key":"e_1_3_2_1_12_1","unstructured":"Abdelrhman Saleh et. al. Hierarchical Reinforcement Learning for Open-Domain Dialog"},{"key":"e_1_3_2_1_13_1","unstructured":"Jianfeng Liu et. al. GoChat: Goal-oriented Chatbots with Hierarchical Reinforcement Learning"},{"key":"e_1_3_2_1_14_1","unstructured":"Natasha Jaques et. al. Human-centric dialog training via offline reinforcement learning"},{"key":"e_1_3_2_1_15_1","unstructured":"Baolin Peng et. al. Deep Dyna-Q: Integrating Planning for Task-Completion Dialogue Policy Learning"},{"key":"e_1_3_2_1_16_1","unstructured":"Ruijie Zhou et. al. Natural Language Models using RL with Emotion Feedback"},{"key":"e_1_3_2_1_17_1","unstructured":"Anant Khandelwal WeaSuL?: Weakly Supervised Dialogue Policy Learning: Reward Estimation for Multi-turn Dialogue"},{"key":"e_1_3_2_1_18_1","unstructured":"Ryuichi T. et. al. A Weakly Supervised Method for Topic Segmentation and Labelling in Goal-oriented Dialogues via Reinforcement Learning"},{"key":"e_1_3_2_1_19_1","unstructured":"Heriberto Cuay\u00e1huit et.al. Deep Reinforcement Learning for Multi-Domain Dialogue Systems"},{"volume-title":"et. al","author":"Chen Lili","key":"e_1_3_2_1_20_1","unstructured":"Lili Chen, et. al., Decision Transformer: Reinforcement Learning via Sequence Modeling"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Omprakash Sonie et. al. KDD-2019 Concept to Code: Deep Neural Conversational System","DOI":"10.1145\/3298689.3346957"}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Washington DC USA","acronym":"KDD '22"},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3542635","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3542635","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:59:55Z","timestamp":1750186795000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3542635"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":21,"alternative-id":["10.1145\/3534678.3542635","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3542635","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}