{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T06:09:15Z","timestamp":1743055755136,"version":"3.40.3"},"publisher-location":"Cham","reference-count":16,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030442668"},{"type":"electronic","value":"9783030442675"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-44267-5_25","type":"book-chapter","created":{"date-parts":[[2020,4,2]],"date-time":"2020-04-02T08:04:54Z","timestamp":1585814694000},"page":"168-174","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Continuous Control in Deep Reinforcement Learning with Direct Policy Derivation from Q Network"],"prefix":"10.1007","author":[{"given":"Aydar","family":"Akhmetzyanov","sequence":"first","affiliation":[]},{"given":"Rauf","family":"Yagfarov","sequence":"additional","affiliation":[]},{"given":"Salimzhan","family":"Gafurov","sequence":"additional","affiliation":[]},{"given":"Mikhail","family":"Ostanin","sequence":"additional","affiliation":[]},{"given":"Alexandr","family":"Klimchik","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,3]]},"reference":[{"key":"25_CR1","unstructured":"Bellman, R.E.: Dynamic Programming (1957)"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Wu, L., Tian, F., Qin, T., Lai, J., Liu, T.Y.: A study of reinforcement learning for neural machine translation. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 3612\u20133621 (2018)","DOI":"10.18653\/v1\/D18-1397"},{"key":"25_CR3","unstructured":"Shalev-Shwartz, S., Shammah, S., Shashua, A.: Safe, multi-agent, reinforcement learning for autonomous driving. arXiv preprint \narXiv:1610.03295\n\n (2016)"},{"key":"25_CR4","unstructured":"Andrychowicz, M., et al.: Learning Dexterous In-Hand Manipulation (2018)"},{"issue":"3\u20134","key":"25_CR5","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"25_CR6","unstructured":"Mnih, V., et al.: Playing Atari with Deep Reinforcement Learning (2013)"},{"key":"25_CR7","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning (2015)"},{"key":"25_CR8","unstructured":"OpenAI gym RL environments. \nhttps:\/\/gym.openai.com\/"},{"key":"25_CR9","unstructured":"OpenAI baselines. \nhttps:\/\/github.com\/openai\/baselines"},{"key":"25_CR10","unstructured":"Source code of Deep Policy Derivation Q-network research. \nhttps:\/\/github.com\/AydarAkhmetzyanov\/DPDQN-Continuous-control-in-deep-reinforcement-learning-with-direct-policy-derivation-from-Q-network"},{"key":"25_CR11","doi-asserted-by":"crossref","unstructured":"Tokic, M.: Adaptive \u03b5-greedy exploration in reinforcement learning based on value differences. In: Proceedings of the 33rd Annual German Conference on Advances in Artificial Intelligence, pp. 203\u2013210. Springer, Heidelberg (2010)","DOI":"10.1007\/978-3-642-16111-7_23"},{"key":"25_CR12","unstructured":"Paszke, A., et al.: Automatic differentiation in pytorch (2017)"},{"key":"25_CR13","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1856\u20131865 (2018)"},{"key":"25_CR14","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1582\u20131591 (2018)"},{"key":"25_CR15","unstructured":"Mnih, V., Badia, A.P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., Kavukcuoglu, K.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"key":"25_CR16","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint \narXiv:1707.06347\n\n (2017)"}],"container-title":["Advances in Intelligent Systems and Computing","Human Interaction, Emerging Technologies and Future Applications II"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-44267-5_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,2]],"date-time":"2020-04-02T18:59:23Z","timestamp":1585853963000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-44267-5_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030442668","9783030442675"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-44267-5_25","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"3 April 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IHIET","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human Interaction and Emerging Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lausanne","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 April 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 April 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ihiet2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ihiet-ai.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}