{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:39:56Z","timestamp":1765546796205,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030843366"},{"type":"electronic","value":"9783030843373"}],"license":[{"start":{"date-parts":[[2021,8,8]],"date-time":"2021-08-08T00:00:00Z","timestamp":1628380800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,8,8]],"date-time":"2021-08-08T00:00:00Z","timestamp":1628380800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-84337-3_11","type":"book-chapter","created":{"date-parts":[[2021,8,7]],"date-time":"2021-08-07T17:04:06Z","timestamp":1628355846000},"page":"134-146","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Reinforcement Learning: A Friendly Introduction"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2861-8545","authenticated-orcid":false,"given":"Dema","family":"Daoun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1694-4108","authenticated-orcid":false,"given":"Fabiha","family":"Ibnat","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4073-0476","authenticated-orcid":false,"given":"Zulfikar","family":"Alom","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5990-9305","authenticated-orcid":false,"given":"Zeyar","family":"Aung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5529-9482","authenticated-orcid":false,"given":"Mohammad Abdul","family":"Azim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,8,8]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Coates, A., Quigley, M., Ng, A.Y.: An application of reinforcement learning to aerobatic helicopter flight. In: Advances in Neural Information Processing Systems, pp. 1\u20138 (2007)","DOI":"10.7551\/mitpress\/7503.003.0006"},{"key":"11_CR2","unstructured":"Achiam, J.: Introduction to RL (2018). BOpen AI. https:\/\/spinningup.openai.com\/en\/latest\/spinningup\/rl_intro.html"},{"key":"11_CR3","doi-asserted-by":"crossref","unstructured":"Arabnejad, H., Pahl, C., Jamshidi, P., Estrada, G.: A comparison of reinforcement learning techniques for fuzzy cloud auto-scaling. In: Proceedings of the 2017 17th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing, pp. 64\u201373 (2017)","DOI":"10.1109\/CCGRID.2017.15"},{"key":"11_CR4","unstructured":"Arjona-Medina, J.A., Gillhofer, M., Widrich, M., Unterthiner, T., Brandstetter, J., Hochreiter, S.: RUDDER: return decomposition for delayed rewards. arXiv preprint arXiv:1806.07857 (2018)"},{"issue":"8","key":"11_CR5","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1073\/pnas.38.8.716","volume":"38","author":"R Bellman","year":"1952","unstructured":"Bellman, R.: On the theory of dynamic programming. Proc. Natl. Acad. Sci. U.S.A. 38(8), 716 (1952)","journal-title":"Proc. Natl. Acad. Sci. U.S.A."},{"key":"11_CR6","volume-title":"Applied Dynamic Programming","author":"RE Bellman","year":"2015","unstructured":"Bellman, R.E., Dreyfus, S.E.: Applied Dynamic Programming. Princeton University Press, Princeton (2015)"},{"key":"11_CR7","doi-asserted-by":"crossref","unstructured":"Bu, X., Rao, J., Xu, C.Z.: A reinforcement learning approach to online web systems auto-configuration. In: Proceedings of the 2009 29th IEEE International Conference on Distributed Computing Systems, pp. 2\u201311 (2009)","DOI":"10.1109\/ICDCS.2009.76"},{"issue":"1","key":"11_CR8","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1007379606734","volume":"28","author":"R Caruana","year":"1997","unstructured":"Caruana, R.: Multitask learning. Mach. Learn. 28(1), 41\u201375 (1997)","journal-title":"Mach. Learn."},{"key":"11_CR9","unstructured":"Chan, S.C., Fishman, S., Canny, J., Korattikara, A., Guadarrama, S.: Measuring the reliability of reinforcement learning algorithms. arXiv preprint arXiv:1912.05663 (2019)"},{"key":"11_CR10","unstructured":"De Luca, G.: What is a policy in reinforcement learning? (2020). Baeldung. https:\/\/www.baeldung.com\/cs\/ml-policy-reinforcement-learning"},{"issue":"3","key":"11_CR11","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1016\/0377-0427(93)90024-6","volume":"48","author":"IT Dimov","year":"1993","unstructured":"Dimov, I.T., Tonev, O.I.: Monte Carlo algorithms: performance analysis for some computer architectures. J. Comput. Appl. Math. 48(3), 253\u2013277 (1993)","journal-title":"J. Comput. Appl. Math."},{"key":"11_CR12","unstructured":"Dulac-Arnold, G., Mankowitz, D., Hester, T.: Challenges of real-world reinforcement learning. arXiv preprint arXiv:1904.12901 (2019)"},{"key":"11_CR13","unstructured":"Fazly, R.: Data science book (2020). GitHub. https:\/\/github.com\/FazlyRabbiBD\/Data-Science-Book\/blob\/master\/8-ReinforcementLearning.ipynb"},{"key":"11_CR14","unstructured":"Guru99: Reinforcement learning: what is, algorithms, applications, example (2020). Guru99. https:\/\/www.guru99.com\/reinforcement-learning-tutorial.html"},{"issue":"3","key":"11_CR15","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/s10994-012-5322-7","volume":"90","author":"T Hester","year":"2013","unstructured":"Hester, T., Stone, P.: TEXPLORE: real-time sample-efficient reinforcement learning for robots. Mach. Learn. 90(3), 385\u2013429 (2013)","journal-title":"Mach. Learn."},{"key":"11_CR16","unstructured":"Hui, J.: RL \u2013 value learning (2018). Medium. https:\/\/jonathan-hui.medium.com\/rl-value-learning-24f52b49c36d"},{"key":"11_CR17","unstructured":"Jiang, J., Dun, C., Huang, T., Lu, Z.: Graph convolutional reinforcement learning. arXiv preprint arXiv:1810.09202 (2018)"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Jin, J., Song, C., Li, H., Gai, K., Wang, J., Zhang, W.: Real-time bidding with multi-agent reinforcement learning in display advertising. In: Proceedings of the 27th ACM International Conference on Information and Knowledge Management, pp. 2193\u20132201 (2018)","DOI":"10.1145\/3269206.3272021"},{"key":"11_CR19","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J. Artif. Intell. Res."},{"issue":"4","key":"11_CR20","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"VR Konda","year":"2003","unstructured":"Konda, V.R., Tsitsiklis, J.N.: On actor-critic algorithms. SIAM J. Control Optim. 42(4), 1143\u20131166 (2003)","journal-title":"SIAM J. Control Optim."},{"key":"11_CR21","unstructured":"Lee, K., Lee, K., Shin, J., Lee, H.: Network randomization: a simple technique for generalization in deep reinforcement learning. arXiv preprint arXiv:1910.05396 (2019)"},{"issue":"2","key":"11_CR22","first-page":"814","volume":"3","author":"S Manju","year":"2011","unstructured":"Manju, S., Punithavalli, M.: An analysis of Q-learning algorithms with strategies of reward function. Int. J. Comput. Sci. Eng. 3(2), 814\u2013820 (2011)","journal-title":"Int. J. Comput. Sci. Eng."},{"key":"11_CR23","unstructured":"Mann, T.A., et al.: Learning from delayed outcomes via proxies with applications to recommender systems. In: International Conference on Machine Learning, pp. 4324\u20134332. PMLR (2019)"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Mao, H., Alizadeh, M., Menache, I., Kandula, S.: Resource management with deep reinforcement learning. In: Proceedings of the 15th ACM Workshop on Hot Topics in Networks, pp. 50\u201356 (2016)","DOI":"10.1145\/3005745.3005750"},{"key":"11_CR25","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"11_CR26","doi-asserted-by":"crossref","unstructured":"Moazami, S., Doerschuk, P.: Modeling survival in model-based reinforcement learning. arXiv preprint arXiv:2004.08648 (2020)","DOI":"10.1109\/TransAI49837.2020.00009"},{"key":"11_CR27","unstructured":"Mondal, A.K., Jamali, N.: A survey of reinforcement learning techniques: strategies, recent development, and future directions. arXiv preprint arXiv:2001.06921 (2020)"},{"key":"11_CR28","unstructured":"Osband, I., et al.: Behaviour suite for reinforcement learning. arXiv preprint arXiv:1908.03568 (2019)"},{"key":"11_CR29","unstructured":"Van der Pol, E., Oliehoek, F.A.: Coordinated deep reinforcement learners for traffic light control. In: Proceedings of the NIPS 2016 Workshop on Learning, Inference and Control of Multi-Agent Systems, pp. 1\u20138 (2016)"},{"key":"11_CR30","unstructured":"Rummery, G.A., Niranjan, M.: On-line Q-learning using connectionist systems. Technical report, Cambridge University Engineering Department, UK (1994)"},{"issue":"3","key":"11_CR31","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1147\/rd.33.0210","volume":"3","author":"AL Samuel","year":"1959","unstructured":"Samuel, A.L.: Some studies in machine learning using the game of checkers. IBM J. Res. Dev. 3(3), 210\u2013229 (1959)","journal-title":"IBM J. Res. Dev."},{"key":"11_CR32","unstructured":"SAS: Machine learning: what it is and why it matters (2020), SAS. https:\/\/www.sas.com\/en_us\/insights\/analytics\/machine-learning.html"},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Sharma, A.R., Kaushik, P.: Literature survey of statistical, deep and reinforcement learning in natural language processing. In: Proceedings of the 2017 IEEE International Conference on Computing, Communication and Automation, pp. 350\u2013354 (2017)","DOI":"10.1109\/CCAA.2017.8229841"},{"issue":"7587","key":"11_CR34","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"11_CR35","unstructured":"Silver, D., et al.: Mastering chess and shogi by self-play with a general reinforcement learning algorithm. arXiv preprint arXiv:1712.01815 (2017)"},{"key":"11_CR36","unstructured":"Singh, A.: Reinforcement learning: Bellman equation and optimality (Part 2). Towards Data Sci. (2019). https:\/\/towardsdatascience.com\/reinforcement-learning-markov-decision-process-part-2-96837c936ec3"},{"key":"11_CR37","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"11_CR38","unstructured":"Taylor, G., Burmeister, R., Xu, Z., Singh, B., Patel, A., Goldstein, T.: Training neural networks without gradients: a scalable ADMM approach. In: Proceedings of the 33rd International Conference on Machine Learning, pp. 2722\u20132731 (2016)"},{"issue":"2","key":"11_CR39","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1162\/neco.1994.6.2.215","volume":"6","author":"G Tesauro","year":"1994","unstructured":"Tesauro, G.: TD-Gammon, a self-teaching backgammon program, achieves master-level play. Neural Comput. 6(2), 215\u2013219 (1994)","journal-title":"Neural Comput."},{"key":"11_CR40","doi-asserted-by":"publisher","first-page":"1448","DOI":"10.4249\/scholarpedia.1448","volume":"3","author":"F Woergoetter","year":"2008","unstructured":"Woergoetter, F., Porr, B.: Reinforcement learning. ScholarPedia 3, 1448 (2008)","journal-title":"ScholarPedia"},{"key":"11_CR41","unstructured":"Zhang, J.: Reinforcement learning - model based planning methods. Towards Data Science (2020). https:\/\/towardsdatascience.com\/reinforcement-learning-model-based-planning-methods-5e99cae0abb8"},{"key":"11_CR42","doi-asserted-by":"crossref","unstructured":"Zheng, G., et al.: DRN: a deep reinforcement learning framework for news recommendation. In: Proceedings of the 2018 World Wide Web Conference, pp. 167\u2013176 (2018)","DOI":"10.1145\/3178876.3185994"},{"issue":"12","key":"11_CR43","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1021\/acscentsci.7b00492","volume":"3","author":"Z Zhou","year":"2017","unstructured":"Zhou, Z., Li, X., Zare, R.N.: Optimizing chemical reactions with deep reinforcement learning. ACS Cent. Sci. 3(12), 1337\u20131344 (2017)","journal-title":"ACS Cent. Sci."},{"key":"11_CR44","unstructured":"Zhu, H.: The ingredients of real-world robotic reinforcement learning. arXiv preprint arXiv:2004.12570 (2020)"}],"container-title":["Lecture Notes in Networks and Systems","The International Conference on Deep Learning, Big Data and Blockchain (Deep-BDB 2021)"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-84337-3_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,7]],"date-time":"2023-11-07T01:55:21Z","timestamp":1699322121000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-84337-3_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,8]]},"ISBN":["9783030843366","9783030843373"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-84337-3_11","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"type":"print","value":"2367-3370"},{"type":"electronic","value":"2367-3389"}],"subject":[],"published":{"date-parts":[[2021,8,8]]},"assertion":[{"value":"8 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Deep-BDB","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The International Conference on Deep Learning, Big Data and Blockchain","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"deepbdb2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}